| OLD | NEW | 
|---|
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "chrome/tools/convert_dict/dic_reader.h" | 5 #include "chrome/tools/convert_dict/dic_reader.h" | 
| 6 | 6 | 
| 7 #include <algorithm> | 7 #include <algorithm> | 
| 8 #include <set> | 8 #include <set> | 
| 9 | 9 | 
| 10 #include "base/file_util.h" | 10 #include "base/file_util.h" | 
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 110     // token. (It is attached to the first token if a word doesn't have affix | 110     // token. (It is attached to the first token if a word doesn't have affix | 
| 111     // rules.) | 111     // rules.) | 
| 112     size_t word_tab_offset = utf8word.find('\t'); | 112     size_t word_tab_offset = utf8word.find('\t'); | 
| 113     if (word_tab_offset != std::string::npos) | 113     if (word_tab_offset != std::string::npos) | 
| 114       utf8word = utf8word.substr(0, word_tab_offset); | 114       utf8word = utf8word.substr(0, word_tab_offset); | 
| 115 | 115 | 
| 116     WordSet::iterator found = word_set->find(utf8word); | 116     WordSet::iterator found = word_set->find(utf8word); | 
| 117     std::set<int> affix_vector; | 117     std::set<int> affix_vector; | 
| 118     affix_vector.insert(affix_index); | 118     affix_vector.insert(affix_index); | 
| 119 | 119 | 
| 120     if (found == word_set->end()) { | 120     if (found == word_set->end()) | 
| 121       word_set->insert(std::make_pair(utf8word, affix_vector)); | 121       word_set->insert(std::make_pair(utf8word, affix_vector)); | 
| 122     } else { | 122     else | 
| 123       // The affixes of the delta file should override those in the | 123       found->second.insert(affix_index); | 
| 124       // dictionary file. |  | 
| 125       found->second.swap(affix_vector); |  | 
| 126     } |  | 
| 127   } | 124   } | 
| 128 | 125 | 
| 129   return true; | 126   return true; | 
| 130 } | 127 } | 
| 131 | 128 | 
| 132 }  // namespace | 129 }  // namespace | 
| 133 | 130 | 
| 134 DicReader::DicReader(const FilePath& path) { | 131 DicReader::DicReader(const FilePath& path) { | 
| 135   file_ = file_util::OpenFile(path, "r"); | 132   file_ = file_util::OpenFile(path, "r"); | 
| 136 | 133 | 
| (...skipping 26 matching lines...) Expand all  Loading... | 
| 163                        aff_reader->encoding(), true)) | 160                        aff_reader->encoding(), true)) | 
| 164     return false; | 161     return false; | 
| 165 | 162 | 
| 166   // Add words from the .dic_delta file to the word set, if it exists. | 163   // Add words from the .dic_delta file to the word set, if it exists. | 
| 167   // The first line is the first word to add. Word count line is not present. | 164   // The first line is the first word to add. Word count line is not present. | 
| 168   // NOTE: These additional words should be encoded as UTF-8. | 165   // NOTE: These additional words should be encoded as UTF-8. | 
| 169   if (additional_words_file_ != NULL) { | 166   if (additional_words_file_ != NULL) { | 
| 170     PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", | 167     PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", | 
| 171                     "UTF-8", false); | 168                     "UTF-8", false); | 
| 172   } | 169   } | 
| 173 |  | 
| 174   // Make sure the words are sorted, they may be unsorted in the input. | 170   // Make sure the words are sorted, they may be unsorted in the input. | 
| 175   for (WordSet::iterator word = word_set.begin(); word != word_set.end(); | 171   for (WordSet::iterator word = word_set.begin(); word != word_set.end(); | 
| 176        ++word) { | 172        ++word) { | 
| 177     std::vector<int> affixes; | 173     std::vector<int> affixes; | 
| 178     for (std::set<int>::iterator aff = word->second.begin(); | 174     for (std::set<int>::iterator aff = word->second.begin(); | 
| 179          aff != word->second.end(); ++aff) | 175          aff != word->second.end(); ++aff) | 
| 180       affixes.push_back(*aff); | 176       affixes.push_back(*aff); | 
| 181 | 177 | 
| 182     // Double check that the affixes are sorted. This isn't strictly necessary | 178     // Double check that the affixes are sorted. This isn't strictly necessary | 
| 183     // but it's nice for the file to have a fixed layout. | 179     // but it's nice for the file to have a fixed layout. | 
| 184     std::sort(affixes.begin(), affixes.end()); | 180     std::sort(affixes.begin(), affixes.end()); | 
|  | 181     std::reverse(affixes.begin(), affixes.end()); | 
| 185     words_.push_back(std::make_pair(word->first, affixes)); | 182     words_.push_back(std::make_pair(word->first, affixes)); | 
| 186   } | 183   } | 
| 187 | 184 | 
| 188   // Double-check that the words are sorted. | 185   // Double-check that the words are sorted. | 
| 189   std::sort(words_.begin(), words_.end()); | 186   std::sort(words_.begin(), words_.end()); | 
| 190   return true; | 187   return true; | 
| 191 } | 188 } | 
| 192 | 189 | 
| 193 }  // namespace convert_dict | 190 }  // namespace convert_dict | 
| OLD | NEW | 
|---|