| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/tools/convert_dict/dic_reader.h" | 5 #include "chrome/tools/convert_dict/dic_reader.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <set> | 8 #include <set> |
| 9 | 9 |
| 10 #include "base/file_util.h" | 10 #include "base/file_util.h" |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 // token. (It is attached to the first token if a word doesn't have affix | 110 // token. (It is attached to the first token if a word doesn't have affix |
| 111 // rules.) | 111 // rules.) |
| 112 size_t word_tab_offset = utf8word.find('\t'); | 112 size_t word_tab_offset = utf8word.find('\t'); |
| 113 if (word_tab_offset != std::string::npos) | 113 if (word_tab_offset != std::string::npos) |
| 114 utf8word = utf8word.substr(0, word_tab_offset); | 114 utf8word = utf8word.substr(0, word_tab_offset); |
| 115 | 115 |
| 116 WordSet::iterator found = word_set->find(utf8word); | 116 WordSet::iterator found = word_set->find(utf8word); |
| 117 std::set<int> affix_vector; | 117 std::set<int> affix_vector; |
| 118 affix_vector.insert(affix_index); | 118 affix_vector.insert(affix_index); |
| 119 | 119 |
| 120 if (found == word_set->end()) { | 120 if (found == word_set->end()) |
| 121 word_set->insert(std::make_pair(utf8word, affix_vector)); | 121 word_set->insert(std::make_pair(utf8word, affix_vector)); |
| 122 } else { | 122 else |
| 123 // The affixes of the delta file should override those in the | 123 found->second.insert(affix_index); |
| 124 // dictionary file. | |
| 125 found->second.swap(affix_vector); | |
| 126 } | |
| 127 } | 124 } |
| 128 | 125 |
| 129 return true; | 126 return true; |
| 130 } | 127 } |
| 131 | 128 |
| 132 } // namespace | 129 } // namespace |
| 133 | 130 |
| 134 DicReader::DicReader(const FilePath& path) { | 131 DicReader::DicReader(const FilePath& path) { |
| 135 file_ = file_util::OpenFile(path, "r"); | 132 file_ = file_util::OpenFile(path, "r"); |
| 136 | 133 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 163 aff_reader->encoding(), true)) | 160 aff_reader->encoding(), true)) |
| 164 return false; | 161 return false; |
| 165 | 162 |
| 166 // Add words from the .dic_delta file to the word set, if it exists. | 163 // Add words from the .dic_delta file to the word set, if it exists. |
| 167 // The first line is the first word to add. Word count line is not present. | 164 // The first line is the first word to add. Word count line is not present. |
| 168 // NOTE: These additional words should be encoded as UTF-8. | 165 // NOTE: These additional words should be encoded as UTF-8. |
| 169 if (additional_words_file_ != NULL) { | 166 if (additional_words_file_ != NULL) { |
| 170 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", | 167 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", |
| 171 "UTF-8", false); | 168 "UTF-8", false); |
| 172 } | 169 } |
| 173 | |
| 174 // Make sure the words are sorted, they may be unsorted in the input. | 170 // Make sure the words are sorted, they may be unsorted in the input. |
| 175 for (WordSet::iterator word = word_set.begin(); word != word_set.end(); | 171 for (WordSet::iterator word = word_set.begin(); word != word_set.end(); |
| 176 ++word) { | 172 ++word) { |
| 177 std::vector<int> affixes; | 173 std::vector<int> affixes; |
| 178 for (std::set<int>::iterator aff = word->second.begin(); | 174 for (std::set<int>::iterator aff = word->second.begin(); |
| 179 aff != word->second.end(); ++aff) | 175 aff != word->second.end(); ++aff) |
| 180 affixes.push_back(*aff); | 176 affixes.push_back(*aff); |
| 181 | 177 |
| 182 // Double check that the affixes are sorted. This isn't strictly necessary | 178 // Double check that the affixes are sorted. This isn't strictly necessary |
| 183 // but it's nice for the file to have a fixed layout. | 179 // but it's nice for the file to have a fixed layout. |
| 184 std::sort(affixes.begin(), affixes.end()); | 180 std::sort(affixes.begin(), affixes.end()); |
| 181 std::reverse(affixes.begin(), affixes.end()); |
| 185 words_.push_back(std::make_pair(word->first, affixes)); | 182 words_.push_back(std::make_pair(word->first, affixes)); |
| 186 } | 183 } |
| 187 | 184 |
| 188 // Double-check that the words are sorted. | 185 // Double-check that the words are sorted. |
| 189 std::sort(words_.begin(), words_.end()); | 186 std::sort(words_.begin(), words_.end()); |
| 190 return true; | 187 return true; |
| 191 } | 188 } |
| 192 | 189 |
| 193 } // namespace convert_dict | 190 } // namespace convert_dict |
| OLD | NEW |