Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(232)

Side by Side Diff: chrome/tools/convert_dict/dic_reader.cc

Issue 11566003: Bump dictionary versions to 3-0 (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Fix android compile Created 7 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/tools/convert_dict/aff_reader.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/tools/convert_dict/dic_reader.h" 5 #include "chrome/tools/convert_dict/dic_reader.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <set> 8 #include <set>
9 9
10 #include "base/file_util.h" 10 #include "base/file_util.h"
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 // token. (It is attached to the first token if a word doesn't have affix 110 // token. (It is attached to the first token if a word doesn't have affix
111 // rules.) 111 // rules.)
112 size_t word_tab_offset = utf8word.find('\t'); 112 size_t word_tab_offset = utf8word.find('\t');
113 if (word_tab_offset != std::string::npos) 113 if (word_tab_offset != std::string::npos)
114 utf8word = utf8word.substr(0, word_tab_offset); 114 utf8word = utf8word.substr(0, word_tab_offset);
115 115
116 WordSet::iterator found = word_set->find(utf8word); 116 WordSet::iterator found = word_set->find(utf8word);
117 std::set<int> affix_vector; 117 std::set<int> affix_vector;
118 affix_vector.insert(affix_index); 118 affix_vector.insert(affix_index);
119 119
120 if (found == word_set->end()) { 120 if (found == word_set->end())
121 word_set->insert(std::make_pair(utf8word, affix_vector)); 121 word_set->insert(std::make_pair(utf8word, affix_vector));
122 } else { 122 else
123 // The affixes of the delta file should override those in the 123 found->second.insert(affix_index);
124 // dictionary file.
125 found->second.swap(affix_vector);
126 }
127 } 124 }
128 125
129 return true; 126 return true;
130 } 127 }
131 128
132 } // namespace 129 } // namespace
133 130
134 DicReader::DicReader(const FilePath& path) { 131 DicReader::DicReader(const FilePath& path) {
135 file_ = file_util::OpenFile(path, "r"); 132 file_ = file_util::OpenFile(path, "r");
136 133
(...skipping 26 matching lines...) Expand all
163 aff_reader->encoding(), true)) 160 aff_reader->encoding(), true))
164 return false; 161 return false;
165 162
166 // Add words from the .dic_delta file to the word set, if it exists. 163 // Add words from the .dic_delta file to the word set, if it exists.
167 // The first line is the first word to add. Word count line is not present. 164 // The first line is the first word to add. Word count line is not present.
168 // NOTE: These additional words should be encoded as UTF-8. 165 // NOTE: These additional words should be encoded as UTF-8.
169 if (additional_words_file_ != NULL) { 166 if (additional_words_file_ != NULL) {
170 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", 167 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta",
171 "UTF-8", false); 168 "UTF-8", false);
172 } 169 }
173
174 // Make sure the words are sorted, they may be unsorted in the input. 170 // Make sure the words are sorted, they may be unsorted in the input.
175 for (WordSet::iterator word = word_set.begin(); word != word_set.end(); 171 for (WordSet::iterator word = word_set.begin(); word != word_set.end();
176 ++word) { 172 ++word) {
177 std::vector<int> affixes; 173 std::vector<int> affixes;
178 for (std::set<int>::iterator aff = word->second.begin(); 174 for (std::set<int>::iterator aff = word->second.begin();
179 aff != word->second.end(); ++aff) 175 aff != word->second.end(); ++aff)
180 affixes.push_back(*aff); 176 affixes.push_back(*aff);
181 177
182 // Double check that the affixes are sorted. This isn't strictly necessary 178 // Double check that the affixes are sorted. This isn't strictly necessary
183 // but it's nice for the file to have a fixed layout. 179 // but it's nice for the file to have a fixed layout.
184 std::sort(affixes.begin(), affixes.end()); 180 std::sort(affixes.begin(), affixes.end());
181 std::reverse(affixes.begin(), affixes.end());
185 words_.push_back(std::make_pair(word->first, affixes)); 182 words_.push_back(std::make_pair(word->first, affixes));
186 } 183 }
187 184
188 // Double-check that the words are sorted. 185 // Double-check that the words are sorted.
189 std::sort(words_.begin(), words_.end()); 186 std::sort(words_.begin(), words_.end());
190 return true; 187 return true;
191 } 188 }
192 189
193 } // namespace convert_dict 190 } // namespace convert_dict
OLD | NEW
« no previous file with comments | « chrome/tools/convert_dict/aff_reader.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698