chrome/tools/convert_dict/aff_reader.cc - Issue 11566003: Bump dictionary versions to 3-0

Side by Side Diff: chrome/tools/convert_dict/aff_reader.cc

Issue 11566003: Bump dictionary versions to 3-0 (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Fix android compile Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/tools/convert_dict/aff_reader.h"	5 #include "chrome/tools/convert_dict/aff_reader.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8	8

9 #include "base/file_util.h"	9 #include "base/file_util.h"

10 #include "base/i18n/icu_string_conversions.h"	10 #include "base/i18n/icu_string_conversions.h"

(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
115 }	115 }

116 } else if (StringBeginsWith(line, "TRY ") ||	116 } else if (StringBeginsWith(line, "TRY ") ||

117 StringBeginsWith(line, "MAP ")) {	117 StringBeginsWith(line, "MAP ")) {

118 HandleEncodedCommand(line);	118 HandleEncodedCommand(line);

119 } else if (StringBeginsWith(line, "IGNORE ")) {	119 } else if (StringBeginsWith(line, "IGNORE ")) {

120 printf("We don't support the IGNORE command yet. This would change how "	120 printf("We don't support the IGNORE command yet. This would change how "

121 "we would insert things in our lookup table.\n");	121 "we would insert things in our lookup table.\n");

122 exit(1);	122 exit(1);

123 } else if (StringBeginsWith(line, "COMPLEXPREFIXES ")) {	123 } else if (StringBeginsWith(line, "COMPLEXPREFIXES ")) {

124 printf("We don't support the COMPLEXPREFIXES command yet. This would "	124 printf("We don't support the COMPLEXPREFIXES command yet. This would "

125 "mean we have to insert words backwords as well (I think)\n");	125 "mean we have to insert words backwards as well (I think)\n");

126 exit(1);	126 exit(1);

127 } else {	127 } else {

128 // All other commands get stored in the other commands list.	128 // All other commands get stored in the other commands list.

129 HandleRawCommand(line);	129 HandleRawCommand(line);

130 }	130 }

131 }	131 }

132	132

133 return true;	133 return true;

134 }	134 }

135	135

(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
234 size_t slash_index = part.find('/');	234 size_t slash_index = part.find('/');

235 if (slash_index != std::string::npos && !has_indexed_affixes()) {	235 if (slash_index != std::string::npos && !has_indexed_affixes()) {

236 // This can also have a rule string associated with it following a	236 // This can also have a rule string associated with it following a

237 // slash. For example:	237 // slash. For example:

238 // PFX P 0 foo/Y .	238 // PFX P 0 foo/Y .

239 // The "Y" is a flag. For example, the aff file might have a line:	239 // The "Y" is a flag. For example, the aff file might have a line:

240 // COMPOUNDFLAG Y	240 // COMPOUNDFLAG Y

241 // so that means that this prefix would be a compound one.	241 // so that means that this prefix would be a compound one.

242 //	242 //

243 // It expects these rules to use the same alias rules as the .dic	243 // It expects these rules to use the same alias rules as the .dic

244 // file. We've forced it to use aliases, which is a numberical index	244 // file. We've forced it to use aliases, which is a numerical index

245 // instead of these character flags, and this needs to be consistent.	245 // instead of these character flags, and this needs to be consistent.

246	246

247 std::string before_flags = part.substr(0, slash_index + 1);	247 std::string before_flags = part.substr(0, slash_index + 1);

248	248

249 // After the slash are both the flags, then whitespace, then the part	249 // After the slash are both the flags, then whitespace, then the part

250 // that tells us what to strip.	250 // that tells us what to strip.

251 std::vector<std::string> after_slash;	251 std::vector<std::string> after_slash;

252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);	252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);

253 if (after_slash.size() < 2) {	253 if (after_slash.size() == 0) {

254 // Note that we may get a third term here which is the	254 printf("ERROR: Found 0 terms after slash in affix rule '%s', "

255 // morphological description of this rule. This happens in the tests	255 "but need at least 2.\n",

256 // only, so we can just ignore it.	256 part.c_str());

257 printf("ERROR: Didn't get enough after the slash\n");

258 return;	257 return;

259 }	258 }

	259 if (after_slash.size() == 1) {

	260 printf("WARNING: Found 1 term after slash in affix rule '%s', "

	261 "but expected at least 2. Adding '.'.\n",

	262 part.c_str());

	263 after_slash.push_back(".");

	264 }

	265 // Note that we may get a third term here which is the morphological

	266 // description of this rule. This happens in the tests only, so we can

	267 // just ignore it.

260	268

261 part = base::StringPrintf("%s%d %s",	269 part = base::StringPrintf("%s%d %s",

262 before_flags.c_str(),	270 before_flags.c_str(),

263 GetAFIndexForAFString(after_slash[0]),	271 GetAFIndexForAFString(after_slash[0]),

264 after_slash[1].c_str());	272 after_slash[1].c_str());

265 }	273 }

266	274

267 // Reencode from here	275 // Reencode from here

268 std::string reencoded;	276 std::string reencoded;

269 if (!EncodingToUTF8(part, &reencoded))	277 if (!EncodingToUTF8(part, &reencoded)) {

	278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",

	279 part.c_str());

270 break;	280 break;

	281 }

271	282

272 *rule = rule->substr(0, part_start) + reencoded;	283 *rule = rule->substr(0, part_start) + reencoded;

273 break;	284 break;

274 }	285 }

275 token.clear();	286 token.clear();

276 } else {	287 } else {

277 token.push_back((*rule)[i]);	288 token.push_back((*rule)[i]);

278 }	289 }

279 }	290 }

280	291

281 affix_rules_.push_back(*rule);	292 affix_rules_.push_back(*rule);

282 }	293 }

283	294

284 void AffReader::AddReplacement(std::string* rule) {	295 void AffReader::AddReplacement(std::string* rule) {

285 TrimLine(rule);	296 TrimLine(rule);

	297 CollapseDuplicateSpaces(rule);

286	298

287 std::string utf8rule;	299 std::string utf8rule;

288 if (!EncodingToUTF8(*rule, &utf8rule))	300 if (!EncodingToUTF8(*rule, &utf8rule)) {

	301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",

	302 rule->c_str());

289 return;	303 return;

	304 }

290	305

	306 // The first space separates key and value.

	307 size_t space_index = utf8rule.find(' ');

	308 if (space_index == std::string::npos) {

	309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());

	310 return;

	311 }

291 std::vector<std::string> split;	312 std::vector<std::string> split;

292 base::SplitString(utf8rule, ' ', &split);	313 split.push_back(utf8rule.substr(0, space_index));

	314 split.push_back(utf8rule.substr(space_index + 1));

293	315

294 // There should be two parts.	316 // Underscores are used to represent spaces in most aff files

295 if (split.size() != 2)

296 return;

297

298 // Underscores are used to represent spaces

299 // (since the line is parsed on spaces).	317 // (since the line is parsed on spaces).

300 std::replace(split[0].begin(), split[0].end(), '_', ' ');	318 std::replace(split[0].begin(), split[0].end(), '_', ' ');

301 std::replace(split[1].begin(), split[1].end(), '_', ' ');	319 std::replace(split[1].begin(), split[1].end(), '_', ' ');

302	320

303 replacements_.push_back(std::make_pair(split[0], split[1]));	321 replacements_.push_back(std::make_pair(split[0], split[1]));

304 }	322 }

305	323

306 void AffReader::HandleRawCommand(const std::string& line) {	324 void AffReader::HandleRawCommand(const std::string& line) {

307 other_commands_.push_back(line);	325 other_commands_.push_back(line);

308 }	326 }

309	327

310 void AffReader::HandleEncodedCommand(const std::string& line) {	328 void AffReader::HandleEncodedCommand(const std::string& line) {

311 std::string utf8;	329 std::string utf8;

312 if (EncodingToUTF8(line, &utf8))	330 if (!EncodingToUTF8(line, &utf8)) {

313 other_commands_.push_back(utf8);	331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());

	332 return;

	333 }

	334 other_commands_.push_back(utf8);

314 }	335 }

315	336

316 } // namespace convert_dict	337 } // namespace convert_dict

OLD	NEW

« no previous file with comments | « chrome/renderer/spellchecker/spellcheck_unittest.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »