chrome/renderer/spellchecker/spellcheck_unittest.cc - Issue 11566003: Bump dictionary versions to 3-0

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_unittest.cc

Issue 11566003: Bump dictionary versions to 3-0 (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Fix android compile Created 7 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "webkit/glue/webkit_glue.h"	5 #include "webkit/glue/webkit_glue.h"

6	6

7 #include "base/file_util.h"	7 #include "base/file_util.h"

8 #include "base/message_loop.h"	8 #include "base/message_loop.h"

9 #include "base/path_service.h"	9 #include "base/path_service.h"

10 #include "base/platform_file.h"	10 #include "base/platform_file.h"

11 #include "base/sys_string_conversions.h"	11 #include "base/sys_string_conversions.h"

12 #include "base/utf_string_conversions.h"	12 #include "base/utf_string_conversions.h"

13 #include "chrome/renderer/spellchecker/hunspell_engine.h"	13 #include "chrome/renderer/spellchecker/hunspell_engine.h"

14 #include "chrome/renderer/spellchecker/spellcheck.h"	14 #include "chrome/renderer/spellchecker/spellcheck.h"

15 #include "chrome/common/chrome_paths.h"	15 #include "chrome/common/chrome_paths.h"

16 #include "chrome/common/spellcheck_common.h"	16 #include "chrome/common/spellcheck_common.h"

17 #include "chrome/common/spellcheck_result.h"	17 #include "chrome/common/spellcheck_result.h"

18 #include "testing/gtest/include/gtest/gtest.h"	18 #include "testing/gtest/include/gtest/gtest.h"

19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingCompletion.h"	19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingCompletion.h"

20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingResult.h"	20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCheckingResult.h"

	21 #include "ui/base/l10n/l10n_util.h"

21	22

22 namespace {	23 namespace {

23	24

24 FilePath GetHunspellDirectory() {	25 FilePath GetHunspellDirectory() {

25 FilePath hunspell_directory;	26 FilePath hunspell_directory;

26 if (!PathService::Get(base::DIR_SOURCE_ROOT, &hunspell_directory))	27 if (!PathService::Get(base::DIR_SOURCE_ROOT, &hunspell_directory))

27 return FilePath();	28 return FilePath();

28	29

29 hunspell_directory = hunspell_directory.AppendASCII("third_party");	30 hunspell_directory = hunspell_directory.AppendASCII("third_party");

30 hunspell_directory = hunspell_directory.AppendASCII("hunspell_dictionaries");	31 hunspell_directory = hunspell_directory.AppendASCII("hunspell_dictionaries");

(...skipping 98 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
129 // space character;	130 // space character;

130 // * Tests for the function with a valid English word with a following	131 // * Tests for the function with a valid English word with a following

131 // non-English word;	132 // non-English word;

132 // * Tests for the function with two valid English words concatenated	133 // * Tests for the function with two valid English words concatenated

133 // with space characters or non-English words;	134 // with space characters or non-English words;

134 // * Tests for the function with an invalid English word;	135 // * Tests for the function with an invalid English word;

135 // * Tests for the function with an invalid English word with a preceding	136 // * Tests for the function with an invalid English word with a preceding

136 // space character;	137 // space character;

137 // * Tests for the function with an invalid English word with a preceding	138 // * Tests for the function with an invalid English word with a preceding

138 // non-English word;	139 // non-English word;

139 // * Tests for the function with2 an invalid English word with a following	140 // * Tests for the function with an invalid English word with a following

140 // space character;	141 // space character;

141 // * Tests for the function with an invalid English word with a following	142 // * Tests for the function with an invalid English word with a following

142 // non-English word, and;	143 // non-English word, and;

143 // * Tests for the function with two invalid English words concatenated	144 // * Tests for the function with two invalid English words concatenated

144 // with space characters or non-English words.	145 // with space characters or non-English words.

145 // A test with a "[ROBUSTNESS]" mark shows it is a robustness test and it uses	146 // A test with a "[ROBUSTNESS]" mark shows it is a robustness test and it uses

146 // grammartically incorrect string.	147 // grammatically incorrect string.

147 // TODO(hbono): Please feel free to add more tests.	148 // TODO(hbono): Please feel free to add more tests.

148 TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) {	149 TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) {

149 static const struct {	150 static const struct {

150 // A string to be tested.	151 // A string to be tested.

151 const wchar_t* input;	152 const wchar_t* input;

152 // An expected result for this test case.	153 // An expected result for this test case.

153 // * true: the input string does not have any invalid words.	154 // * true: the input string does not have any invalid words.

154 // * false: the input string has one or more invalid words.	155 // * false: the input string has one or more invalid words.

155 bool expected_result;	156 bool expected_result;

156 // The position and the length of the first invalid word.	157 // The position and the length of the first invalid word.

157 int misspelling_start;	158 int misspelling_start;

158 int misspelling_length;	159 int misspelling_length;

159 } kTestCases[] = {	160 } kTestCases[] = {

160 // Empty strings.	161 // Empty strings.

161 {L"", true},	162 {L"", true},

162 {L" ", true},	163 {L" ", true},

163 {L"\xA0", true},	164 {L"\xA0", true},

164 {L"\x3000", true},	165 {L"\x3000", true},

165	166

166 // A valid English word "hello".	167 // A valid English word "hello".

167 {L"hello", true},	168 {L"hello", true},

168 // A valid Chinese word (meaning "hello") consisiting of two CJKV	169 // A valid Chinese word (meaning "hello") consisting of two CJKV

169 // ideographs	170 // ideographs

170 {L"\x4F60\x597D", true},	171 {L"\x4F60\x597D", true},

171 // A valid Korean word (meaning "hello") consisting of five hangul	172 // A valid Korean word (meaning "hello") consisting of five hangul

172 // syllables	173 // syllables

173 {L"\xC548\xB155\xD558\xC138\xC694", true},	174 {L"\xC548\xB155\xD558\xC138\xC694", true},

174 // A valid Japanese word (meaning "hello") consisting of five Hiragana	175 // A valid Japanese word (meaning "hello") consisting of five Hiragana

175 // letters	176 // letters

176 {L"\x3053\x3093\x306B\x3061\x306F", true},	177 {L"\x3053\x3093\x306B\x3061\x306F", true},

177 // A valid Hindi word (meaning ?) consisting of six Devanagari letters	178 // A valid Hindi word (meaning ?) consisting of six Devanagari letters

178 // (This word is copied from "http://b/issue?id=857583".)	179 // (This word is copied from "http://b/issue?id=857583".)

179 {L"\x0930\x093E\x091C\x0927\x093E\x0928", true},	180 {L"\x0930\x093E\x091C\x0927\x093E\x0928", true},

180 // A valid English word "affix" using a Latin ligature 'ffi'	181 // A valid English word "affix" using a Latin ligature 'ffi'

181 {L"a\xFB03x", true},	182 {L"a\xFB03x", true},

182 // A valid English word "hello" (fullwidth version)	183 // A valid English word "hello" (fullwidth version)

183 {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true},	184 {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true},

184 // Two valid Greek words (meaning "hello") consisting of seven Greek	185 // Two valid Greek words (meaning "hello") consisting of seven Greek

185 // letters	186 // letters

186 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true},	187 {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true},

187 // A valid Russian word (meainng "hello") consisting of twelve Cyrillic	188 // A valid Russian word (meaning "hello") consisting of twelve Cyrillic

188 // letters	189 // letters

189 {L"\x0437\x0434\x0440\x0430\x0432\x0441"	190 {L"\x0437\x0434\x0440\x0430\x0432\x0441"

190 L"\x0442\x0432\x0443\x0439\x0442\x0435", true},	191 L"\x0442\x0432\x0443\x0439\x0442\x0435", true},

191 // A valid English contraction	192 // A valid English contraction

192 {L"isn't", true},	193 {L"isn't", true},

193 // A valid English word enclosed with underscores.	194 // A valid English word enclosed with underscores.

194 {L"_hello_", true},	195 {L"_hello_", true},

195	196

196 // A valid English word with a preceding whitespace	197 // A valid English word with a preceding whitespace

197 {L" " L"hello", true},	198 {L" " L"hello", true},

(...skipping 542 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
740 L"\x043e \x0434\x043e\x0441\x0442\x0443\x043f\x043d\x043e\x044e "	741 L"\x043e \x0434\x043e\x0441\x0442\x0443\x043f\x043d\x043e\x044e "

741 L"\x0442\x0430 \x043a\x043e\x0440\x0438\x0441\x043d\x043e\x044e."	742 L"\x0442\x0430 \x043a\x043e\x0440\x0438\x0441\x043d\x043e\x044e."

742 }, {	743 }, {

743 // Vietnamese	744 // Vietnamese

744 "vi-VN",	745 "vi-VN",

745 L"Nhi\x1EC7m v\x1EE5 c\x1EE7\x0061 "	746 L"Nhi\x1EC7m v\x1EE5 c\x1EE7\x0061 "

746 L"Google la \x0111\x1EC3 t\x1ED5 ch\x1EE9\x0063 "	747 L"Google la \x0111\x1EC3 t\x1ED5 ch\x1EE9\x0063 "

747 L"c\x00E1\x0063 th\x00F4ng tin c\x1EE7\x0061 "	748 L"c\x00E1\x0063 th\x00F4ng tin c\x1EE7\x0061 "

748 L"th\x1EBF gi\x1EDBi va l\x00E0m cho n\x00F3 universal c\x00F3 "	749 L"th\x1EBF gi\x1EDBi va l\x00E0m cho n\x00F3 universal c\x00F3 "

749 L"th\x1EC3 truy c\x1EADp va h\x1EEFu d\x1EE5ng h\x01A1n."	750 L"th\x1EC3 truy c\x1EADp va h\x1EEFu d\x1EE5ng h\x01A1n."

	751 }, {

	752 // Korean

	753 "ko",

	754 L"Google\xC758 \xBAA9\xD45C\xB294 \xC804\xC138\xACC4\xC758 "

	755 L"\xC815\xBCF4\xB97C \xCCB4\xACC4\xD654\xD558\xC5EC \xBAA8\xB450\xAC00 "

	756 L"\xD3B8\xB9AC\xD558\xAC8C \xC774\xC6A9\xD560 \xC218 "

	757 L"\xC788\xB3C4\xB85D \xD558\xB294 \xAC83\xC785\xB2C8\xB2E4."

	758 }, {

	759 // Albanian

	760 "sq",

	761 L"Misioni i Google \x00EBsht\x00EB q\x00EB t\x00EB organizoj\x00EB "

	762 L"informacionin e bot\x00EBs dhe t\x00EB b\x00EBjn\x00EB at\x00EB "

	763 L"universalisht t\x00EB arritshme dhe t\x00EB dobishme."

	764 }, {

	765 // Tamil

	766 "ta",

	767 L"Google \x0B87\x0BA9\x0BCD "

	768 L"\x0BA8\x0BC7\x0BBE\x0B95\x0BCD\x0B95\x0BAE\x0BCD "

	769 L"\x0B89\x0BB2\x0B95\x0BBF\x0BA9\x0BCD \x0BA4\x0B95\x0BB5\x0BB2\x0BCD "

	770 L"\x0B8F\x0BB1\x0BCD\x0BAA\x0BBE\x0B9F\x0BC1 \x0B87\x0BA4\x0BC1 "

	771 L"\u0B89\u0BB2\u0B95\u0BB3\u0BBE\u0BB5\u0BBF\u0BAF "

	772 L"\x0B85\x0BA3\x0BC1\x0B95\x0B95\x0BCD \x0B95\x0BC2\x0B9F\x0BBF\x0BAF "

	773 L"\x0BAE\x0BB1\x0BCD\x0BB1\x0BC1\x0BAE\x0BCD "

	774 L"\x0BAA\x0BAF\x0BA9\x0BC1\x0BB3\x0BCD\x0BB3 "

	775 L"\x0B9A\x0BC6\x0BAF\x0BCD\x0BAF \x0B89\x0BB3\x0BCD\x0BB3\x0BA4\x0BC1."

750 },	776 },

751 };	777 };

752	778

753 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {	779 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {

754 ReinitializeSpellCheck(kTestCases[i].language);	780 ReinitializeSpellCheck(kTestCases[i].language);

755 size_t input_length = 0;	781 size_t input_length = 0;

756 if (kTestCases[i].input != NULL)	782 if (kTestCases[i].input != NULL)

757 input_length = wcslen(kTestCases[i].input);	783 input_length = wcslen(kTestCases[i].input);

758	784

759 int misspelling_start = 0;	785 int misspelling_start = 0;

760 int misspelling_length = 0;	786 int misspelling_length = 0;

761 bool result = spell_check()->SpellCheckWord(	787 bool result = spell_check()->SpellCheckWord(

762 WideToUTF16(kTestCases[i].input).c_str(),	788 WideToUTF16(kTestCases[i].input).c_str(),

763 static_cast<int>(input_length),	789 static_cast<int>(input_length),

764 0,	790 0,

765 &misspelling_start,	791 &misspelling_start,

766 &misspelling_length, NULL);	792 &misspelling_length, NULL);

767	793

768 EXPECT_TRUE(result) << kTestCases[i].language;	794 EXPECT_TRUE(result)

	795 << "\""

	796 << std::wstring(kTestCases[i].input).substr(

	797 misspelling_start, misspelling_length)

	798 << "\" is misspelled in "

	799 << kTestCases[i].language

	800 << ".";

769 EXPECT_EQ(0, misspelling_start);	801 EXPECT_EQ(0, misspelling_start);

770 EXPECT_EQ(0, misspelling_length);	802 EXPECT_EQ(0, misspelling_length);

771 }	803 }

772 }	804 }

773	805

774 TEST_F(SpellCheckTest, GetAutoCorrectionWord_EN_US) {	806 TEST_F(SpellCheckTest, GetAutoCorrectionWord_EN_US) {

775 static const struct {	807 static const struct {

776 // A misspelled word.	808 // A misspelled word.

777 const char* input;	809 const char* input;

778	810

(...skipping 370 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1149	1181

1150 EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].input <<	1182 EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].input <<

1151 " in " << kLocales[j];	1183 " in " << kLocales[j];

1152 }	1184 }

1153 }	1185 }

1154 }	1186 }

1155	1187

1156 // Checks that NOSUGGEST works in English dictionaries.	1188 // Checks that NOSUGGEST works in English dictionaries.

1157 TEST_F(SpellCheckTest, NoSuggest) {	1189 TEST_F(SpellCheckTest, NoSuggest) {

1158 static const struct {	1190 static const struct {

	1191 const char* misspelling;

1159 const char* input;	1192 const char* input;

	1193 const char* locale;

1160 bool should_pass;	1194 bool should_pass;

1161 } kTestCases[] = {	1195 } kTestCases[] = {

1162 {"cocksucker", true},	1196 {"suckerbert", "cocksucker", "en-GB", true},

1163 {"cocksuckers", true},	1197 {"suckerbert", "cocksucker", "en-US", true},

	1198 {"suckerbert", "cocksucker", "en-CA", true},

	1199 {"suckerbert", "cocksucker", "en-AU", true},

	1200 {"suckerbert", "cocksuckers", "en-GB", true},

	1201 {"suckerbert", "cocksuckers", "en-US", true},

	1202 {"suckerbert", "cocksuckers", "en-CA", true},

	1203 {"suckerbert", "cocksuckers", "en-AU", true},

	1204 {"Batasunaa", "Batasuna", "ca-ES", true},

	1205 {"pornoo", "porno", "it-IT", true},

	1206 {"catass", "catas", "lt-LT", true},

	1207 {"kuracc", "kurac", "sl-SI", true},

	1208 {"pittt", "pitt", "sv-SE", true},

1164 };	1209 };

1165	1210

1166 static const char* kLocales[] = { "en-GB", "en-US", "en-CA", "en-AU" };	1211 size_t test_cases_size = ARRAYSIZE_UNSAFE(kTestCases);

	1212 for (size_t i = 0; i < test_cases_size; ++i) {

	1213 ReinitializeSpellCheck(kTestCases[i].locale);

	1214 size_t input_length = 0;

	1215 if (kTestCases[i].input != NULL)

	1216 input_length = strlen(kTestCases[i].input);

1167	1217

1168 // First check that the NOSUGGEST flag didn't mark these words as not	1218 // First check that the NOSUGGEST flag didn't mark this word as not being in

1169 // being in the dictionary.	1219 // the dictionary.

1170 size_t test_cases_size = ARRAYSIZE_UNSAFE(kTestCases);	1220 int misspelling_start = 0;

1171 for (size_t j = 0; j < arraysize(kLocales); ++j) {	1221 int misspelling_length = 0;

1172 ReinitializeSpellCheck(kLocales[j]);	1222 bool result = spell_check()->SpellCheckWord(

1173 for (size_t i = 0; i < test_cases_size; ++i) {	1223 ASCIIToUTF16(kTestCases[i].input).c_str(),

1174 size_t input_length = 0;	1224 static_cast<int>(input_length),

1175 if (kTestCases[i].input != NULL)	1225 0,

1176 input_length = strlen(kTestCases[i].input);	1226 &misspelling_start,

	1227 &misspelling_length, NULL);

1177	1228

1178 int misspelling_start = 0;	1229 EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].input <<

1179 int misspelling_length = 0;	1230 " in " << kTestCases[i].input;

1180 bool result = spell_check()->SpellCheckWord(

1181 ASCIIToUTF16(kTestCases[i].input).c_str(),

1182 static_cast<int>(input_length),

1183 0,

1184 &misspelling_start,

1185 &misspelling_length, NULL);

1186	1231

1187 EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].input <<	1232 // Now verify that this test case does not show up as a suggestion.

1188 " in " << kLocales[j];

1189 }

1190 }

1191

1192 // Now verify that neither of testCases show up as suggestions.

1193 for (size_t d = 0; d < arraysize(kLocales); ++d) {

1194 ReinitializeSpellCheck(kLocales[d]);

1195 int misspelling_start;

1196 int misspelling_length;

1197 std::vector<string16> suggestions;	1233 std::vector<string16> suggestions;

1198 spell_check()->SpellCheckWord(	1234 input_length = 0;

1199 ASCIIToUTF16("suckerbert").c_str(),	1235 if (kTestCases[i].misspelling != NULL)

1200 10,	1236 input_length = strlen(kTestCases[i].misspelling);

	1237 result = spell_check()->SpellCheckWord(

	1238 ASCIIToUTF16(kTestCases[i].misspelling).c_str(),

	1239 static_cast<int>(input_length),

1201 0,	1240 0,

1202 &misspelling_start,	1241 &misspelling_start,

1203 &misspelling_length,	1242 &misspelling_length,

1204 &suggestions);	1243 &suggestions);

1205 // Check if the suggested words occur.	1244 // Check if the suggested words occur.

	1245 EXPECT_FALSE(result) << kTestCases[i].misspelling

	1246 << " is not a misspelling in "

	1247 << kTestCases[i].locale;

1206 for (int j = 0; j < static_cast<int>(suggestions.size()); j++) {	1248 for (int j = 0; j < static_cast<int>(suggestions.size()); j++) {

1207 for (size_t t = 0; t < test_cases_size; t++) {	1249 for (size_t t = 0; t < test_cases_size; t++) {

1208 int compare_result =	1250 int compare_result =

1209 suggestions.at(j).compare(ASCIIToUTF16(kTestCases[t].input));	1251 suggestions.at(j).compare(ASCIIToUTF16(kTestCases[t].input));

1210 EXPECT_FALSE(compare_result == 0) << kTestCases[t].input <<	1252 EXPECT_FALSE(compare_result == 0) << kTestCases[t].input <<

1211 " in " << kLocales[d];	1253 " in " << kTestCases[i].locale;

1212 }	1254 }

1213 }	1255 }

1214 }	1256 }

1215 }	1257 }

1216	1258

	1259 // Check that the correct dictionary files are checked in.

	1260 TEST_F(SpellCheckTest, DictionaryFiles) {

	1261 std::vector<std::string> locale_codes;

	1262 l10n_util::GetAcceptLanguagesForLocale("C", &locale_codes);

	1263 EXPECT_FALSE(locale_codes.empty());

	1264

	1265 std::vector<std::string> spellcheck_languages;

	1266 chrome::spellcheck_common::SpellCheckLanguages(&spellcheck_languages);

	1267 EXPECT_FALSE(spellcheck_languages.empty());

	1268 EXPECT_LE(spellcheck_languages.size(), locale_codes.size());

	1269

	1270 FilePath hunspell = GetHunspellDirectory();

	1271 for (size_t i = 0; i < spellcheck_languages.size(); ++i) {

	1272 FilePath dict = chrome::spellcheck_common::GetVersionedFileName(

	1273 spellcheck_languages[i], hunspell);

	1274 EXPECT_TRUE(file_util::PathExists(dict)) << dict.value() << " not found";

	1275 }

	1276 }

	1277

1217 #endif	1278 #endif

OLD	NEW

« no previous file with comments | « chrome/common/spellcheck_common.cc ('k') | chrome/tools/convert_dict/aff_reader.cc » ('j') | no next file with comments »