OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <math.h> | |
6 #include <stdarg.h> | |
7 | |
8 #include <limits> | |
9 #include <sstream> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/string16.h" | |
13 #include "base/string_util.h" | |
14 #include "base/strings/utf_string_conversions.h" | |
15 #include "testing/gmock/include/gmock/gmock.h" | |
16 #include "testing/gtest/include/gtest/gtest.h" | |
17 | |
18 using ::testing::ElementsAre; | |
19 | |
20 namespace base { | |
21 | |
22 static const struct trim_case { | |
23 const wchar_t* input; | |
24 const TrimPositions positions; | |
25 const wchar_t* output; | |
26 const TrimPositions return_value; | |
27 } trim_cases[] = { | |
28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, | |
29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, | |
30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, | |
31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, | |
32 {L"", TRIM_ALL, L"", TRIM_NONE}, | |
33 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, | |
34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, | |
35 {L" ", TRIM_ALL, L"", TRIM_ALL}, | |
36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, | |
37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, | |
38 }; | |
39 | |
40 static const struct trim_case_ascii { | |
41 const char* input; | |
42 const TrimPositions positions; | |
43 const char* output; | |
44 const TrimPositions return_value; | |
45 } trim_cases_ascii[] = { | |
46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, | |
47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, | |
48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, | |
49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, | |
50 {"", TRIM_ALL, "", TRIM_NONE}, | |
51 {" ", TRIM_LEADING, "", TRIM_LEADING}, | |
52 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, | |
53 {" ", TRIM_ALL, "", TRIM_ALL}, | |
54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, | |
55 }; | |
56 | |
57 namespace { | |
58 | |
59 // Helper used to test TruncateUTF8ToByteSize. | |
60 bool Truncated(const std::string& input, const size_t byte_size, | |
61 std::string* output) { | |
62 size_t prev = input.length(); | |
63 TruncateUTF8ToByteSize(input, byte_size, output); | |
64 return prev != output->length(); | |
65 } | |
66 | |
67 } // namespace | |
68 | |
69 TEST(StringUtilTest, TruncateUTF8ToByteSize) { | |
70 std::string output; | |
71 | |
72 // Empty strings and invalid byte_size arguments | |
73 EXPECT_FALSE(Truncated(std::string(), 0, &output)); | |
74 EXPECT_EQ(output, ""); | |
75 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); | |
76 EXPECT_EQ(output, ""); | |
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output)); | |
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); | |
79 | |
80 // Testing the truncation of valid UTF8 correctly | |
81 EXPECT_TRUE(Truncated("abc", 2, &output)); | |
82 EXPECT_EQ(output, "ab"); | |
83 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); | |
84 EXPECT_EQ(output.compare("\xc2\x81"), 0); | |
85 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); | |
86 EXPECT_EQ(output.compare("\xc2\x81"), 0); | |
87 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); | |
88 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); | |
89 | |
90 { | |
91 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; | |
92 const std::string array_string(array, arraysize(array)); | |
93 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
94 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); | |
95 } | |
96 | |
97 { | |
98 const char array[] = "\x00\xc2\x81\xc2\x81"; | |
99 const std::string array_string(array, arraysize(array)); | |
100 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
101 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); | |
102 } | |
103 | |
104 // Testing invalid UTF8 | |
105 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); | |
106 EXPECT_EQ(output.compare(""), 0); | |
107 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); | |
108 EXPECT_EQ(output.compare(""), 0); | |
109 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); | |
110 EXPECT_EQ(output.compare(""), 0); | |
111 | |
112 // Testing invalid UTF8 mixed with valid UTF8 | |
113 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); | |
114 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); | |
115 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); | |
116 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); | |
117 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", | |
118 10, &output)); | |
119 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); | |
120 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", | |
121 10, &output)); | |
122 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); | |
123 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); | |
124 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); | |
125 | |
126 // Overlong sequences | |
127 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); | |
128 EXPECT_EQ(output.compare(""), 0); | |
129 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); | |
130 EXPECT_EQ(output.compare(""), 0); | |
131 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); | |
132 EXPECT_EQ(output.compare(""), 0); | |
133 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); | |
134 EXPECT_EQ(output.compare(""), 0); | |
135 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); | |
136 EXPECT_EQ(output.compare(""), 0); | |
137 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); | |
138 EXPECT_EQ(output.compare(""), 0); | |
139 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); | |
140 EXPECT_EQ(output.compare(""), 0); | |
141 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); | |
142 EXPECT_EQ(output.compare(""), 0); | |
143 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); | |
144 EXPECT_EQ(output.compare(""), 0); | |
145 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); | |
146 EXPECT_EQ(output.compare(""), 0); | |
147 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); | |
148 EXPECT_EQ(output.compare(""), 0); | |
149 | |
150 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
151 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); | |
152 EXPECT_EQ(output.compare(""), 0); | |
153 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); | |
154 EXPECT_EQ(output.compare(""), 0); | |
155 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); | |
156 EXPECT_EQ(output.compare(""), 0); | |
157 | |
158 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) | |
159 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); | |
160 EXPECT_EQ(output.compare(""), 0); | |
161 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); | |
162 EXPECT_EQ(output.compare(""), 0); | |
163 | |
164 { | |
165 const char array[] = "\x00\x00\xfe\xff"; | |
166 const std::string array_string(array, arraysize(array)); | |
167 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
168 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); | |
169 } | |
170 | |
171 // Variants on the previous test | |
172 { | |
173 const char array[] = "\xff\xfe\x00\x00"; | |
174 const std::string array_string(array, 4); | |
175 EXPECT_FALSE(Truncated(array_string, 4, &output)); | |
176 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); | |
177 } | |
178 { | |
179 const char array[] = "\xff\x00\x00\xfe"; | |
180 const std::string array_string(array, arraysize(array)); | |
181 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
182 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); | |
183 } | |
184 | |
185 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> | |
186 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); | |
187 EXPECT_EQ(output.compare(""), 0); | |
188 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); | |
189 EXPECT_EQ(output.compare(""), 0); | |
190 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); | |
191 EXPECT_EQ(output.compare(""), 0); | |
192 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); | |
193 EXPECT_EQ(output.compare(""), 0); | |
194 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); | |
195 EXPECT_EQ(output.compare(""), 0); | |
196 | |
197 // Strings in legacy encodings that are valid in UTF-8, but | |
198 // are invalid as UTF-8 in real data. | |
199 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); | |
200 EXPECT_EQ(output.compare("caf"), 0); | |
201 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); | |
202 EXPECT_EQ(output.compare(""), 0); | |
203 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); | |
204 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
205 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, | |
206 &output)); | |
207 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
208 | |
209 // Testing using the same string as input and output. | |
210 EXPECT_FALSE(Truncated(output, 4, &output)); | |
211 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
212 EXPECT_TRUE(Truncated(output, 3, &output)); | |
213 EXPECT_EQ(output.compare("\xa7\x41"), 0); | |
214 | |
215 // "abc" with U+201[CD] in windows-125[0-8] | |
216 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); | |
217 EXPECT_EQ(output.compare("\x93" "abc"), 0); | |
218 | |
219 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
220 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); | |
221 EXPECT_EQ(output.compare(""), 0); | |
222 | |
223 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
224 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); | |
225 EXPECT_EQ(output.compare(""), 0); | |
226 } | |
227 | |
228 TEST(StringUtilTest, TrimWhitespace) { | |
229 string16 output; // Allow contents to carry over to next testcase | |
230 for (size_t i = 0; i < arraysize(trim_cases); ++i) { | |
231 const trim_case& value = trim_cases[i]; | |
232 EXPECT_EQ(value.return_value, | |
233 TrimWhitespace(WideToUTF16(value.input), value.positions, | |
234 &output)); | |
235 EXPECT_EQ(WideToUTF16(value.output), output); | |
236 } | |
237 | |
238 // Test that TrimWhitespace() can take the same string for input and output | |
239 output = ASCIIToUTF16(" This is a test \r\n"); | |
240 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
241 EXPECT_EQ(ASCIIToUTF16("This is a test"), output); | |
242 | |
243 // Once more, but with a string of whitespace | |
244 output = ASCIIToUTF16(" \r\n"); | |
245 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
246 EXPECT_EQ(string16(), output); | |
247 | |
248 std::string output_ascii; | |
249 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { | |
250 const trim_case_ascii& value = trim_cases_ascii[i]; | |
251 EXPECT_EQ(value.return_value, | |
252 TrimWhitespace(value.input, value.positions, &output_ascii)); | |
253 EXPECT_EQ(value.output, output_ascii); | |
254 } | |
255 } | |
256 | |
// Table of inputs and expectations for the wide-string CollapseWhitespace(),
// consumed by the CollapseWhitespace test.
static const struct collapse_case {
  // Text handed to CollapseWhitespace().
  const wchar_t* input;
  // Forwarded as CollapseWhitespace()'s second argument.
  const bool trim;
  // Text the call is expected to return.
  const wchar_t* output;
} collapse_cases[] = {
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
282 | |
283 TEST(StringUtilTest, CollapseWhitespace) { | |
284 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { | |
285 const collapse_case& value = collapse_cases[i]; | |
286 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); | |
287 } | |
288 } | |
289 | |
// Narrow-string counterpart of collapse_cases, consumed by the
// CollapseWhitespaceASCII test.
static const struct collapse_case_ascii {
  // Text handed to CollapseWhitespaceASCII().
  const char* input;
  // Forwarded as CollapseWhitespaceASCII()'s second argument.
  const bool trim;
  // Text the call is expected to return.
  const char* output;
} collapse_cases_ascii[] = {
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
313 | |
314 TEST(StringUtilTest, CollapseWhitespaceASCII) { | |
315 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { | |
316 const collapse_case_ascii& value = collapse_cases_ascii[i]; | |
317 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); | |
318 } | |
319 } | |
320 | |
321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) { | |
322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string())); | |
323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" ")); | |
324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t")); | |
325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n ")); | |
326 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a")); | |
327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n ")); | |
328 } | |
329 | |
330 TEST(StringUtilTest, ContainsOnlyWhitespace) { | |
331 EXPECT_TRUE(ContainsOnlyWhitespace(string16())); | |
332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" "))); | |
333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t"))); | |
334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n "))); | |
335 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a"))); | |
336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n "))); | |
337 } | |
338 | |
339 TEST(StringUtilTest, IsStringUTF8) { | |
340 EXPECT_TRUE(IsStringUTF8("abc")); | |
341 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); | |
342 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); | |
343 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); | |
344 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); | |
345 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM | |
346 | |
347 // surrogate code points | |
348 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); | |
349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); | |
350 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); | |
351 | |
352 // overlong sequences | |
353 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 | |
354 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" | |
355 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 | |
356 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 | |
357 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff | |
358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D | |
359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 | |
360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 | |
361 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) | |
362 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F | |
363 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 | |
364 | |
365 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
366 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 | |
367 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes | |
368 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes | |
369 | |
370 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) | |
371 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); | |
372 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); | |
373 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); | |
374 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); | |
375 | |
376 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> | |
377 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) | |
378 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE | |
379 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF | |
380 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 | |
381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF | |
382 // Strings in legacy encodings. We can certainly make up strings | |
383 // in a legacy encoding that are valid in UTF-8, but in real data, | |
384 // most of them are invalid as UTF-8. | |
385 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 | |
386 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR | |
387 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 | |
388 // "abc" with U+201[CD] in windows-125[0-8] | |
389 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); | |
390 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
391 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); | |
392 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
393 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); | |
394 | |
395 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 | |
396 // representation, and the second uses a 2-byte sequence. The second version | |
397 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a | |
398 // given codepoint must be used. | |
399 static const char kEmbeddedNull[] = "embedded\0null"; | |
400 EXPECT_TRUE(IsStringUTF8( | |
401 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); | |
402 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); | |
403 } | |
404 | |
405 TEST(StringUtilTest, ConvertASCII) { | |
406 static const char* char_cases[] = { | |
407 "Google Video", | |
408 "Hello, world\n", | |
409 "0123ABCDwxyz \a\b\t\r\n!+,.~" | |
410 }; | |
411 | |
412 static const wchar_t* const wchar_cases[] = { | |
413 L"Google Video", | |
414 L"Hello, world\n", | |
415 L"0123ABCDwxyz \a\b\t\r\n!+,.~" | |
416 }; | |
417 | |
418 for (size_t i = 0; i < arraysize(char_cases); ++i) { | |
419 EXPECT_TRUE(IsStringASCII(char_cases[i])); | |
420 std::wstring wide = ASCIIToWide(char_cases[i]); | |
421 EXPECT_EQ(wchar_cases[i], wide); | |
422 | |
423 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); | |
424 std::string ascii = WideToASCII(wchar_cases[i]); | |
425 EXPECT_EQ(char_cases[i], ascii); | |
426 } | |
427 | |
428 EXPECT_FALSE(IsStringASCII("Google \x80Video")); | |
429 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); | |
430 | |
431 // Convert empty strings. | |
432 std::wstring wempty; | |
433 std::string empty; | |
434 EXPECT_EQ(empty, WideToASCII(wempty)); | |
435 EXPECT_EQ(wempty, ASCIIToWide(empty)); | |
436 | |
437 // Convert strings with an embedded NUL character. | |
438 const char chars_with_nul[] = "test\0string"; | |
439 const int length_with_nul = arraysize(chars_with_nul) - 1; | |
440 std::string string_with_nul(chars_with_nul, length_with_nul); | |
441 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); | |
442 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), | |
443 wide_with_nul.length()); | |
444 std::string narrow_with_nul = WideToASCII(wide_with_nul); | |
445 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), | |
446 narrow_with_nul.length()); | |
447 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); | |
448 } | |
449 | |
450 TEST(StringUtilTest, ToUpperASCII) { | |
451 EXPECT_EQ('C', ToUpperASCII('C')); | |
452 EXPECT_EQ('C', ToUpperASCII('c')); | |
453 EXPECT_EQ('2', ToUpperASCII('2')); | |
454 | |
455 EXPECT_EQ(L'C', ToUpperASCII(L'C')); | |
456 EXPECT_EQ(L'C', ToUpperASCII(L'c')); | |
457 EXPECT_EQ(L'2', ToUpperASCII(L'2')); | |
458 | |
459 std::string in_place_a("Cc2"); | |
460 StringToUpperASCII(&in_place_a); | |
461 EXPECT_EQ("CC2", in_place_a); | |
462 | |
463 std::wstring in_place_w(L"Cc2"); | |
464 StringToUpperASCII(&in_place_w); | |
465 EXPECT_EQ(L"CC2", in_place_w); | |
466 | |
467 std::string original_a("Cc2"); | |
468 std::string upper_a = StringToUpperASCII(original_a); | |
469 EXPECT_EQ("CC2", upper_a); | |
470 | |
471 std::wstring original_w(L"Cc2"); | |
472 std::wstring upper_w = StringToUpperASCII(original_w); | |
473 EXPECT_EQ(L"CC2", upper_w); | |
474 } | |
475 | |
476 TEST(StringUtilTest, LowerCaseEqualsASCII) { | |
477 static const struct { | |
478 const wchar_t* src_w; | |
479 const char* src_a; | |
480 const char* dst; | |
481 } lowercase_cases[] = { | |
482 { L"FoO", "FoO", "foo" }, | |
483 { L"foo", "foo", "foo" }, | |
484 { L"FOO", "FOO", "foo" }, | |
485 }; | |
486 | |
487 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { | |
488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, | |
489 lowercase_cases[i].dst)); | |
490 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, | |
491 lowercase_cases[i].dst)); | |
492 } | |
493 } | |
494 | |
495 TEST(StringUtilTest, FormatBytesUnlocalized) { | |
496 static const struct { | |
497 int64 bytes; | |
498 const char* expected; | |
499 } cases[] = { | |
500 // Expected behavior: we show one post-decimal digit when we have | |
501 // under two pre-decimal digits, except in cases where it makes no | |
502 // sense (zero or bytes). | |
503 // Since we switch units once we cross the 1000 mark, this keeps | |
504 // the display of file sizes or bytes consistently around three | |
505 // digits. | |
506 {0, "0 B"}, | |
507 {512, "512 B"}, | |
508 {1024*1024, "1.0 MB"}, | |
509 {1024*1024*1024, "1.0 GB"}, | |
510 {10LL*1024*1024*1024, "10.0 GB"}, | |
511 {99LL*1024*1024*1024, "99.0 GB"}, | |
512 {105LL*1024*1024*1024, "105 GB"}, | |
513 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, | |
514 {~(1LL<<63), "8192 PB"}, | |
515 | |
516 {99*1024 + 103, "99.1 kB"}, | |
517 {1024*1024 + 103, "1.0 MB"}, | |
518 {1024*1024 + 205 * 1024, "1.2 MB"}, | |
519 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, | |
520 {10LL*1024*1024*1024, "10.0 GB"}, | |
521 {100LL*1024*1024*1024, "100 GB"}, | |
522 }; | |
523 | |
524 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
525 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), | |
526 FormatBytesUnlocalized(cases[i].bytes)); | |
527 } | |
528 } | |
529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { | |
530 static const struct { | |
531 const char* str; | |
532 string16::size_type start_offset; | |
533 const char* find_this; | |
534 const char* replace_with; | |
535 const char* expected; | |
536 } cases[] = { | |
537 {"aaa", 0, "a", "b", "bbb"}, | |
538 {"abb", 0, "ab", "a", "ab"}, | |
539 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, | |
540 {"Not found", 0, "x", "0", "Not found"}, | |
541 {"Not found again", 5, "x", "0", "Not found again"}, | |
542 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
543 "Four score and seven years agoMakingFour score and seven years agoit" | |
544 "Four score and seven years agomuchFour score and seven years agolonger" | |
545 "Four score and seven years ago"}, | |
546 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
547 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, | |
548 {"abababab", 2, "ab", "c", "abccc"}, | |
549 }; | |
550 | |
551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { | |
552 string16 str = ASCIIToUTF16(cases[i].str); | |
553 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, | |
554 ASCIIToUTF16(cases[i].find_this), | |
555 ASCIIToUTF16(cases[i].replace_with)); | |
556 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
557 } | |
558 } | |
559 | |
560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { | |
561 static const struct { | |
562 const char* str; | |
563 string16::size_type start_offset; | |
564 const char* find_this; | |
565 const char* replace_with; | |
566 const char* expected; | |
567 } cases[] = { | |
568 {"aaa", 0, "a", "b", "baa"}, | |
569 {"abb", 0, "ab", "a", "ab"}, | |
570 {"Removing some substrings inging", 0, "ing", "", | |
571 "Remov some substrings inging"}, | |
572 {"Not found", 0, "x", "0", "Not found"}, | |
573 {"Not found again", 5, "x", "0", "Not found again"}, | |
574 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
575 "Four score and seven years agoMaking it much longer "}, | |
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
577 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, | |
578 {"abababab", 2, "ab", "c", "abcabab"}, | |
579 }; | |
580 | |
581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { | |
582 string16 str = ASCIIToUTF16(cases[i].str); | |
583 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, | |
584 ASCIIToUTF16(cases[i].find_this), | |
585 ASCIIToUTF16(cases[i].replace_with)); | |
586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
587 } | |
588 } | |
589 | |
590 TEST(StringUtilTest, HexDigitToInt) { | |
591 EXPECT_EQ(0, HexDigitToInt('0')); | |
592 EXPECT_EQ(1, HexDigitToInt('1')); | |
593 EXPECT_EQ(2, HexDigitToInt('2')); | |
594 EXPECT_EQ(3, HexDigitToInt('3')); | |
595 EXPECT_EQ(4, HexDigitToInt('4')); | |
596 EXPECT_EQ(5, HexDigitToInt('5')); | |
597 EXPECT_EQ(6, HexDigitToInt('6')); | |
598 EXPECT_EQ(7, HexDigitToInt('7')); | |
599 EXPECT_EQ(8, HexDigitToInt('8')); | |
600 EXPECT_EQ(9, HexDigitToInt('9')); | |
601 EXPECT_EQ(10, HexDigitToInt('A')); | |
602 EXPECT_EQ(11, HexDigitToInt('B')); | |
603 EXPECT_EQ(12, HexDigitToInt('C')); | |
604 EXPECT_EQ(13, HexDigitToInt('D')); | |
605 EXPECT_EQ(14, HexDigitToInt('E')); | |
606 EXPECT_EQ(15, HexDigitToInt('F')); | |
607 | |
608 // Verify the lower case as well. | |
609 EXPECT_EQ(10, HexDigitToInt('a')); | |
610 EXPECT_EQ(11, HexDigitToInt('b')); | |
611 EXPECT_EQ(12, HexDigitToInt('c')); | |
612 EXPECT_EQ(13, HexDigitToInt('d')); | |
613 EXPECT_EQ(14, HexDigitToInt('e')); | |
614 EXPECT_EQ(15, HexDigitToInt('f')); | |
615 } | |
616 | |
617 // This checks where we can use the assignment operator for a va_list. We need | |
618 // a way to do this since Visual C doesn't support va_copy, but assignment on | |
619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this | |
620 // capability. | |
621 static void VariableArgsFunc(const char* format, ...) { | |
622 va_list org; | |
623 va_start(org, format); | |
624 | |
625 va_list dup; | |
626 GG_VA_COPY(dup, org); | |
627 int i1 = va_arg(org, int); | |
628 int j1 = va_arg(org, int); | |
629 char* s1 = va_arg(org, char*); | |
630 double d1 = va_arg(org, double); | |
631 va_end(org); | |
632 | |
633 int i2 = va_arg(dup, int); | |
634 int j2 = va_arg(dup, int); | |
635 char* s2 = va_arg(dup, char*); | |
636 double d2 = va_arg(dup, double); | |
637 | |
638 EXPECT_EQ(i1, i2); | |
639 EXPECT_EQ(j1, j2); | |
640 EXPECT_STREQ(s1, s2); | |
641 EXPECT_EQ(d1, d2); | |
642 | |
643 va_end(dup); | |
644 } | |
645 | |
646 TEST(StringUtilTest, VAList) { | |
647 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); | |
648 } | |
649 | |
650 // Test for Tokenize | |
651 template <typename STR> | |
652 void TokenizeTest() { | |
653 std::vector<STR> r; | |
654 size_t size; | |
655 | |
656 size = Tokenize(STR("This is a string"), STR(" "), &r); | |
657 EXPECT_EQ(4U, size); | |
658 ASSERT_EQ(4U, r.size()); | |
659 EXPECT_EQ(r[0], STR("This")); | |
660 EXPECT_EQ(r[1], STR("is")); | |
661 EXPECT_EQ(r[2], STR("a")); | |
662 EXPECT_EQ(r[3], STR("string")); | |
663 r.clear(); | |
664 | |
665 size = Tokenize(STR("one,two,three"), STR(","), &r); | |
666 EXPECT_EQ(3U, size); | |
667 ASSERT_EQ(3U, r.size()); | |
668 EXPECT_EQ(r[0], STR("one")); | |
669 EXPECT_EQ(r[1], STR("two")); | |
670 EXPECT_EQ(r[2], STR("three")); | |
671 r.clear(); | |
672 | |
673 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r); | |
674 EXPECT_EQ(3U, size); | |
675 ASSERT_EQ(3U, r.size()); | |
676 EXPECT_EQ(r[0], STR("one")); | |
677 EXPECT_EQ(r[1], STR("two")); | |
678 EXPECT_EQ(r[2], STR("three;four")); | |
679 r.clear(); | |
680 | |
681 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r); | |
682 EXPECT_EQ(4U, size); | |
683 ASSERT_EQ(4U, r.size()); | |
684 EXPECT_EQ(r[0], STR("one")); | |
685 EXPECT_EQ(r[1], STR("two")); | |
686 EXPECT_EQ(r[2], STR("three")); | |
687 EXPECT_EQ(r[3], STR("four")); | |
688 r.clear(); | |
689 | |
690 size = Tokenize(STR("one, two, three"), STR(","), &r); | |
691 EXPECT_EQ(3U, size); | |
692 ASSERT_EQ(3U, r.size()); | |
693 EXPECT_EQ(r[0], STR("one")); | |
694 EXPECT_EQ(r[1], STR(" two")); | |
695 EXPECT_EQ(r[2], STR(" three")); | |
696 r.clear(); | |
697 | |
698 size = Tokenize(STR("one, two, three, "), STR(","), &r); | |
699 EXPECT_EQ(4U, size); | |
700 ASSERT_EQ(4U, r.size()); | |
701 EXPECT_EQ(r[0], STR("one")); | |
702 EXPECT_EQ(r[1], STR(" two")); | |
703 EXPECT_EQ(r[2], STR(" three")); | |
704 EXPECT_EQ(r[3], STR(" ")); | |
705 r.clear(); | |
706 | |
707 size = Tokenize(STR("one, two, three,"), STR(","), &r); | |
708 EXPECT_EQ(3U, size); | |
709 ASSERT_EQ(3U, r.size()); | |
710 EXPECT_EQ(r[0], STR("one")); | |
711 EXPECT_EQ(r[1], STR(" two")); | |
712 EXPECT_EQ(r[2], STR(" three")); | |
713 r.clear(); | |
714 | |
715 size = Tokenize(STR(), STR(","), &r); | |
716 EXPECT_EQ(0U, size); | |
717 ASSERT_EQ(0U, r.size()); | |
718 r.clear(); | |
719 | |
720 size = Tokenize(STR(","), STR(","), &r); | |
721 EXPECT_EQ(0U, size); | |
722 ASSERT_EQ(0U, r.size()); | |
723 r.clear(); | |
724 | |
725 size = Tokenize(STR(",;:."), STR(".:;,"), &r); | |
726 EXPECT_EQ(0U, size); | |
727 ASSERT_EQ(0U, r.size()); | |
728 r.clear(); | |
729 | |
730 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r); | |
731 EXPECT_EQ(1U, size); | |
732 ASSERT_EQ(1U, r.size()); | |
733 EXPECT_EQ(r[0], STR("a")); | |
734 r.clear(); | |
735 | |
736 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r); | |
737 EXPECT_EQ(2U, size); | |
738 ASSERT_EQ(2U, r.size()); | |
739 EXPECT_EQ(r[0], STR("\ta\t")); | |
740 EXPECT_EQ(r[1], STR("b\tcc")); | |
741 r.clear(); | |
742 } | |
743 | |
744 TEST(StringUtilTest, TokenizeStdString) { | |
745 TokenizeTest<std::string>(); | |
746 } | |
747 | |
748 TEST(StringUtilTest, TokenizeStringPiece) { | |
749 TokenizeTest<base::StringPiece>(); | |
750 } | |
751 | |
752 // Test for JoinString | |
753 TEST(StringUtilTest, JoinString) { | |
754 std::vector<std::string> in; | |
755 EXPECT_EQ("", JoinString(in, ',')); | |
756 | |
757 in.push_back("a"); | |
758 EXPECT_EQ("a", JoinString(in, ',')); | |
759 | |
760 in.push_back("b"); | |
761 in.push_back("c"); | |
762 EXPECT_EQ("a,b,c", JoinString(in, ',')); | |
763 | |
764 in.push_back(std::string()); | |
765 EXPECT_EQ("a,b,c,", JoinString(in, ',')); | |
766 in.push_back(" "); | |
767 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); | |
768 } | |
769 | |
770 // Test for JoinString overloaded with std::string separator | |
771 TEST(StringUtilTest, JoinStringWithString) { | |
772 std::string separator(", "); | |
773 std::vector<std::string> parts; | |
774 EXPECT_EQ(std::string(), JoinString(parts, separator)); | |
775 | |
776 parts.push_back("a"); | |
777 EXPECT_EQ("a", JoinString(parts, separator)); | |
778 | |
779 parts.push_back("b"); | |
780 parts.push_back("c"); | |
781 EXPECT_EQ("a, b, c", JoinString(parts, separator)); | |
782 | |
783 parts.push_back(std::string()); | |
784 EXPECT_EQ("a, b, c, ", JoinString(parts, separator)); | |
785 parts.push_back(" "); | |
786 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|")); | |
787 } | |
788 | |
789 // Test for JoinString overloaded with string16 separator | |
790 TEST(StringUtilTest, JoinStringWithString16) { | |
791 string16 separator = ASCIIToUTF16(", "); | |
792 std::vector<string16> parts; | |
793 EXPECT_EQ(string16(), JoinString(parts, separator)); | |
794 | |
795 parts.push_back(ASCIIToUTF16("a")); | |
796 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); | |
797 | |
798 parts.push_back(ASCIIToUTF16("b")); | |
799 parts.push_back(ASCIIToUTF16("c")); | |
800 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); | |
801 | |
802 parts.push_back(ASCIIToUTF16("")); | |
803 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); | |
804 parts.push_back(ASCIIToUTF16(" ")); | |
805 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); | |
806 } | |
807 | |
808 TEST(StringUtilTest, StartsWith) { | |
809 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); | |
810 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); | |
811 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); | |
812 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); | |
813 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); | |
814 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); | |
815 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false)); | |
816 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true)); | |
817 EXPECT_TRUE(StartsWithASCII("java", std::string(), false)); | |
818 EXPECT_TRUE(StartsWithASCII("java", std::string(), true)); | |
819 | |
820 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); | |
821 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); | |
822 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); | |
823 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); | |
824 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); | |
825 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); | |
826 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false)); | |
827 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true)); | |
828 EXPECT_TRUE(StartsWith(L"java", std::wstring(), false)); | |
829 EXPECT_TRUE(StartsWith(L"java", std::wstring(), true)); | |
830 } | |
831 | |
832 TEST(StringUtilTest, EndsWith) { | |
833 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); | |
834 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); | |
835 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); | |
836 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); | |
837 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); | |
838 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); | |
839 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); | |
840 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); | |
841 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false)); | |
842 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true)); | |
843 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false)); | |
844 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true)); | |
845 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); | |
846 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); | |
847 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false)); | |
848 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true)); | |
849 } | |
850 | |
851 TEST(StringUtilTest, GetStringFWithOffsets) { | |
852 std::vector<string16> subst; | |
853 subst.push_back(ASCIIToUTF16("1")); | |
854 subst.push_back(ASCIIToUTF16("2")); | |
855 std::vector<size_t> offsets; | |
856 | |
857 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), | |
858 subst, | |
859 &offsets); | |
860 EXPECT_EQ(2U, offsets.size()); | |
861 EXPECT_EQ(7U, offsets[0]); | |
862 EXPECT_EQ(25U, offsets[1]); | |
863 offsets.clear(); | |
864 | |
865 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), | |
866 subst, | |
867 &offsets); | |
868 EXPECT_EQ(2U, offsets.size()); | |
869 EXPECT_EQ(25U, offsets[0]); | |
870 EXPECT_EQ(7U, offsets[1]); | |
871 offsets.clear(); | |
872 } | |
873 | |
874 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { | |
875 // Test whether replacestringplaceholders works as expected when there | |
876 // are fewer inputs than outputs. | |
877 std::vector<string16> subst; | |
878 subst.push_back(ASCIIToUTF16("9a")); | |
879 subst.push_back(ASCIIToUTF16("8b")); | |
880 subst.push_back(ASCIIToUTF16("7c")); | |
881 | |
882 string16 formatted = | |
883 ReplaceStringPlaceholders( | |
884 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); | |
885 | |
886 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); | |
887 } | |
888 | |
889 TEST(StringUtilTest, ReplaceStringPlaceholders) { | |
890 std::vector<string16> subst; | |
891 subst.push_back(ASCIIToUTF16("9a")); | |
892 subst.push_back(ASCIIToUTF16("8b")); | |
893 subst.push_back(ASCIIToUTF16("7c")); | |
894 subst.push_back(ASCIIToUTF16("6d")); | |
895 subst.push_back(ASCIIToUTF16("5e")); | |
896 subst.push_back(ASCIIToUTF16("4f")); | |
897 subst.push_back(ASCIIToUTF16("3g")); | |
898 subst.push_back(ASCIIToUTF16("2h")); | |
899 subst.push_back(ASCIIToUTF16("1i")); | |
900 | |
901 string16 formatted = | |
902 ReplaceStringPlaceholders( | |
903 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); | |
904 | |
905 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); | |
906 } | |
907 | |
908 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) { | |
909 std::vector<string16> subst; | |
910 subst.push_back(ASCIIToUTF16("9a")); | |
911 subst.push_back(ASCIIToUTF16("8b")); | |
912 subst.push_back(ASCIIToUTF16("7c")); | |
913 subst.push_back(ASCIIToUTF16("6d")); | |
914 subst.push_back(ASCIIToUTF16("5e")); | |
915 subst.push_back(ASCIIToUTF16("4f")); | |
916 subst.push_back(ASCIIToUTF16("3g")); | |
917 subst.push_back(ASCIIToUTF16("2h")); | |
918 subst.push_back(ASCIIToUTF16("1i")); | |
919 subst.push_back(ASCIIToUTF16("0j")); | |
920 subst.push_back(ASCIIToUTF16("-1k")); | |
921 subst.push_back(ASCIIToUTF16("-2l")); | |
922 subst.push_back(ASCIIToUTF16("-3m")); | |
923 subst.push_back(ASCIIToUTF16("-4n")); | |
924 | |
925 string16 formatted = | |
926 ReplaceStringPlaceholders( | |
927 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i," | |
928 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL); | |
929 | |
930 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh," | |
931 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a")); | |
932 } | |
933 | |
934 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { | |
935 std::vector<std::string> subst; | |
936 subst.push_back("9a"); | |
937 subst.push_back("8b"); | |
938 subst.push_back("7c"); | |
939 subst.push_back("6d"); | |
940 subst.push_back("5e"); | |
941 subst.push_back("4f"); | |
942 subst.push_back("3g"); | |
943 subst.push_back("2h"); | |
944 subst.push_back("1i"); | |
945 | |
946 std::string formatted = | |
947 ReplaceStringPlaceholders( | |
948 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); | |
949 | |
950 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); | |
951 } | |
952 | |
953 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { | |
954 std::vector<std::string> subst; | |
955 subst.push_back("a"); | |
956 subst.push_back("b"); | |
957 subst.push_back("c"); | |
958 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), | |
959 "$1 $$2 $$$3"); | |
960 } | |
961 | |
962 TEST(StringUtilTest, MatchPatternTest) { | |
963 EXPECT_TRUE(MatchPattern("www.google.com", "*.com")); | |
964 EXPECT_TRUE(MatchPattern("www.google.com", "*")); | |
965 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org")); | |
966 EXPECT_TRUE(MatchPattern("Hello", "H?l?o")); | |
967 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)")); | |
968 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM")); | |
969 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*")); | |
970 EXPECT_FALSE(MatchPattern("", "*.*")); | |
971 EXPECT_TRUE(MatchPattern("", "*")); | |
972 EXPECT_TRUE(MatchPattern("", "?")); | |
973 EXPECT_TRUE(MatchPattern("", "")); | |
974 EXPECT_FALSE(MatchPattern("Hello", "")); | |
975 EXPECT_TRUE(MatchPattern("Hello*", "Hello*")); | |
976 // Stop after a certain recursion depth. | |
977 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*")); | |
978 | |
979 // Test UTF8 matching. | |
980 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0")); | |
981 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?.")); | |
982 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*")); | |
983 // Invalid sequences should be handled as a single invalid character. | |
984 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?")); | |
985 // If the pattern has invalid characters, it shouldn't match anything. | |
986 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); | |
987 | |
988 // Test UTF16 character matching. | |
989 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), | |
990 UTF8ToUTF16("*.com"))); | |
991 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), | |
992 UTF8ToUTF16("He??o\\*1*"))); | |
993 | |
994 // This test verifies that consecutive wild cards are collapsed into 1 | |
995 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum | |
996 // recursion depth). | |
997 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"), | |
998 UTF8ToUTF16("He********************************o"))); | |
999 } | |
1000 | |
1001 TEST(StringUtilTest, LcpyTest) { | |
1002 // Test the normal case where we fit in our buffer. | |
1003 { | |
1004 char dst[10]; | |
1005 wchar_t wdst[10]; | |
1006 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1007 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
1008 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1009 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
1010 } | |
1011 | |
1012 // Test dst_size == 0, nothing should be written to |dst| and we should | |
1013 // have the equivalent of strlen(src). | |
1014 { | |
1015 char dst[2] = {1, 2}; | |
1016 wchar_t wdst[2] = {1, 2}; | |
1017 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); | |
1018 EXPECT_EQ(1, dst[0]); | |
1019 EXPECT_EQ(2, dst[1]); | |
1020 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); | |
1021 #if defined(WCHAR_T_IS_UNSIGNED) | |
1022 EXPECT_EQ(1U, wdst[0]); | |
1023 EXPECT_EQ(2U, wdst[1]); | |
1024 #else | |
1025 EXPECT_EQ(1, wdst[0]); | |
1026 EXPECT_EQ(2, wdst[1]); | |
1027 #endif | |
1028 } | |
1029 | |
1030 // Test the case were we _just_ competely fit including the null. | |
1031 { | |
1032 char dst[8]; | |
1033 wchar_t wdst[8]; | |
1034 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1035 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
1036 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1037 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
1038 } | |
1039 | |
1040 // Test the case were we we are one smaller, so we can't fit the null. | |
1041 { | |
1042 char dst[7]; | |
1043 wchar_t wdst[7]; | |
1044 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1045 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); | |
1046 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1047 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); | |
1048 } | |
1049 | |
1050 // Test the case were we are just too small. | |
1051 { | |
1052 char dst[3]; | |
1053 wchar_t wdst[3]; | |
1054 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1055 EXPECT_EQ(0, memcmp(dst, "ab", 3)); | |
1056 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1057 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); | |
1058 } | |
1059 } | |
1060 | |
1061 TEST(StringUtilTest, WprintfFormatPortabilityTest) { | |
1062 static const struct { | |
1063 const wchar_t* input; | |
1064 bool portable; | |
1065 } cases[] = { | |
1066 { L"%ls", true }, | |
1067 { L"%s", false }, | |
1068 { L"%S", false }, | |
1069 { L"%lS", false }, | |
1070 { L"Hello, %s", false }, | |
1071 { L"%lc", true }, | |
1072 { L"%c", false }, | |
1073 { L"%C", false }, | |
1074 { L"%lC", false }, | |
1075 { L"%ls %s", false }, | |
1076 { L"%s %ls", false }, | |
1077 { L"%s %ls %s", false }, | |
1078 { L"%f", true }, | |
1079 { L"%f %F", false }, | |
1080 { L"%d %D", false }, | |
1081 { L"%o %O", false }, | |
1082 { L"%u %U", false }, | |
1083 { L"%f %d %o %u", true }, | |
1084 { L"%-8d (%02.1f%)", true }, | |
1085 { L"% 10s", false }, | |
1086 { L"% 10ls", true } | |
1087 }; | |
1088 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) | |
1089 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); | |
1090 } | |
1091 | |
1092 TEST(StringUtilTest, RemoveChars) { | |
1093 const char* kRemoveChars = "-/+*"; | |
1094 std::string input = "A-+bc/d!*"; | |
1095 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); | |
1096 EXPECT_EQ("Abcd!", input); | |
1097 | |
1098 // No characters match kRemoveChars. | |
1099 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); | |
1100 EXPECT_EQ("Abcd!", input); | |
1101 | |
1102 // Empty string. | |
1103 input.clear(); | |
1104 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); | |
1105 EXPECT_EQ(std::string(), input); | |
1106 } | |
1107 | |
1108 TEST(StringUtilTest, ReplaceChars) { | |
1109 struct TestData { | |
1110 const char* input; | |
1111 const char* replace_chars; | |
1112 const char* replace_with; | |
1113 const char* output; | |
1114 bool result; | |
1115 } cases[] = { | |
1116 { "", "", "", "", false }, | |
1117 { "test", "", "", "test", false }, | |
1118 { "test", "", "!", "test", false }, | |
1119 { "test", "z", "!", "test", false }, | |
1120 { "test", "e", "!", "t!st", true }, | |
1121 { "test", "e", "!?", "t!?st", true }, | |
1122 { "test", "ez", "!", "t!st", true }, | |
1123 { "test", "zed", "!?", "t!?st", true }, | |
1124 { "test", "t", "!?", "!?es!?", true }, | |
1125 { "test", "et", "!>", "!>!>s!>", true }, | |
1126 { "test", "zest", "!", "!!!!", true }, | |
1127 { "test", "szt", "!", "!e!!", true }, | |
1128 { "test", "t", "test", "testestest", true }, | |
1129 }; | |
1130 | |
1131 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
1132 std::string output; | |
1133 bool result = ReplaceChars(cases[i].input, | |
1134 cases[i].replace_chars, | |
1135 cases[i].replace_with, | |
1136 &output); | |
1137 EXPECT_EQ(cases[i].result, result); | |
1138 EXPECT_EQ(cases[i].output, output); | |
1139 } | |
1140 } | |
1141 | |
1142 TEST(StringUtilTest, ContainsOnlyChars) { | |
1143 // Providing an empty list of characters should return false but for the empty | |
1144 // string. | |
1145 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string())); | |
1146 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string())); | |
1147 | |
1148 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234")); | |
1149 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); | |
1150 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); | |
1151 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); | |
1152 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); | |
1153 } | |
1154 | |
1155 class WriteIntoTest : public testing::Test { | |
1156 protected: | |
1157 static void WritesCorrectly(size_t num_chars) { | |
1158 std::string buffer; | |
1159 char kOriginal[] = "supercali"; | |
1160 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars); | |
1161 // Using std::string(buffer.c_str()) instead of |buffer| truncates the | |
1162 // string at the first \0. | |
1163 EXPECT_EQ(std::string(kOriginal, | |
1164 std::min(num_chars, arraysize(kOriginal) - 1)), | |
1165 std::string(buffer.c_str())); | |
1166 EXPECT_EQ(num_chars, buffer.size()); | |
1167 } | |
1168 }; | |
1169 | |
1170 TEST_F(WriteIntoTest, WriteInto) { | |
1171 // Validate that WriteInto reserves enough space and | |
1172 // sizes a string correctly. | |
1173 WritesCorrectly(1); | |
1174 WritesCorrectly(2); | |
1175 WritesCorrectly(5000); | |
1176 | |
1177 // Validate that WriteInto doesn't modify other strings | |
1178 // when using a Copy-on-Write implementation. | |
1179 const char kLive[] = "live"; | |
1180 const char kDead[] = "dead"; | |
1181 const std::string live = kLive; | |
1182 std::string dead = live; | |
1183 strncpy(WriteInto(&dead, 5), kDead, 4); | |
1184 EXPECT_EQ(kDead, dead); | |
1185 EXPECT_EQ(4u, dead.size()); | |
1186 EXPECT_EQ(kLive, live); | |
1187 EXPECT_EQ(4u, live.size()); | |
1188 } | |
1189 | |
1190 } // namespace base | |
OLD | NEW |