base/string_util_unittest.cc - Issue 16331011: Move string files in base/ to the string subdirectory.

Side by Side Diff: base/string_util_unittest.cc

Issue 16331011: Move string files in base/ to the string subdirectory. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebase Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <math.h>

6 #include <stdarg.h>

7

8 #include <limits>

9 #include <sstream>

10

11 #include "base/basictypes.h"

12 #include "base/string16.h"

13 #include "base/string_util.h"

14 #include "base/strings/utf_string_conversions.h"

15 #include "testing/gmock/include/gmock/gmock.h"

16 #include "testing/gtest/include/gtest/gtest.h"

17

18 using ::testing::ElementsAre;

19

20 namespace base {

21

// Wide-string cases consumed by the TrimWhitespace test. Each row gives the
// input, the TrimPositions requested, the expected trimmed output, and the
// TrimPositions value the call is expected to return.
22 static const struct trim_case {

23 const wchar_t* input;

24 const TrimPositions positions;

25 const wchar_t* output;

26 const TrimPositions return_value;

27 } trim_cases[] = {

28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},

29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},

30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},

31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},

32 {L"", TRIM_ALL, L"", TRIM_NONE},

33 {L" ", TRIM_LEADING, L"", TRIM_LEADING},

34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},

35 {L" ", TRIM_ALL, L"", TRIM_ALL},

36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},

// Non-ASCII whitespace: U+2002, U+00A0 and U+3000 are expected to be trimmed.
37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},

38 };

39

// Narrow-string counterpart of trim_cases, consumed by the second loop of the
// TrimWhitespace test. Same column meaning: input, requested positions,
// expected output, expected return value.
40 static const struct trim_case_ascii {

41 const char* input;

42 const TrimPositions positions;

43 const char* output;

44 const TrimPositions return_value;

45 } trim_cases_ascii[] = {

46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},

47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},

48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},

49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},

50 {"", TRIM_ALL, "", TRIM_NONE},

51 {" ", TRIM_LEADING, "", TRIM_LEADING},

52 {" ", TRIM_TRAILING, "", TRIM_TRAILING},

53 {" ", TRIM_ALL, "", TRIM_ALL},

54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},

55 };

56

57 namespace {

58

59 // Helper used to test TruncateUTF8ToByteSize.
// Calls TruncateUTF8ToByteSize(input, byte_size, output) and returns true when
// the resulting output length differs from the input length, i.e. when
// something was cut off.
60 bool Truncated(const std::string& input, const size_t byte_size,

61 std::string* output) {

// The input length is captured before the call because the test also passes
// the same string object as both input and output.
62 size_t prev = input.length();

63 TruncateUTF8ToByteSize(input, byte_size, output);

64 return prev != output->length();

65 }

66

67 } // namespace

68

// Exercises TruncateUTF8ToByteSize through the Truncated() helper: each pair
// of rows checks whether truncation happened and what was left in |output|.
// |output| is deliberately reused across all cases.
69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {

70 std::string output;

71

72 // Empty strings and invalid byte_size arguments

73 EXPECT_FALSE(Truncated(std::string(), 0, &output));

74 EXPECT_EQ(output, "");

75 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));

76 EXPECT_EQ(output, "");

// byte_size is a size_t, so -1 converts to the largest size_t value.
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));

78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

79

80 // Testing the truncation of valid UTF8 correctly

81 EXPECT_TRUE(Truncated("abc", 2, &output));

82 EXPECT_EQ(output, "ab");

83 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));

84 EXPECT_EQ(output.compare("\xc2\x81"), 0);

// A byte budget of 3 falls in the middle of the second 2-byte character, so
// only the first character is expected to survive.
85 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));

86 EXPECT_EQ(output.compare("\xc2\x81"), 0);

87 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));

88 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

89

90 {

// NOTE(review): arraysize(array) presumably counts the literal's terminating
// NUL as well, making array_string one byte longer than the visible escapes --
// confirm against the macro's definition.
91 const char array[] = "\x00\x00\xc2\x81\xc2\x81";

92 const std::string array_string(array, arraysize(array));

93 EXPECT_TRUE(Truncated(array_string, 4, &output));

94 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);

95 }

96

97 {

98 const char array[] = "\x00\xc2\x81\xc2\x81";

99 const std::string array_string(array, arraysize(array));

100 EXPECT_TRUE(Truncated(array_string, 4, &output));

101 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);

102 }

103

104 // Testing invalid UTF8

105 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));

106 EXPECT_EQ(output.compare(""), 0);

107 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));

108 EXPECT_EQ(output.compare(""), 0);

109 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));

110 EXPECT_EQ(output.compare(""), 0);

111

112 // Testing invalid UTF8 mixed with valid UTF8

113 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));

114 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);

115 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));

116 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);

117 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",

118 10, &output));

119 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);

// The literal is split around "a" so the hex escape \xf1 does not absorb the
// following 'a' as another hex digit.
120 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",

121 10, &output));

122 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);

123 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));

124 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

125

126 // Overlong sequences

127 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));

128 EXPECT_EQ(output.compare(""), 0);

129 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));

130 EXPECT_EQ(output.compare(""), 0);

131 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));

132 EXPECT_EQ(output.compare(""), 0);

133 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));

134 EXPECT_EQ(output.compare(""), 0);

135 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));

136 EXPECT_EQ(output.compare(""), 0);

137 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));

138 EXPECT_EQ(output.compare(""), 0);

139 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));

140 EXPECT_EQ(output.compare(""), 0);

141 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));

142 EXPECT_EQ(output.compare(""), 0);

143 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));

144 EXPECT_EQ(output.compare(""), 0);

145 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));

146 EXPECT_EQ(output.compare(""), 0);

147 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));

148 EXPECT_EQ(output.compare(""), 0);

149

150 // Beyond U+10FFFF (the upper limit of Unicode codespace)

151 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));

152 EXPECT_EQ(output.compare(""), 0);

153 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));

154 EXPECT_EQ(output.compare(""), 0);

155 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));

156 EXPECT_EQ(output.compare(""), 0);

157

158 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)

159 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));

160 EXPECT_EQ(output.compare(""), 0);

161 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));

162 EXPECT_EQ(output.compare(""), 0);

163

164 {

165 const char array[] = "\x00\x00\xfe\xff";

166 const std::string array_string(array, arraysize(array));

167 EXPECT_TRUE(Truncated(array_string, 4, &output));

168 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);

169 }

170

171 // Variants on the previous test

172 {

// This variant builds the string with an explicit length of 4 rather than
// arraysize(array), and expects no truncation at byte_size 4.
173 const char array[] = "\xff\xfe\x00\x00";

174 const std::string array_string(array, 4);

175 EXPECT_FALSE(Truncated(array_string, 4, &output));

176 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);

177 }

178 {

179 const char array[] = "\xff\x00\x00\xfe";

180 const std::string array_string(array, arraysize(array));

181 EXPECT_TRUE(Truncated(array_string, 4, &output));

182 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);

183 }

184

185 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>

186 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));

187 EXPECT_EQ(output.compare(""), 0);

188 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));

189 EXPECT_EQ(output.compare(""), 0);

190 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));

191 EXPECT_EQ(output.compare(""), 0);

192 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));

193 EXPECT_EQ(output.compare(""), 0);

194 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));

195 EXPECT_EQ(output.compare(""), 0);

196

197 // Strings in legacy encodings that are valid in UTF-8, but

198 // are invalid as UTF-8 in real data.

199 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));

200 EXPECT_EQ(output.compare("caf"), 0);

201 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));

202 EXPECT_EQ(output.compare(""), 0);

203 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));

204 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

205 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,

206 &output));

207 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

208

209 // Testing using the same string as input and output.
// These two cases depend on |output| still holding "\xa7\x41\xa6\x6e" from
// the case immediately above.

210 EXPECT_FALSE(Truncated(output, 4, &output));

211 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

212 EXPECT_TRUE(Truncated(output, 3, &output));

213 EXPECT_EQ(output.compare("\xa7\x41"), 0);

214

215 // "abc" with U+201[CD] in windows-125[0-8]

216 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));

217 EXPECT_EQ(output.compare("\x93" "abc"), 0);

218

219 // U+0639 U+064E U+0644 U+064E in ISO-8859-6

220 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));

221 EXPECT_EQ(output.compare(""), 0);

222

223 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7

224 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));

225 EXPECT_EQ(output.compare(""), 0);

226 }

227

// Runs TrimWhitespace over the trim_cases (converted to UTF-16) and
// trim_cases_ascii tables, checking both the return value and the trimmed
// output, and additionally checks in-place use (same string as input and
// output).
228 TEST(StringUtilTest, TrimWhitespace) {

229 string16 output; // Allow contents to carry over to next testcase

230 for (size_t i = 0; i < arraysize(trim_cases); ++i) {

231 const trim_case& value = trim_cases[i];

232 EXPECT_EQ(value.return_value,

233 TrimWhitespace(WideToUTF16(value.input), value.positions,

234 &output));

235 EXPECT_EQ(WideToUTF16(value.output), output);

236 }

237

238 // Test that TrimWhitespace() can take the same string for input and output

239 output = ASCIIToUTF16(" This is a test \r\n");

240 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));

241 EXPECT_EQ(ASCIIToUTF16("This is a test"), output);

242

243 // Once more, but with a string of whitespace

244 output = ASCIIToUTF16(" \r\n");

245 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));

246 EXPECT_EQ(string16(), output);

247

// Narrow-string overload, driven by the trim_cases_ascii table.
248 std::string output_ascii;

249 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {

250 const trim_case_ascii& value = trim_cases_ascii[i];

251 EXPECT_EQ(value.return_value,

252 TrimWhitespace(value.input, value.positions, &output_ascii));

253 EXPECT_EQ(value.output, output_ascii);

254 }

255 }

256

// Wide-string cases for the CollapseWhitespace test: input, the boolean passed
// as CollapseWhitespace's second argument, and the expected result.
// NOTE(review): judging by the rows with trim == true, the flag appears to
// control whether whitespace runs containing a line break are removed
// entirely rather than collapsed to one space -- confirm against the
// CollapseWhitespace declaration.
257 static const struct collapse_case {

258 const wchar_t* input;

259 const bool trim;

260 const wchar_t* output;

261 } collapse_cases[] = {

262 {L" Google Video ", false, L"Google Video"},

263 {L"Google Video", false, L"Google Video"},

264 {L"", false, L""},

265 {L" ", false, L""},

266 {L"\t\rTest String\n", false, L"Test String"},

267 {L"\x2002Test String\x00A0\x3000", false, L"Test String"},

268 {L" Test \n \t String ", false, L"Test String"},

269 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},

270 {L" Test String", false, L"Test String"},

271 {L"Test String ", false, L"Test String"},

272 {L"Test String", false, L"Test String"},

273 {L"", true, L""},

274 {L"\n", true, L""},

275 {L" \r ", true, L""},

276 {L"\nFoo", true, L"Foo"},

277 {L"\r Foo ", true, L"Foo"},

278 {L" Foo bar ", true, L"Foo bar"},

279 {L" \tFoo bar \n", true, L"Foo bar"},

280 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},

281 };

282

// Checks CollapseWhitespace against every row of the collapse_cases table.
283 TEST(StringUtilTest, CollapseWhitespace) {

284 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {

285 const collapse_case& value = collapse_cases[i];

286 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));

287 }

288 }

289

// Narrow-string counterpart of collapse_cases, consumed by the
// CollapseWhitespaceASCII test: input, the boolean passed as the second
// argument, and the expected result.
290 static const struct collapse_case_ascii {

291 const char* input;

292 const bool trim;

293 const char* output;

294 } collapse_cases_ascii[] = {

295 {" Google Video ", false, "Google Video"},

296 {"Google Video", false, "Google Video"},

297 {"", false, ""},

298 {" ", false, ""},

299 {"\t\rTest String\n", false, "Test String"},

300 {" Test \n \t String ", false, "Test String"},

301 {" Test String", false, "Test String"},

302 {"Test String ", false, "Test String"},

303 {"Test String", false, "Test String"},

304 {"", true, ""},

305 {"\n", true, ""},

306 {" \r ", true, ""},

307 {"\nFoo", true, "Foo"},

308 {"\r Foo ", true, "Foo"},

309 {" Foo bar ", true, "Foo bar"},

310 {" \tFoo bar \n", true, "Foo bar"},

311 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},

312 };

313

// Checks CollapseWhitespaceASCII against every row of collapse_cases_ascii.
314 TEST(StringUtilTest, CollapseWhitespaceASCII) {

315 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {

316 const collapse_case_ascii& value = collapse_cases_ascii[i];

317 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));

318 }

319 }

320

// ContainsOnlyWhitespaceASCII: the empty string and pure-whitespace strings
// are expected to return true; any string with a non-whitespace character
// is expected to return false.
321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {

322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));

323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));

324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));

325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));

326 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));

327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));

328 }

329

// string16 version of the ContainsOnlyWhitespaceASCII test above, with the
// same inputs converted through ASCIIToUTF16.
330 TEST(StringUtilTest, ContainsOnlyWhitespace) {

331 EXPECT_TRUE(ContainsOnlyWhitespace(string16()));

332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));

333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));

334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));

335 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));

336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));

337 }

338

// IsStringUTF8: well-formed UTF-8 is accepted; surrogates, overlong forms,
// values above U+10FFFF, UTF-16/32 BOM bytes, non-characters and legacy
// single/double-byte encodings are all expected to be rejected.
339 TEST(StringUtilTest, IsStringUTF8) {

340 EXPECT_TRUE(IsStringUTF8("abc"));

341 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));

342 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));

343 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));

344 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));

345 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM

346

347 // surrogate code points

348 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));

349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));

350 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

351

352 // overlong sequences

353 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000

354 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"

355 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000

356 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080

357 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff

358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D

359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091

360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800

361 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)

362 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F

363 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5

364

365 // Beyond U+10FFFF (the upper limit of Unicode codespace)

366 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000

367 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes

368 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes

369

370 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)

371 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));

372 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));

373 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));

// Passed as a C string literal, so only the bytes before the first \x00
// reach IsStringUTF8 here (unlike the explicit-length case on row 373).
374 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));

375

376 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>

377 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE

378 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // labelled U+1FFFE; these bytes are an overlong U+FFFE

379 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // labelled U+10FFFF; these bytes decode to U+FFFFF

380 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0

381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF

382 // Strings in legacy encodings. We can certainly make up strings

383 // in a legacy encoding that are valid in UTF-8, but in real data,

384 // most of them are invalid as UTF-8.

385 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1

386 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC01 in EUC-KR

387 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5

388 // "abc" with U+201[CD] in windows-125[0-8]

389 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));

390 // U+0639 U+064E U+0644 U+064E in ISO-8859-6

391 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));

392 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7

393 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

394

395 // Check that we support Embedded Nulls. The first uses the canonical UTF-8

396 // representation, and the second uses a 2-byte sequence. The second version

397 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a

398 // given codepoint must be used.

399 static const char kEmbeddedNull[] = "embedded\0null";

// sizeof() includes the literal's terminating NUL, so the string under test
// contains two NUL bytes: the embedded one and the trailing one.
400 EXPECT_TRUE(IsStringUTF8(

401 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));

402 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));

403 }

404

// Round-trips ASCII data between narrow and wide strings with ASCIIToWide /
// WideToASCII, and checks IsStringASCII on both string types.
405 TEST(StringUtilTest, ConvertASCII) {

// char_cases and wchar_cases are parallel arrays: entry i of one is the
// expected conversion of entry i of the other.
406 static const char* char_cases[] = {

407 "Google Video",

408 "Hello, world\n",

409 "0123ABCDwxyz \a\b\t\r\n!+,.~"

410 };

411

412 static const wchar_t* const wchar_cases[] = {

413 L"Google Video",

414 L"Hello, world\n",

415 L"0123ABCDwxyz \a\b\t\r\n!+,.~"

416 };

417

418 for (size_t i = 0; i < arraysize(char_cases); ++i) {

419 EXPECT_TRUE(IsStringASCII(char_cases[i]));

420 std::wstring wide = ASCIIToWide(char_cases[i]);

421 EXPECT_EQ(wchar_cases[i], wide);

422

423 EXPECT_TRUE(IsStringASCII(wchar_cases[i]));

424 std::string ascii = WideToASCII(wchar_cases[i]);

425 EXPECT_EQ(char_cases[i], ascii);

426 }

427

// 0x80 is the first value outside the 7-bit ASCII range.
428 EXPECT_FALSE(IsStringASCII("Google \x80Video"));

429 EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));

430

431 // Convert empty strings.

432 std::wstring wempty;

433 std::string empty;

434 EXPECT_EQ(empty, WideToASCII(wempty));

435 EXPECT_EQ(wempty, ASCIIToWide(empty));

436

437 // Convert strings with an embedded NUL character.

438 const char chars_with_nul[] = "test\0string";

// The "- 1" subtracts one from the array's size; presumably this drops the
// terminating NUL so only the embedded NUL remains in the string.
439 const int length_with_nul = arraysize(chars_with_nul) - 1;

440 std::string string_with_nul(chars_with_nul, length_with_nul);

441 std::wstring wide_with_nul = ASCIIToWide(string_with_nul);

442 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),

443 wide_with_nul.length());

444 std::string narrow_with_nul = WideToASCII(wide_with_nul);

445 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),

446 narrow_with_nul.length());

447 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));

448 }

449

// Covers ToUpperASCII on single narrow/wide characters and both forms of
// StringToUpperASCII: the in-place form taking a pointer and the form that
// returns a new string.
450 TEST(StringUtilTest, ToUpperASCII) {

451 EXPECT_EQ('C', ToUpperASCII('C'));

452 EXPECT_EQ('C', ToUpperASCII('c'));

453 EXPECT_EQ('2', ToUpperASCII('2'));

454

455 EXPECT_EQ(L'C', ToUpperASCII(L'C'));

456 EXPECT_EQ(L'C', ToUpperASCII(L'c'));

457 EXPECT_EQ(L'2', ToUpperASCII(L'2'));

458

// In-place variants: the string is modified through the pointer.
459 std::string in_place_a("Cc2");

460 StringToUpperASCII(&in_place_a);

461 EXPECT_EQ("CC2", in_place_a);

462

463 std::wstring in_place_w(L"Cc2");

464 StringToUpperASCII(&in_place_w);

465 EXPECT_EQ(L"CC2", in_place_w);

466

// Value-returning variants: the upper-cased copy is the return value.
467 std::string original_a("Cc2");

468 std::string upper_a = StringToUpperASCII(original_a);

469 EXPECT_EQ("CC2", upper_a);

470

471 std::wstring original_w(L"Cc2");

472 std::wstring upper_w = StringToUpperASCII(original_w);

473 EXPECT_EQ(L"CC2", upper_w);

474 }

475

// LowerCaseEqualsASCII: each row supplies the same text as a wide string and
// as a narrow string, plus the lowercase ASCII string both are expected to
// match.
476 TEST(StringUtilTest, LowerCaseEqualsASCII) {

477 static const struct {

478 const wchar_t* src_w;

479 const char* src_a;

480 const char* dst;

481 } lowercase_cases[] = {

482 { L"FoO", "FoO", "foo" },

483 { L"foo", "foo", "foo" },

484 { L"FOO", "FOO", "foo" },

485 };

486

487 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {

488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,

489 lowercase_cases[i].dst));

490 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,

491 lowercase_cases[i].dst));

492 }

493 }

494

// FormatBytesUnlocalized: each row pairs a byte count with the string the
// formatter is expected to produce. The multiplication operators in the byte
// counts had been swallowed by the page extraction (e.g. "102410241024");
// they are restored here so every literal is valid C++ and agrees with its
// expected string.
495 TEST(StringUtilTest, FormatBytesUnlocalized) {

496 static const struct {

497 int64 bytes;

498 const char* expected;

499 } cases[] = {

500 // Expected behavior: we show one post-decimal digit when we have

501 // under two pre-decimal digits, except in cases where it makes no

502 // sense (zero or bytes).

503 // Since we switch units once we cross the 1000 mark, this keeps

504 // the display of file sizes or bytes consistently around three

505 // digits.

506 {0, "0 B"},

507 {512, "512 B"},

508 {1024*1024, "1.0 MB"},

509 {1024*1024*1024, "1.0 GB"},

// The LL suffix makes the products below 64-bit; they would not fit in a
// 32-bit int.
510 {10LL*1024*1024*1024, "10.0 GB"},

511 {99LL*1024*1024*1024, "99.0 GB"},

512 {105LL*1024*1024*1024, "105 GB"},

513 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},

// All bits set except bit 63 (presumably the largest positive int64).
514 {~(1LL<<63), "8192 PB"},

515

516 {99*1024 + 103, "99.1 kB"},

517 {1024*1024 + 103, "1.0 MB"},

518 {1024*1024 + 205 * 1024, "1.2 MB"},

519 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},

520 {10LL*1024*1024*1024, "10.0 GB"},

521 {100LL*1024*1024*1024, "100 GB"},

522 };

523

524 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {

525 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),

526 FormatBytesUnlocalized(cases[i].bytes));

527 }

528 }

// ReplaceSubstringsAfterOffset: each row gives the starting string, the
// offset passed to the call, the substring to find, its replacement, and the
// expected result. The expected strings show every match at or after the
// offset being replaced, and text before the offset left untouched.
529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {

530 static const struct {

531 const char* str;

532 string16::size_type start_offset;

533 const char* find_this;

534 const char* replace_with;

535 const char* expected;

536 } cases[] = {

537 {"aaa", 0, "a", "b", "bbb"},

538 {"abb", 0, "ab", "a", "ab"},

539 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},

540 {"Not found", 0, "x", "0", "Not found"},

541 {"Not found again", 5, "x", "0", "Not found again"},

542 {" Making it much longer ", 0, " ", "Four score and seven years ago",

543 "Four score and seven years agoMakingFour score and seven years agoit"

544 "Four score and seven years agomuchFour score and seven years agolonger"

545 "Four score and seven years ago"},

// An offset past the end of the string is expected to leave it unchanged.
546 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},

547 {"Replace me only me once", 9, "me ", "", "Replace me only once"},

548 {"abababab", 2, "ab", "c", "abccc"},

549 };

550

551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {

552 string16 str = ASCIIToUTF16(cases[i].str);

553 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,

554 ASCIIToUTF16(cases[i].find_this),

555 ASCIIToUTF16(cases[i].replace_with));

556 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);

557 }

558 }

559

// ReplaceFirstSubstringAfterOffset: same table layout as the
// ReplaceSubstringsAfterOffset test, but the expected strings show only the
// first match at or after the offset being replaced.
560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {

561 static const struct {

562 const char* str;

563 string16::size_type start_offset;

564 const char* find_this;

565 const char* replace_with;

566 const char* expected;

567 } cases[] = {

568 {"aaa", 0, "a", "b", "baa"},

569 {"abb", 0, "ab", "a", "ab"},

570 {"Removing some substrings inging", 0, "ing", "",

571 "Remov some substrings inging"},

572 {"Not found", 0, "x", "0", "Not found"},

573 {"Not found again", 5, "x", "0", "Not found again"},

574 {" Making it much longer ", 0, " ", "Four score and seven years ago",

575 "Four score and seven years agoMaking it much longer "},

// An offset past the end of the string is expected to leave it unchanged.
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},

577 {"Replace me only me once", 4, "me ", "", "Replace only me once"},

578 {"abababab", 2, "ab", "c", "abcabab"},

579 };

580

581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {

582 string16 str = ASCIIToUTF16(cases[i].str);

583 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,

584 ASCIIToUTF16(cases[i].find_this),

585 ASCIIToUTF16(cases[i].replace_with));

586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);

587 }

588 }

589

// HexDigitToInt: every hex digit character, in upper and lower case, is
// expected to map to its numeric value 0-15.
590 TEST(StringUtilTest, HexDigitToInt) {

591 EXPECT_EQ(0, HexDigitToInt('0'));

592 EXPECT_EQ(1, HexDigitToInt('1'));

593 EXPECT_EQ(2, HexDigitToInt('2'));

594 EXPECT_EQ(3, HexDigitToInt('3'));

595 EXPECT_EQ(4, HexDigitToInt('4'));

596 EXPECT_EQ(5, HexDigitToInt('5'));

597 EXPECT_EQ(6, HexDigitToInt('6'));

598 EXPECT_EQ(7, HexDigitToInt('7'));

599 EXPECT_EQ(8, HexDigitToInt('8'));

600 EXPECT_EQ(9, HexDigitToInt('9'));

601 EXPECT_EQ(10, HexDigitToInt('A'));

602 EXPECT_EQ(11, HexDigitToInt('B'));

603 EXPECT_EQ(12, HexDigitToInt('C'));

604 EXPECT_EQ(13, HexDigitToInt('D'));

605 EXPECT_EQ(14, HexDigitToInt('E'));

606 EXPECT_EQ(15, HexDigitToInt('F'));

607

608 // Verify the lower case as well.

609 EXPECT_EQ(10, HexDigitToInt('a'));

610 EXPECT_EQ(11, HexDigitToInt('b'));

611 EXPECT_EQ(12, HexDigitToInt('c'));

612 EXPECT_EQ(13, HexDigitToInt('d'));

613 EXPECT_EQ(14, HexDigitToInt('e'));

614 EXPECT_EQ(15, HexDigitToInt('f'));

615 }

616

617 // This checks where we can use the assignment operator for a va_list. We need

618 // a way to do this since Visual C doesn't support va_copy, but assignment on

619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this

620 // capability.
// The variadic arguments must be (int, int, char*, double), in that order;
// |format| itself is not parsed, it only anchors va_start.

621 static void VariableArgsFunc(const char* format, ...) {

622 va_list org;

623 va_start(org, format);

624

625 va_list dup;

626 GG_VA_COPY(dup, org);

627 int i1 = va_arg(org, int);

628 int j1 = va_arg(org, int);

629 char* s1 = va_arg(org, char*);

630 double d1 = va_arg(org, double);

// |org| is fully consumed and ended before |dup| is read, so the values read
// from |dup| below can only match if the copy is independent of |org|.
631 va_end(org);

632

633 int i2 = va_arg(dup, int);

634 int j2 = va_arg(dup, int);

635 char* s2 = va_arg(dup, char*);

636 double d2 = va_arg(dup, double);

637

638 EXPECT_EQ(i1, i2);

639 EXPECT_EQ(j1, j2);

640 EXPECT_STREQ(s1, s2);

641 EXPECT_EQ(d1, d2);

642

643 va_end(dup);

644 }

645

// Drives VariableArgsFunc with the (int, int, char*, double) argument list it
// reads back; the assertions live inside VariableArgsFunc.
646 TEST(StringUtilTest, VAList) {

647 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);

648 }

649

650 // Test for Tokenize
// Shared body for the Tokenize tests, instantiated once per string type STR.
// STR must be constructible from a string literal and default-constructible.
// Each case checks both Tokenize's return value and the size and contents of
// the result vector |r|; |r| is cleared between cases.

651 template <typename STR>

652 void TokenizeTest() {

653 std::vector<STR> r;

654 size_t size;

655

656 size = Tokenize(STR("This is a string"), STR(" "), &r);

657 EXPECT_EQ(4U, size);

// ASSERT (not EXPECT) guards the indexed accesses that follow.
658 ASSERT_EQ(4U, r.size());

659 EXPECT_EQ(r[0], STR("This"));

660 EXPECT_EQ(r[1], STR("is"));

661 EXPECT_EQ(r[2], STR("a"));

662 EXPECT_EQ(r[3], STR("string"));

663 r.clear();

664

665 size = Tokenize(STR("one,two,three"), STR(","), &r);

666 EXPECT_EQ(3U, size);

667 ASSERT_EQ(3U, r.size());

668 EXPECT_EQ(r[0], STR("one"));

669 EXPECT_EQ(r[1], STR("two"));

670 EXPECT_EQ(r[2], STR("three"));

671 r.clear();

672

// The delimiter argument is a set of characters: ';' is not in it here, so
// "three;four" stays one token.
673 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);

674 EXPECT_EQ(3U, size);

675 ASSERT_EQ(3U, r.size());

676 EXPECT_EQ(r[0], STR("one"));

677 EXPECT_EQ(r[1], STR("two"));

678 EXPECT_EQ(r[2], STR("three;four"));

679 r.clear();

680

681 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);

682 EXPECT_EQ(4U, size);

683 ASSERT_EQ(4U, r.size());

684 EXPECT_EQ(r[0], STR("one"));

685 EXPECT_EQ(r[1], STR("two"));

686 EXPECT_EQ(r[2], STR("three"));

687 EXPECT_EQ(r[3], STR("four"));

688 r.clear();

689

// Whitespace is not trimmed from tokens.
690 size = Tokenize(STR("one, two, three"), STR(","), &r);

691 EXPECT_EQ(3U, size);

692 ASSERT_EQ(3U, r.size());

693 EXPECT_EQ(r[0], STR("one"));

694 EXPECT_EQ(r[1], STR(" two"));

695 EXPECT_EQ(r[2], STR(" three"));

696 r.clear();

697

698 size = Tokenize(STR("one, two, three, "), STR(","), &r);

699 EXPECT_EQ(4U, size);

700 ASSERT_EQ(4U, r.size());

701 EXPECT_EQ(r[0], STR("one"));

702 EXPECT_EQ(r[1], STR(" two"));

703 EXPECT_EQ(r[2], STR(" three"));

704 EXPECT_EQ(r[3], STR(" "));

705 r.clear();

706

// A trailing delimiter does not produce an empty final token.
707 size = Tokenize(STR("one, two, three,"), STR(","), &r);

708 EXPECT_EQ(3U, size);

709 ASSERT_EQ(3U, r.size());

710 EXPECT_EQ(r[0], STR("one"));

711 EXPECT_EQ(r[1], STR(" two"));

712 EXPECT_EQ(r[2], STR(" three"));

713 r.clear();

714

715 size = Tokenize(STR(), STR(","), &r);

716 EXPECT_EQ(0U, size);

717 ASSERT_EQ(0U, r.size());

718 r.clear();

719

// Input made up solely of delimiters yields no tokens.
720 size = Tokenize(STR(","), STR(","), &r);

721 EXPECT_EQ(0U, size);

722 ASSERT_EQ(0U, r.size());

723 r.clear();

724

725 size = Tokenize(STR(",;:."), STR(".:;,"), &r);

726 EXPECT_EQ(0U, size);

727 ASSERT_EQ(0U, r.size());

728 r.clear();

729

// Leading and consecutive delimiters do not produce empty tokens either.
730 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);

731 EXPECT_EQ(1U, size);

732 ASSERT_EQ(1U, r.size());

733 EXPECT_EQ(r[0], STR("a"));

734 r.clear();

735

736 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);

737 EXPECT_EQ(2U, size);

738 ASSERT_EQ(2U, r.size());

739 EXPECT_EQ(r[0], STR("\ta\t"));

740 EXPECT_EQ(r[1], STR("b\tcc"));

741 r.clear();

742 }

743

// Runs the shared TokenizeTest body with std::string.
744 TEST(StringUtilTest, TokenizeStdString) {

745 TokenizeTest<std::string>();

746 }

747

// Runs the shared TokenizeTest body with base::StringPiece.
748 TEST(StringUtilTest, TokenizeStringPiece) {

749 TokenizeTest<base::StringPiece>();

750 }

751

752 // Test for JoinString
// Uses the overload taking a single char separator. The vector |in| grows
// across the checks, so each expectation builds on the previous one. The '|'
// separators had been rendered as "\|" by the page extraction, which is not
// a standard C++ escape sequence; the plain character is restored.

753 TEST(StringUtilTest, JoinString) {

754 std::vector<std::string> in;

755 EXPECT_EQ("", JoinString(in, ','));

756

757 in.push_back("a");

758 EXPECT_EQ("a", JoinString(in, ','));

759

760 in.push_back("b");

761 in.push_back("c");

762 EXPECT_EQ("a,b,c", JoinString(in, ','));

763

// An empty element still gets its separator.
764 in.push_back(std::string());

765 EXPECT_EQ("a,b,c,", JoinString(in, ','));

766 in.push_back(" ");

767 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));

768 }

769

770 // Test for JoinString overloaded with std::string separator
// Mirrors the char-separator test with a two-character separator ", ". The
// '|' characters in the last check had been rendered as "\|" by the page
// extraction, which is not a standard C++ escape sequence; the plain
// character is restored.

771 TEST(StringUtilTest, JoinStringWithString) {

772 std::string separator(", ");

773 std::vector<std::string> parts;

774 EXPECT_EQ(std::string(), JoinString(parts, separator));

775

776 parts.push_back("a");

777 EXPECT_EQ("a", JoinString(parts, separator));

778

779 parts.push_back("b");

780 parts.push_back("c");

781 EXPECT_EQ("a, b, c", JoinString(parts, separator));

782

// An empty element still gets its separator.
783 parts.push_back(std::string());

784 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));

785 parts.push_back(" ");

786 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));

787 }

788

789 // Test for JoinString overloaded with string16 separator
// string16 version of the std::string-separator test, with all literals
// converted through ASCIIToUTF16. The '|' characters in the last check had
// been rendered as "\|" by the page extraction, which is not a standard C++
// escape sequence; the plain character is restored.

790 TEST(StringUtilTest, JoinStringWithString16) {

791 string16 separator = ASCIIToUTF16(", ");

792 std::vector<string16> parts;

793 EXPECT_EQ(string16(), JoinString(parts, separator));

794

795 parts.push_back(ASCIIToUTF16("a"));

796 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));

797

798 parts.push_back(ASCIIToUTF16("b"));

799 parts.push_back(ASCIIToUTF16("c"));

800 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));

801

// An empty element still gets its separator.
802 parts.push_back(ASCIIToUTF16(""));

803 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));

804 parts.push_back(ASCIIToUTF16(" "));

805 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));

806 }

807

// StartsWithASCII (narrow) and StartsWith (wide) with the same set of cases.
// Judging by the expectations, the third argument selects case-sensitive
// matching when true: "JavaScript:url" only matches "javascript" when it is
// false. An empty prefix is expected to match; an empty subject is not.
808 TEST(StringUtilTest, StartsWith) {

809 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));

810 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));

811 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));

812 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));

813 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));

814 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));

815 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));

816 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));

817 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));

818 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));

819

820 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));

821 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));

822 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));

823 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));

824 EXPECT_FALSE(StartsWith(L"java", L"javascript", true));

825 EXPECT_FALSE(StartsWith(L"java", L"javascript", false));

826 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false));

827 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true));

828 EXPECT_TRUE(StartsWith(L"java", std::wstring(), false));

829 EXPECT_TRUE(StartsWith(L"java", std::wstring(), true));

830 }

831

832 TEST(StringUtilTest, EndsWith) {

833 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));

834 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));

835 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));

836 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));

837 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));

838 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));

839 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));

840 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));

841 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false));

842 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true));

843 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false));

844 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true));

845 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));

846 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));

847 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));

848 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));

849 }

850

851 TEST(StringUtilTest, GetStringFWithOffsets) {

852 std::vector<string16> subst;

853 subst.push_back(ASCIIToUTF16("1"));

854 subst.push_back(ASCIIToUTF16("2"));

855 std::vector<size_t> offsets;

856

857 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),

858 subst,

859 &offsets);

860 EXPECT_EQ(2U, offsets.size());

861 EXPECT_EQ(7U, offsets[0]);

862 EXPECT_EQ(25U, offsets[1]);

863 offsets.clear();

864

865 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),

866 subst,

867 &offsets);

868 EXPECT_EQ(2U, offsets.size());

869 EXPECT_EQ(25U, offsets[0]);

870 EXPECT_EQ(7U, offsets[1]);

871 offsets.clear();

872 }

873

874 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {

875 // Test whether replacestringplaceholders works as expected when there

876 // are fewer inputs than outputs.

877 std::vector<string16> subst;

878 subst.push_back(ASCIIToUTF16("9a"));

879 subst.push_back(ASCIIToUTF16("8b"));

880 subst.push_back(ASCIIToUTF16("7c"));

881

882 string16 formatted =

883 ReplaceStringPlaceholders(

884 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);

885

886 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));

887 }

888

889 TEST(StringUtilTest, ReplaceStringPlaceholders) {

890 std::vector<string16> subst;

891 subst.push_back(ASCIIToUTF16("9a"));

892 subst.push_back(ASCIIToUTF16("8b"));

893 subst.push_back(ASCIIToUTF16("7c"));

894 subst.push_back(ASCIIToUTF16("6d"));

895 subst.push_back(ASCIIToUTF16("5e"));

896 subst.push_back(ASCIIToUTF16("4f"));

897 subst.push_back(ASCIIToUTF16("3g"));

898 subst.push_back(ASCIIToUTF16("2h"));

899 subst.push_back(ASCIIToUTF16("1i"));

900

901 string16 formatted =

902 ReplaceStringPlaceholders(

903 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);

904

905 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));

906 }

907

908 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {

909 std::vector<string16> subst;

910 subst.push_back(ASCIIToUTF16("9a"));

911 subst.push_back(ASCIIToUTF16("8b"));

912 subst.push_back(ASCIIToUTF16("7c"));

913 subst.push_back(ASCIIToUTF16("6d"));

914 subst.push_back(ASCIIToUTF16("5e"));

915 subst.push_back(ASCIIToUTF16("4f"));

916 subst.push_back(ASCIIToUTF16("3g"));

917 subst.push_back(ASCIIToUTF16("2h"));

918 subst.push_back(ASCIIToUTF16("1i"));

919 subst.push_back(ASCIIToUTF16("0j"));

920 subst.push_back(ASCIIToUTF16("-1k"));

921 subst.push_back(ASCIIToUTF16("-2l"));

922 subst.push_back(ASCIIToUTF16("-3m"));

923 subst.push_back(ASCIIToUTF16("-4n"));

924

925 string16 formatted =

926 ReplaceStringPlaceholders(

927 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"

928 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);

929

930 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"

931 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));

932 }

933

934 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {

935 std::vector<std::string> subst;

936 subst.push_back("9a");

937 subst.push_back("8b");

938 subst.push_back("7c");

939 subst.push_back("6d");

940 subst.push_back("5e");

941 subst.push_back("4f");

942 subst.push_back("3g");

943 subst.push_back("2h");

944 subst.push_back("1i");

945

946 std::string formatted =

947 ReplaceStringPlaceholders(

948 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);

949

950 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");

951 }

952

953 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {

954 std::vector<std::string> subst;

955 subst.push_back("a");

956 subst.push_back("b");

957 subst.push_back("c");

958 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),

959 "$1 $$2 $$$3");

960 }

961

962 TEST(StringUtilTest, MatchPatternTest) {

963 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));

964 EXPECT_TRUE(MatchPattern("www.google.com", "*"));

965 EXPECT_FALSE(MatchPattern("www.google.com", "www.g.org"));

966 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));

967 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));

968 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));

969 EXPECT_TRUE(MatchPattern("Hello1234", "He??o\\1*"));

970 EXPECT_FALSE(MatchPattern("", "."));

971 EXPECT_TRUE(MatchPattern("", "*"));

972 EXPECT_TRUE(MatchPattern("", "?"));

973 EXPECT_TRUE(MatchPattern("", ""));

974 EXPECT_FALSE(MatchPattern("Hello", ""));

975 EXPECT_TRUE(MatchPattern("Hello", "Hello"));

976 // Stop after a certain recursion depth.

977 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));

978

979 // Test UTF8 matching.

980 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));

981 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));

982 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));

983 // Invalid sequences should be handled as a single invalid character.

984 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));

985 // If the pattern has invalid characters, it shouldn't match anything.

986 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));

987

988 // Test UTF16 character matching.

989 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),

990 UTF8ToUTF16("*.com")));

991 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),

992 UTF8ToUTF16("He??o\\1")));

993

994 // This test verifies that consecutive wild cards are collapsed into 1

995 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum

996 // recursion depth).

997 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),

998 UTF8ToUTF16("He********************************o")));

999 }

1000

1001 TEST(StringUtilTest, LcpyTest) {

1002 // Test the normal case where we fit in our buffer.

1003 {

1004 char dst[10];

1005 wchar_t wdst[10];

1006 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));

1007 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));

1008 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

1009 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));

1010 }

1011

1012 // Test dst_size == 0, nothing should be written to \|dst\| and we should

1013 // have the equivalent of strlen(src).

1014 {

1015 char dst[2] = {1, 2};

1016 wchar_t wdst[2] = {1, 2};

1017 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));

1018 EXPECT_EQ(1, dst[0]);

1019 EXPECT_EQ(2, dst[1]);

1020 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));

1021 #if defined(WCHAR_T_IS_UNSIGNED)

1022 EXPECT_EQ(1U, wdst[0]);

1023 EXPECT_EQ(2U, wdst[1]);

1024 #else

1025 EXPECT_EQ(1, wdst[0]);

1026 EXPECT_EQ(2, wdst[1]);

1027 #endif

1028 }

1029

1030 // Test the case were we _just_ competely fit including the null.

1031 {

1032 char dst[8];

1033 wchar_t wdst[8];

1034 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));

1035 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));

1036 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

1037 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));

1038 }

1039

1040 // Test the case were we we are one smaller, so we can't fit the null.

1041 {

1042 char dst[7];

1043 wchar_t wdst[7];

1044 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));

1045 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));

1046 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

1047 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));

1048 }

1049

1050 // Test the case were we are just too small.

1051 {

1052 char dst[3];

1053 wchar_t wdst[3];

1054 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));

1055 EXPECT_EQ(0, memcmp(dst, "ab", 3));

1056 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

1057 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));

1058 }

1059 }

1060

1061 TEST(StringUtilTest, WprintfFormatPortabilityTest) {

1062 static const struct {

1063 const wchar_t* input;

1064 bool portable;

1065 } cases[] = {

1066 { L"%ls", true },

1067 { L"%s", false },

1068 { L"%S", false },

1069 { L"%lS", false },

1070 { L"Hello, %s", false },

1071 { L"%lc", true },

1072 { L"%c", false },

1073 { L"%C", false },

1074 { L"%lC", false },

1075 { L"%ls %s", false },

1076 { L"%s %ls", false },

1077 { L"%s %ls %s", false },

1078 { L"%f", true },

1079 { L"%f %F", false },

1080 { L"%d %D", false },

1081 { L"%o %O", false },

1082 { L"%u %U", false },

1083 { L"%f %d %o %u", true },

1084 { L"%-8d (%02.1f%)", true },

1085 { L"% 10s", false },

1086 { L"% 10ls", true }

1087 };

1088 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)

1089 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));

1090 }

1091

1092 TEST(StringUtilTest, RemoveChars) {

1093 const char* kRemoveChars = "-/+*";

1094 std::string input = "A-+bc/d!*";

1095 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));

1096 EXPECT_EQ("Abcd!", input);

1097

1098 // No characters match kRemoveChars.

1099 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));

1100 EXPECT_EQ("Abcd!", input);

1101

1102 // Empty string.

1103 input.clear();

1104 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));

1105 EXPECT_EQ(std::string(), input);

1106 }

1107

1108 TEST(StringUtilTest, ReplaceChars) {

1109 struct TestData {

1110 const char* input;

1111 const char* replace_chars;

1112 const char* replace_with;

1113 const char* output;

1114 bool result;

1115 } cases[] = {

1116 { "", "", "", "", false },

1117 { "test", "", "", "test", false },

1118 { "test", "", "!", "test", false },

1119 { "test", "z", "!", "test", false },

1120 { "test", "e", "!", "t!st", true },

1121 { "test", "e", "!?", "t!?st", true },

1122 { "test", "ez", "!", "t!st", true },

1123 { "test", "zed", "!?", "t!?st", true },

1124 { "test", "t", "!?", "!?es!?", true },

1125 { "test", "et", "!>", "!>!>s!>", true },

1126 { "test", "zest", "!", "!!!!", true },

1127 { "test", "szt", "!", "!e!!", true },

1128 { "test", "t", "test", "testestest", true },

1129 };

1130

1131 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {

1132 std::string output;

1133 bool result = ReplaceChars(cases[i].input,

1134 cases[i].replace_chars,

1135 cases[i].replace_with,

1136 &output);

1137 EXPECT_EQ(cases[i].result, result);

1138 EXPECT_EQ(cases[i].output, output);

1139 }

1140 }

1141

1142 TEST(StringUtilTest, ContainsOnlyChars) {

1143 // Providing an empty list of characters should return false but for the empty

1144 // string.

1145 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));

1146 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));

1147

1148 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));

1149 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));

1150 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));

1151 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));

1152 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

1153 }

1154

1155 class WriteIntoTest : public testing::Test {

1156 protected:

1157 static void WritesCorrectly(size_t num_chars) {

1158 std::string buffer;

1159 char kOriginal[] = "supercali";

1160 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);

1161 // Using std::string(buffer.c_str()) instead of \|buffer\| truncates the

1162 // string at the first \0.

1163 EXPECT_EQ(std::string(kOriginal,

1164 std::min(num_chars, arraysize(kOriginal) - 1)),

1165 std::string(buffer.c_str()));

1166 EXPECT_EQ(num_chars, buffer.size());

1167 }

1168 };

1169

1170 TEST_F(WriteIntoTest, WriteInto) {

1171 // Validate that WriteInto reserves enough space and

1172 // sizes a string correctly.

1173 WritesCorrectly(1);

1174 WritesCorrectly(2);

1175 WritesCorrectly(5000);

1176

1177 // Validate that WriteInto doesn't modify other strings

1178 // when using a Copy-on-Write implementation.

1179 const char kLive[] = "live";

1180 const char kDead[] = "dead";

1181 const std::string live = kLive;

1182 std::string dead = live;

1183 strncpy(WriteInto(&dead, 5), kDead, 4);

1184 EXPECT_EQ(kDead, dead);

1185 EXPECT_EQ(4u, dead.size());

1186 EXPECT_EQ(kLive, live);

1187 EXPECT_EQ(4u, live.size());

1188 }

1189

1190 } // namespace base

OLD	NEW

« no previous file with comments | « base/string_util_posix.h ('k') | base/string_util_win.h » ('j') | no next file with comments »