base/string_util.cc - Issue 16331011: Move string files in base/ to the string subdirectory.

Side by Side Diff: base/string_util.cc

Issue 16331011: Move string files in base/ to the string subdirectory. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebase Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/string_util.h"

6

7 #include "build/build_config.h"

8

9 #include <ctype.h>

10 #include <errno.h>

11 #include <math.h>

12 #include <stdarg.h>

13 #include <stdio.h>

14 #include <stdlib.h>

15 #include <string.h>

16 #include <time.h>

17 #include <wchar.h>

18 #include <wctype.h>

19

20 #include <algorithm>

21 #include <vector>

22

23 #include "base/basictypes.h"

24 #include "base/logging.h"

25 #include "base/memory/singleton.h"

26 #include "base/strings/utf_string_conversion_utils.h"

27 #include "base/strings/utf_string_conversions.h"

28 #include "base/third_party/icu/icu_utf.h"

29

30 namespace {

31

32 // Force the singleton used by Empty[W]String[16] to be a unique type. This

33 // prevents other code that might accidentally use Singleton<string> from

34 // getting our internal one.

35 struct EmptyStrings {

36 EmptyStrings() {}

37 const std::string s;

38 const std::wstring ws;

39 const string16 s16;

40

41 static EmptyStrings* GetInstance() {

42 return Singleton<EmptyStrings>::get();

43 }

44 };

45

// Bookkeeping record used by ReplaceStringPlaceholders: pairs the index of a
// replaced parameter with the position in the output string where its
// replacement starts.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter), offset(offset) {}

  uintptr_t parameter;  // Index of the parameter.
  size_t offset;        // Starting position in the string.
};

59

60 static bool CompareParameter(const ReplacementOffset& elem1,

61 const ReplacementOffset& elem2) {

62 return elem1.parameter < elem2.parameter;

63 }

64

65 } // namespace

66

67 namespace base {

68

// Scans a wide printf-style format string and reports whether every
// conversion specification in it is considered portable.
//
// Returns false as soon as a specification uses 's' or 'c' without an 'l'
// modifier, or uses any of 'S', 'C', 'F', 'D', 'O' or 'U'. Returns true
// otherwise, including when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position != '%')
      continue;

    bool in_specification = true;
    bool modifier_l = false;
    while (in_specification) {
      // Eat up characters until reaching a known specifier.
      if (*++position == '\0') {
        // The format string ended in the middle of a specification.  Call
        // it portable because no unportable specifications were found.  The
        // string is equally broken on all platforms.
        return true;
      }

      if (*position == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        modifier_l = true;
      } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                 *position == 'S' || *position == 'C' || *position == 'F' ||
                 *position == 'D' || *position == 'O' || *position == 'U') {
        // Not portable.
        return false;
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
        // Portable, keep scanning the rest of the format string.
        in_specification = false;
      }
    }
  }

  return true;
}

103

104 } // namespace base

105

106

107 const std::string& EmptyString() {

108 return EmptyStrings::GetInstance()->s;

109 }

110

111 const std::wstring& EmptyWString() {

112 return EmptyStrings::GetInstance()->ws;

113 }

114

115 const string16& EmptyString16() {

116 return EmptyStrings::GetInstance()->s16;

117 }

118

// Copies |input| into |output| and then substitutes |replace_with| for every
// character that appears in the NUL-terminated set |replace_chars|. The search
// resumes after each inserted replacement, so the replacement text itself is
// never rescanned. Returns true if at least one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const typename STR::value_type replace_chars[],
                   const STR& replace_with,
                   STR* output) {
  // Read the length before touching |output|, exactly as the original did.
  const size_t step = replace_with.length();
  *output = input;

  bool replaced_any = false;
  for (size_t pos = output->find_first_of(replace_chars);
       pos != STR::npos;
       pos = output->find_first_of(replace_chars, pos + step)) {
    output->replace(pos, 1, replace_with);
    replaced_any = true;
  }
  return replaced_any;
}

138

139 bool ReplaceChars(const string16& input,

140 const char16 replace_chars[],

141 const string16& replace_with,

142 string16* output) {

143 return ReplaceCharsT(input, replace_chars, replace_with, output);

144 }

145

146 bool ReplaceChars(const std::string& input,

147 const char replace_chars[],

148 const std::string& replace_with,

149 std::string* output) {

150 return ReplaceCharsT(input, replace_chars, replace_with, output);

151 }

152

153 bool RemoveChars(const string16& input,

154 const char16 remove_chars[],

155 string16* output) {

156 return ReplaceChars(input, remove_chars, string16(), output);

157 }

158

159 bool RemoveChars(const std::string& input,

160 const char remove_chars[],

161 std::string* output) {

162 return ReplaceChars(input, remove_chars, std::string(), output);

163 }

164

165 template<typename STR>

166 TrimPositions TrimStringT(const STR& input,

167 const typename STR::value_type trim_chars[],

168 TrimPositions positions,

169 STR* output) {

170 // Find the edges of leading/trailing whitespace as desired.

171 const typename STR::size_type last_char = input.length() - 1;

172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?

173 input.find_first_not_of(trim_chars) : 0;

174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?

175 input.find_last_not_of(trim_chars) : last_char;

176

177 // When the string was all whitespace, report that we stripped off whitespace

178 // from whichever position the caller was interested in. For empty input, we

179 // stripped no whitespace, but we still need to clear \|output\|.

180 if (input.empty() \|\|

181 (first_good_char == STR::npos) \|\| (last_good_char == STR::npos)) {

182 bool input_was_empty = input.empty(); // in case output == &input

183 output->clear();

184 return input_was_empty ? TRIM_NONE : positions;

185 }

186

187 // Trim the whitespace.

188 *output =

189 input.substr(first_good_char, last_good_char - first_good_char + 1);

190

191 // Return where we trimmed from.

192 return static_cast<TrimPositions>(

193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) \|

194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));

195 }

196

197 bool TrimString(const std::wstring& input,

198 const wchar_t trim_chars[],

199 std::wstring* output) {

200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

201 }

202

203 #if !defined(WCHAR_T_IS_UTF16)

204 bool TrimString(const string16& input,

205 const char16 trim_chars[],

206 string16* output) {

207 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

208 }

209 #endif

210

211 bool TrimString(const std::string& input,

212 const char trim_chars[],

213 std::string* output) {

214 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

215 }

216

217 void TruncateUTF8ToByteSize(const std::string& input,

218 const size_t byte_size,

219 std::string* output) {

220 DCHECK(output);

221 if (byte_size > input.length()) {

222 *output = input;

223 return;

224 }

225 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));

226 // Note: This cast is necessary because CBU8_NEXT uses int32s.

227 int32 truncation_length = static_cast<int32>(byte_size);

228 int32 char_index = truncation_length - 1;

229 const char* data = input.data();

230

231 // Using CBU8, we will move backwards from the truncation point

232 // to the beginning of the string looking for a valid UTF8

233 // character. Once a full UTF8 character is found, we will

234 // truncate the string to the end of that character.

235 while (char_index >= 0) {

236 int32 prev = char_index;

237 uint32 code_point = 0;

238 CBU8_NEXT(data, char_index, truncation_length, code_point);

239 if (!base::IsValidCharacter(code_point) \|\|

240 !base::IsValidCodepoint(code_point)) {

241 char_index = prev - 1;

242 } else {

243 break;

244 }

245 }

246

247 if (char_index >= 0 )

248 *output = input.substr(0, char_index);

249 else

250 output->clear();

251 }

252

253 TrimPositions TrimWhitespace(const string16& input,

254 TrimPositions positions,

255 string16* output) {

256 return TrimStringT(input, kWhitespaceUTF16, positions, output);

257 }

258

259 TrimPositions TrimWhitespaceASCII(const std::string& input,

260 TrimPositions positions,

261 std::string* output) {

262 return TrimStringT(input, kWhitespaceASCII, positions, output);

263 }

264

265 // This function is only for backward-compatibility.

266 // To be removed when all callers are updated.

267 TrimPositions TrimWhitespace(const std::string& input,

268 TrimPositions positions,

269 std::string* output) {

270 return TrimWhitespaceASCII(input, positions, output);

271 }

272

// Returns a copy of |text| in which every run of whitespace (as decided by
// IsWhitespace) is reduced to a single space, and leading and trailing
// whitespace is dropped. When |trim_sequences_with_line_breaks| is true, a
// whitespace run containing '\n' or '\r' is removed entirely instead of being
// replaced by a space.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The output can never be longer than the input, so size the buffer once
  // and shrink it at the end.
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  int chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = L' ';
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely:
        // take back the single space written for this run.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}

314

315 std::wstring CollapseWhitespace(const std::wstring& text,

316 bool trim_sequences_with_line_breaks) {

317 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

318 }

319

320 #if !defined(WCHAR_T_IS_UTF16)

321 string16 CollapseWhitespace(const string16& text,

322 bool trim_sequences_with_line_breaks) {

323 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

324 }

325 #endif

326

327 std::string CollapseWhitespaceASCII(const std::string& text,

328 bool trim_sequences_with_line_breaks) {

329 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

330 }

331

332 bool ContainsOnlyWhitespaceASCII(const std::string& str) {

333 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {

334 if (!IsAsciiWhitespace(*i))

335 return false;

336 }

337 return true;

338 }

339

340 bool ContainsOnlyWhitespace(const string16& str) {

341 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;

342 }

343

// Returns true when every character of |input| also occurs somewhere in
// |characters|. An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // Searching for the first character NOT in the set is equivalent to
  // checking each character for membership one at a time.
  return input.find_first_not_of(characters) == STR::npos;
}

353

354 bool ContainsOnlyChars(const std::wstring& input,

355 const std::wstring& characters) {

356 return ContainsOnlyCharsT(input, characters);

357 }

358

359 #if !defined(WCHAR_T_IS_UTF16)

360 bool ContainsOnlyChars(const string16& input, const string16& characters) {

361 return ContainsOnlyCharsT(input, characters);

362 }

363 #endif

364

365 bool ContainsOnlyChars(const std::string& input,

366 const std::string& characters) {

367 return ContainsOnlyCharsT(input, characters);

368 }

369

370 std::string WideToASCII(const std::wstring& wide) {

371 DCHECK(IsStringASCII(wide)) << wide;

372 return std::string(wide.begin(), wide.end());

373 }

374

375 std::string UTF16ToASCII(const string16& utf16) {

376 DCHECK(IsStringASCII(utf16)) << utf16;

377 return std::string(utf16.begin(), utf16.end());

378 }

379

// Latin1 is just the low range of Unicode, so each element can be copied
// directly. Returns false, leaving |latin1| empty, as soon as an element
// greater than 255 is encountered; otherwise stores the converted string in
// |latin1| and returns true.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  latin1->clear();

  std::string converted;
  converted.reserve(wide.size());
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    if (*it > 255)
      return false;
    converted.push_back(static_cast<char>(*it));
  }

  latin1->swap(converted);
  return true;
}

393

394 template<class STR>

395 static bool DoIsStringASCII(const STR& str) {

396 for (size_t i = 0; i < str.length(); i++) {

397 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];

398 if (c > 0x7F)

399 return false;

400 }

401 return true;

402 }

403

404 bool IsStringASCII(const std::wstring& str) {

405 return DoIsStringASCII(str);

406 }

407

408 #if !defined(WCHAR_T_IS_UTF16)

409 bool IsStringASCII(const string16& str) {

410 return DoIsStringASCII(str);

411 }

412 #endif

413

414 bool IsStringASCII(const base::StringPiece& str) {

415 return DoIsStringASCII(str);

416 }

417

418 bool IsStringUTF8(const std::string& str) {

419 const char *src = str.data();

420 int32 src_len = static_cast<int32>(str.length());

421 int32 char_index = 0;

422

423 while (char_index < src_len) {

424 int32 code_point;

425 CBU8_NEXT(src, char_index, src_len, code_point);

426 if (!base::IsValidCharacter(code_point))

427 return false;

428 }

429 return true;

430 }

431

432 template<typename Iter>

433 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,

434 Iter a_end,

435 const char* b) {

436 for (Iter it = a_begin; it != a_end; ++it, ++b) {

437 if (!b \|\| base::ToLowerASCII(it) != *b)

438 return false;

439 }

440 return *b == 0;

441 }

442

443 // Front-ends for LowerCaseEqualsASCII.

444 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {

445 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

446 }

447

448 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {

449 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

450 }

451

452 #if !defined(WCHAR_T_IS_UTF16)

453 bool LowerCaseEqualsASCII(const string16& a, const char* b) {

454 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

455 }

456 #endif

457

458 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,

459 std::string::const_iterator a_end,

460 const char* b) {

461 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

462 }

463

464 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,

465 std::wstring::const_iterator a_end,

466 const char* b) {

467 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

468 }

469

470 #if !defined(WCHAR_T_IS_UTF16)

471 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,

472 string16::const_iterator a_end,

473 const char* b) {

474 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

475 }

476 #endif

477

478 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.

479 #if !defined(OS_ANDROID)

480 bool LowerCaseEqualsASCII(const char* a_begin,

481 const char* a_end,

482 const char* b) {

483 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

484 }

485

486 bool LowerCaseEqualsASCII(const wchar_t* a_begin,

487 const wchar_t* a_end,

488 const char* b) {

489 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

490 }

491

492 #if !defined(WCHAR_T_IS_UTF16)

493 bool LowerCaseEqualsASCII(const char16* a_begin,

494 const char16* a_end,

495 const char* b) {

496 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

497 }

498 #endif

499

500 #endif // !defined(OS_ANDROID)

501

502 bool EqualsASCII(const string16& a, const base::StringPiece& b) {

503 if (a.length() != b.length())

504 return false;

505 return std::equal(b.begin(), b.end(), a.begin());

506 }

507

508 bool StartsWithASCII(const std::string& str,

509 const std::string& search,

510 bool case_sensitive) {

511 if (case_sensitive)

512 return str.compare(0, search.length(), search) == 0;

513 else

514 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;

515 }

516

517 template <typename STR>

518 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {

519 if (case_sensitive) {

520 return str.compare(0, search.length(), search) == 0;

521 } else {

522 if (search.size() > str.size())

523 return false;

524 return std::equal(search.begin(), search.end(), str.begin(),

525 base::CaseInsensitiveCompare<typename STR::value_type>());

526 }

527 }

528

529 bool StartsWith(const std::wstring& str, const std::wstring& search,

530 bool case_sensitive) {

531 return StartsWithT(str, search, case_sensitive);

532 }

533

534 #if !defined(WCHAR_T_IS_UTF16)

535 bool StartsWith(const string16& str, const string16& search,

536 bool case_sensitive) {

537 return StartsWithT(str, search, case_sensitive);

538 }

539 #endif

540

541 template <typename STR>

542 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {

543 typename STR::size_type str_length = str.length();

544 typename STR::size_type search_length = search.length();

545 if (search_length > str_length)

546 return false;

547 if (case_sensitive) {

548 return str.compare(str_length - search_length, search_length, search) == 0;

549 } else {

550 return std::equal(search.begin(), search.end(),

551 str.begin() + (str_length - search_length),

552 base::CaseInsensitiveCompare<typename STR::value_type>());

553 }

554 }

555

556 bool EndsWith(const std::string& str, const std::string& search,

557 bool case_sensitive) {

558 return EndsWithT(str, search, case_sensitive);

559 }

560

561 bool EndsWith(const std::wstring& str, const std::wstring& search,

562 bool case_sensitive) {

563 return EndsWithT(str, search, case_sensitive);

564 }

565

566 #if !defined(WCHAR_T_IS_UTF16)

567 bool EndsWith(const string16& str, const string16& search,

568 bool case_sensitive) {

569 return EndsWithT(str, search, case_sensitive);

570 }

571 #endif

572

573 static const char* const kByteStringsUnlocalized[] = {

574 " B",

575 " kB",

576 " MB",

577 " GB",

578 " TB",

579 " PB"

580 };

581

582 string16 FormatBytesUnlocalized(int64 bytes) {

583 double unit_amount = static_cast<double>(bytes);

584 size_t dimension = 0;

585 const int kKilo = 1024;

586 while (unit_amount >= kKilo &&

587 dimension < arraysize(kByteStringsUnlocalized) - 1) {

588 unit_amount /= kKilo;

589 dimension++;

590 }

591

592 char buf[64];

593 if (bytes != 0 && dimension > 0 && unit_amount < 100) {

594 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,

595 kByteStringsUnlocalized[dimension]);

596 } else {

597 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,

598 kByteStringsUnlocalized[dimension]);

599 }

600

601 return ASCIIToUTF16(buf);

602 }

603

// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Does nothing when |start_offset| is npos or lies at
// or beyond the end of |*str|. |find_this| must not be empty (DCHECKed).
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Continue searching after the inserted text so it is never rescanned.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}

623

624 void ReplaceFirstSubstringAfterOffset(string16* str,

625 string16::size_type start_offset,

626 const string16& find_this,

627 const string16& replace_with) {

628 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

629 false); // replace first instance

630 }

631

632 void ReplaceFirstSubstringAfterOffset(std::string* str,

633 std::string::size_type start_offset,

634 const std::string& find_this,

635 const std::string& replace_with) {

636 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

637 false); // replace first instance

638 }

639

640 void ReplaceSubstringsAfterOffset(string16* str,

641 string16::size_type start_offset,

642 const string16& find_this,

643 const string16& replace_with) {

644 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

645 true); // replace all instances

646 }

647

648 void ReplaceSubstringsAfterOffset(std::string* str,

649 std::string::size_type start_offset,

650 const std::string& find_this,

651 const std::string& replace_with) {

652 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

653 true); // replace all instances

654 }

655

656

// Splits |str| into the maximal runs of characters that are not in
// |delimiters|, storing them in |*tokens| (which is cleared first). Empty
// tokens are never produced. Returns the number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  tokens->clear();

  typename STR::size_type token_begin = str.find_first_not_of(delimiters);
  while (token_begin != STR::npos) {
    const typename STR::size_type token_end =
        str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // Last token: it runs to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}

677

678 size_t Tokenize(const std::wstring& str,

679 const std::wstring& delimiters,

680 std::vector<std::wstring>* tokens) {

681 return TokenizeT(str, delimiters, tokens);

682 }

683

684 #if !defined(WCHAR_T_IS_UTF16)

685 size_t Tokenize(const string16& str,

686 const string16& delimiters,

687 std::vector<string16>* tokens) {

688 return TokenizeT(str, delimiters, tokens);

689 }

690 #endif

691

692 size_t Tokenize(const std::string& str,

693 const std::string& delimiters,

694 std::vector<std::string>* tokens) {

695 return TokenizeT(str, delimiters, tokens);

696 }

697

698 size_t Tokenize(const base::StringPiece& str,

699 const base::StringPiece& delimiters,

700 std::vector<base::StringPiece>* tokens) {

701 return TokenizeT(str, delimiters, tokens);

702 }

703

// Concatenates the elements of |parts|, inserting |sep| between consecutive
// elements. Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  if (parts.empty())
    return STR();

  typename std::vector<STR>::const_iterator it = parts.begin();
  STR joined(*it);
  // The first element is already in place; every later one is preceded by
  // the separator.
  for (++it; it != parts.end(); ++it) {
    joined += sep;
    joined += *it;
  }
  return joined;
}

720

721 std::string JoinString(const std::vector<std::string>& parts, char sep) {

722 return JoinStringT(parts, std::string(1, sep));

723 }

724

725 string16 JoinString(const std::vector<string16>& parts, char16 sep) {

726 return JoinStringT(parts, string16(1, sep));

727 }

728

729 std::string JoinString(const std::vector<std::string>& parts,

730 const std::string& separator) {

731 return JoinStringT(parts, separator);

732 }

733

734 string16 JoinString(const std::vector<string16>& parts,

735 const string16& separator) {

736 return JoinStringT(parts, separator);

737 }

738

739 template<class FormatStringType, class OutStringType>

740 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,

741 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {

742 size_t substitutions = subst.size();

743

744 size_t sub_length = 0;

745 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();

746 iter != subst.end(); ++iter) {

747 sub_length += iter->length();

748 }

749

750 OutStringType formatted;

751 formatted.reserve(format_string.length() + sub_length);

752

753 std::vector<ReplacementOffset> r_offsets;

754 for (typename FormatStringType::const_iterator i = format_string.begin();

755 i != format_string.end(); ++i) {

756 if ('$' == *i) {

757 if (i + 1 != format_string.end()) {

758 ++i;

759 DCHECK('$' == i \|\| '1' <= i) << "Invalid placeholder: " << *i;

760 if ('$' == *i) {

761 while (i != format_string.end() && '$' == *i) {

762 formatted.push_back('$');

763 ++i;

764 }

765 --i;

766 } else {

767 uintptr_t index = 0;

768 while (i != format_string.end() && '0' <= i && i <= '9') {

769 index *= 10;

770 index += *i - '0';

771 ++i;

772 }

773 --i;

774 index -= 1;

775 if (offsets) {

776 ReplacementOffset r_offset(index,

777 static_cast<int>(formatted.size()));

778 r_offsets.insert(std::lower_bound(r_offsets.begin(),

779 r_offsets.end(),

780 r_offset,

781 &CompareParameter),

782 r_offset);

783 }

784 if (index < substitutions)

785 formatted.append(subst.at(index));

786 }

787 }

788 } else {

789 formatted.push_back(*i);

790 }

791 }

792 if (offsets) {

793 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();

794 i != r_offsets.end(); ++i) {

795 offsets->push_back(i->offset);

796 }

797 }

798 return formatted;

799 }

800

801 string16 ReplaceStringPlaceholders(const string16& format_string,

802 const std::vector<string16>& subst,

803 std::vector<size_t>* offsets) {

804 return DoReplaceStringPlaceholders(format_string, subst, offsets);

805 }

806

807 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,

808 const std::vector<std::string>& subst,

809 std::vector<size_t>* offsets) {

810 return DoReplaceStringPlaceholders(format_string, subst, offsets);

811 }

812

813 string16 ReplaceStringPlaceholders(const string16& format_string,

814 const string16& a,

815 size_t* offset) {

816 std::vector<size_t> offsets;

817 std::vector<string16> subst;

818 subst.push_back(a);

819 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

820

821 DCHECK(offsets.size() == 1);

822 if (offset) {

823 *offset = offsets[0];

824 }

825 return result;

826 }

827

828 static bool IsWildcard(base_icu::UChar32 character) {

829 return character == '*' \|\| character == '?';

830 }

831

832 // Move the strings pointers to the point where they start to differ.

833 template <typename CHAR, typename NEXT>

834 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,

835 const CHAR** string, const CHAR* string_end,

836 NEXT next) {

837 const CHAR* escape = NULL;

838 while (pattern != pattern_end && string != string_end) {

839 if (!escape && IsWildcard(**pattern)) {

840 // We don't want to match wildcard here, except if it's escaped.

841 return;

842 }

843

844 // Check if the escapement char is found. If so, skip it and move to the

845 // next character.

846 if (!escape && **pattern == '\\') {

847 escape = *pattern;

848 next(pattern, pattern_end);

849 continue;

850 }

851

852 // Check if the chars match, if so, increment the ptrs.

853 const CHAR* pattern_next = *pattern;

854 const CHAR* string_next = *string;

855 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);

856 if (pattern_char == next(&string_next, string_end) &&

857 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {

858 *pattern = pattern_next;

859 *string = string_next;

860 } else {

861 // Uh ho, it did not match, we are done. If the last char was an

862 // escapement, that means that it was an error to advance the ptr here,

863 // let's put it back where it was. This also mean that the MatchPattern

864 // function will return false because if we can't match an escape char

865 // here, then no one will.

866 if (escape) {

867 *pattern = escape;

868 }

869 return;

870 }

871

872 escape = NULL;

873 }

874 }

875

// Advances |*pattern| past any consecutive wildcard characters, stopping at
// the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}

884

// Recursive matcher behind MatchPattern. Returns true when the text
// [eval, eval_end) matches the pattern [pattern, pattern_end), where '?' and
// '*' are wildcards. |depth| counts the recursion level; once it exceeds 16
// the function gives up and reports no match. |next| reads one character and
// advances the pointer handed to it.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix shared by the pattern and the text.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // Text exhausted: whatever is left of the pattern must be wildcards only.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern exhausted while text remains: no match.
  if (pattern == pattern_end)
    return false;

  // |rest| is the pattern with its first character skipped.
  const CHAR* rest = pattern;
  next(&rest, pattern_end);

  if (pattern[0] == '?') {
    // Try the remaining pattern against the text as-is ...
    if (MatchPatternT(eval, eval_end, rest, pattern_end, depth + 1, next))
      return true;
    // ... and against the text with one character eaten.
    const CHAR* eval_after_one = eval;
    next(&eval_after_one, eval_end);
    return MatchPatternT(eval_after_one, eval_end, rest, pattern_end,
                         depth + 1, next);
  }

  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&rest, pattern_end, next);

    // Try the remaining pattern at every position of the remaining text.
    for (; eval != eval_end; eval++) {
      if (MatchPatternT(eval, eval_end, rest, pattern_end, depth + 1, next))
        return true;
    }

    // The text has been fully consumed; it only matches if the pattern
    // consists of nothing but wildcards.
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  return false;
}

949

950 struct NextCharUTF8 {

951 base_icu::UChar32 operator()(const char** p, const char* end) {

952 base_icu::UChar32 c;

953 int offset = 0;

954 CBU8_NEXT(p, offset, end - p, c);

955 *p += offset;

956 return c;

957 }

958 };

959

960 struct NextCharUTF16 {

961 base_icu::UChar32 operator()(const char16** p, const char16* end) {

962 base_icu::UChar32 c;

963 int offset = 0;

964 CBU16_NEXT(p, offset, end - p, c);

965 *p += offset;

966 return c;

967 }

968 };

969

970 bool MatchPattern(const base::StringPiece& eval,

971 const base::StringPiece& pattern) {

972 return MatchPatternT(eval.data(), eval.data() + eval.size(),

973 pattern.data(), pattern.data() + pattern.size(),

974 0, NextCharUTF8());

975 }

976

977 bool MatchPattern(const string16& eval, const string16& pattern) {

978 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),

979 pattern.c_str(), pattern.c_str() + pattern.size(),

980 0, NextCharUTF16());

981 }

982

983 // The following code is compatible with the OpenBSD lcpy interface. See:

984 // http://www.gratisoft.us/todd/papers/strlcpy.html

985 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c

986

987 namespace {

988

// Copies the NUL-terminated string |src| into |dst|, writing at most
// |dst_size| elements and always NUL-terminating when |dst_size| is non-zero.
// Returns the length of |src| in characters (excluding the terminator), so a
// return value >= |dst_size| tells the caller that truncation occurred.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    dst[copied] = src[copied];
    if (dst[copied] == 0)  // We hit and copied the terminating NULL.
      return copied;
    ++copied;
  }

  // The buffer was filled without reaching the terminator, so the last
  // element written belongs to |src|; overwrite it with the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of |src|, and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length])
    ++src_length;
  return src_length;
}

1004

1005 } // namespace

1006

1007 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {

1008 return lcpyT<char>(dst, src, dst_size);

1009 }

1010 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {

1011 return lcpyT<wchar_t>(dst, src, dst_size);

1012 }

OLD	NEW

« no previous file with comments | « base/string_util.h ('k') | base/string_util_constants.cc » ('j') | no next file with comments »