OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/string_util.h" | |
6 | |
7 #include "build/build_config.h" | |
8 | |
9 #include <ctype.h> | |
10 #include <errno.h> | |
11 #include <math.h> | |
12 #include <stdarg.h> | |
13 #include <stdio.h> | |
14 #include <stdlib.h> | |
15 #include <string.h> | |
16 #include <time.h> | |
17 #include <wchar.h> | |
18 #include <wctype.h> | |
19 | |
20 #include <algorithm> | |
21 #include <vector> | |
22 | |
23 #include "base/basictypes.h" | |
24 #include "base/logging.h" | |
25 #include "base/memory/singleton.h" | |
26 #include "base/strings/utf_string_conversion_utils.h" | |
27 #include "base/strings/utf_string_conversions.h" | |
28 #include "base/third_party/icu/icu_utf.h" | |
29 | |
30 namespace { | |
31 | |
32 // Force the singleton used by Empty[W]String[16] to be a unique type. This | |
33 // prevents other code that might accidentally use Singleton<string> from | |
34 // getting our internal one. | |
35 struct EmptyStrings { | |
36 EmptyStrings() {} | |
37 const std::string s; | |
38 const std::wstring ws; | |
39 const string16 s16; | |
40 | |
41 static EmptyStrings* GetInstance() { | |
42 return Singleton<EmptyStrings>::get(); | |
43 } | |
44 }; | |
45 | |
46 // Used by ReplaceStringPlaceholders to track the position in the string of | |
47 // replaced parameters. | |
48 struct ReplacementOffset { | |
49 ReplacementOffset(uintptr_t parameter, size_t offset) | |
50 : parameter(parameter), | |
51 offset(offset) {} | |
52 | |
53 // Index of the parameter. | |
54 uintptr_t parameter; | |
55 | |
56 // Starting position in the string. | |
57 size_t offset; | |
58 }; | |
59 | |
60 static bool CompareParameter(const ReplacementOffset& elem1, | |
61 const ReplacementOffset& elem2) { | |
62 return elem1.parameter < elem2.parameter; | |
63 } | |
64 | |
65 } // namespace | |
66 | |
namespace base {

// Scans a wide printf-style format string and reports whether every
// conversion specification in it is considered portable.
//
// A specification is rejected when it uses 's' or 'c' without a preceding
// 'l' modifier, or when it uses any of 'S', 'C', 'F', 'D', 'O', 'U'.
// A format string that ends in the middle of a specification is reported as
// portable, since it is equally broken everywhere.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      ++cursor;
      continue;
    }

    // Walk the specification until a terminating conversion character.
    bool seen_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == '\0') {
        // Truncated specification: nothing unportable was found.
        return true;
      }

      if (ch == 'l') {
        // Only 'l' can make the 's' and 'c' specifiers acceptable.
        seen_l_modifier = true;
      } else {
        const bool needs_l = (ch == 's' || ch == 'c');
        const bool never_portable = (ch == 'S' || ch == 'C' || ch == 'F' ||
                                     ch == 'D' || ch == 'O' || ch == 'U');
        if ((needs_l && !seen_l_modifier) || never_portable)
          return false;
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // Known conversion character: this specification is finished.
        break;
      }
    }

    // Step past the conversion character and keep scanning.
    ++cursor;
  }

  return true;
}

}  // namespace base
105 | |
106 | |
107 const std::string& EmptyString() { | |
108 return EmptyStrings::GetInstance()->s; | |
109 } | |
110 | |
111 const std::wstring& EmptyWString() { | |
112 return EmptyStrings::GetInstance()->ws; | |
113 } | |
114 | |
115 const string16& EmptyString16() { | |
116 return EmptyStrings::GetInstance()->s16; | |
117 } | |
118 | |
// Copies |input| into |output| and then replaces every character that
// appears in the NUL-terminated set |replace_chars| with |replace_with|.
// The search resumes after each inserted replacement, so characters inside
// |replace_with| are never themselves replaced.
// Returns true if at least one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const typename STR::value_type replace_chars[],
                   const STR& replace_with,
                   STR* output) {
  // Measured up front, before |output| is written.
  const size_t step = replace_with.length();

  *output = input;

  bool any_replaced = false;
  for (size_t pos = output->find_first_of(replace_chars);
       pos != STR::npos;
       pos = output->find_first_of(replace_chars, pos + step)) {
    output->replace(pos, 1, replace_with);
    any_replaced = true;
  }

  return any_replaced;
}
138 | |
139 bool ReplaceChars(const string16& input, | |
140 const char16 replace_chars[], | |
141 const string16& replace_with, | |
142 string16* output) { | |
143 return ReplaceCharsT(input, replace_chars, replace_with, output); | |
144 } | |
145 | |
146 bool ReplaceChars(const std::string& input, | |
147 const char replace_chars[], | |
148 const std::string& replace_with, | |
149 std::string* output) { | |
150 return ReplaceCharsT(input, replace_chars, replace_with, output); | |
151 } | |
152 | |
153 bool RemoveChars(const string16& input, | |
154 const char16 remove_chars[], | |
155 string16* output) { | |
156 return ReplaceChars(input, remove_chars, string16(), output); | |
157 } | |
158 | |
159 bool RemoveChars(const std::string& input, | |
160 const char remove_chars[], | |
161 std::string* output) { | |
162 return ReplaceChars(input, remove_chars, std::string(), output); | |
163 } | |
164 | |
165 template<typename STR> | |
166 TrimPositions TrimStringT(const STR& input, | |
167 const typename STR::value_type trim_chars[], | |
168 TrimPositions positions, | |
169 STR* output) { | |
170 // Find the edges of leading/trailing whitespace as desired. | |
171 const typename STR::size_type last_char = input.length() - 1; | |
172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? | |
173 input.find_first_not_of(trim_chars) : 0; | |
174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? | |
175 input.find_last_not_of(trim_chars) : last_char; | |
176 | |
177 // When the string was all whitespace, report that we stripped off whitespace | |
178 // from whichever position the caller was interested in. For empty input, we | |
179 // stripped no whitespace, but we still need to clear |output|. | |
180 if (input.empty() || | |
181 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { | |
182 bool input_was_empty = input.empty(); // in case output == &input | |
183 output->clear(); | |
184 return input_was_empty ? TRIM_NONE : positions; | |
185 } | |
186 | |
187 // Trim the whitespace. | |
188 *output = | |
189 input.substr(first_good_char, last_good_char - first_good_char + 1); | |
190 | |
191 // Return where we trimmed from. | |
192 return static_cast<TrimPositions>( | |
193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | | |
194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); | |
195 } | |
196 | |
197 bool TrimString(const std::wstring& input, | |
198 const wchar_t trim_chars[], | |
199 std::wstring* output) { | |
200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
201 } | |
202 | |
203 #if !defined(WCHAR_T_IS_UTF16) | |
204 bool TrimString(const string16& input, | |
205 const char16 trim_chars[], | |
206 string16* output) { | |
207 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
208 } | |
209 #endif | |
210 | |
211 bool TrimString(const std::string& input, | |
212 const char trim_chars[], | |
213 std::string* output) { | |
214 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | |
215 } | |
216 | |
217 void TruncateUTF8ToByteSize(const std::string& input, | |
218 const size_t byte_size, | |
219 std::string* output) { | |
220 DCHECK(output); | |
221 if (byte_size > input.length()) { | |
222 *output = input; | |
223 return; | |
224 } | |
225 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); | |
226 // Note: This cast is necessary because CBU8_NEXT uses int32s. | |
227 int32 truncation_length = static_cast<int32>(byte_size); | |
228 int32 char_index = truncation_length - 1; | |
229 const char* data = input.data(); | |
230 | |
231 // Using CBU8, we will move backwards from the truncation point | |
232 // to the beginning of the string looking for a valid UTF8 | |
233 // character. Once a full UTF8 character is found, we will | |
234 // truncate the string to the end of that character. | |
235 while (char_index >= 0) { | |
236 int32 prev = char_index; | |
237 uint32 code_point = 0; | |
238 CBU8_NEXT(data, char_index, truncation_length, code_point); | |
239 if (!base::IsValidCharacter(code_point) || | |
240 !base::IsValidCodepoint(code_point)) { | |
241 char_index = prev - 1; | |
242 } else { | |
243 break; | |
244 } | |
245 } | |
246 | |
247 if (char_index >= 0 ) | |
248 *output = input.substr(0, char_index); | |
249 else | |
250 output->clear(); | |
251 } | |
252 | |
253 TrimPositions TrimWhitespace(const string16& input, | |
254 TrimPositions positions, | |
255 string16* output) { | |
256 return TrimStringT(input, kWhitespaceUTF16, positions, output); | |
257 } | |
258 | |
259 TrimPositions TrimWhitespaceASCII(const std::string& input, | |
260 TrimPositions positions, | |
261 std::string* output) { | |
262 return TrimStringT(input, kWhitespaceASCII, positions, output); | |
263 } | |
264 | |
265 // This function is only for backward-compatibility. | |
266 // To be removed when all callers are updated. | |
267 TrimPositions TrimWhitespace(const std::string& input, | |
268 TrimPositions positions, | |
269 std::string* output) { | |
270 return TrimWhitespaceASCII(input, positions, output); | |
271 } | |
272 | |
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every interior run of whitespace becomes a single space. When
// |trim_sequences_with_line_breaks| is true, an interior run that contains a
// CR or LF is removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The result can never be longer than the input; size it up front and
  // shrink it at the end.
  STR collapsed;
  collapsed.resize(text.size());

  // Start out as if a whitespace run had already been dropped so that leading
  // whitespace produces no output.
  bool inside_run = true;
  bool run_dropped = true;

  int out_len = 0;
  for (typename STR::const_iterator it(text.begin()); it != text.end(); ++it) {
    if (!IsWhitespace(*it)) {
      // Non-whitespace characters are copied straight across.
      inside_run = false;
      run_dropped = false;
      collapsed[out_len++] = *it;
      continue;
    }

    if (!inside_run) {
      // First whitespace of a run: emit the single replacement space.
      inside_run = true;
      collapsed[out_len++] = L' ';
    }

    const bool is_line_break = (*it == '\n') || (*it == '\r');
    if (trim_sequences_with_line_breaks && !run_dropped && is_line_break) {
      // The run contains a line break: take back the space emitted for it.
      run_dropped = true;
      --out_len;
    }
  }

  if (inside_run && !run_dropped) {
    // The text ended inside a run whose space is still in the output.
    --out_len;
  }

  collapsed.resize(out_len);
  return collapsed;
}
314 | |
315 std::wstring CollapseWhitespace(const std::wstring& text, | |
316 bool trim_sequences_with_line_breaks) { | |
317 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
318 } | |
319 | |
320 #if !defined(WCHAR_T_IS_UTF16) | |
321 string16 CollapseWhitespace(const string16& text, | |
322 bool trim_sequences_with_line_breaks) { | |
323 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
324 } | |
325 #endif | |
326 | |
327 std::string CollapseWhitespaceASCII(const std::string& text, | |
328 bool trim_sequences_with_line_breaks) { | |
329 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); | |
330 } | |
331 | |
332 bool ContainsOnlyWhitespaceASCII(const std::string& str) { | |
333 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { | |
334 if (!IsAsciiWhitespace(*i)) | |
335 return false; | |
336 } | |
337 return true; | |
338 } | |
339 | |
340 bool ContainsOnlyWhitespace(const string16& str) { | |
341 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos; | |
342 } | |
343 | |
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // The input qualifies exactly when it has no character outside the set.
  return input.find_first_not_of(characters) == STR::npos;
}
353 | |
354 bool ContainsOnlyChars(const std::wstring& input, | |
355 const std::wstring& characters) { | |
356 return ContainsOnlyCharsT(input, characters); | |
357 } | |
358 | |
359 #if !defined(WCHAR_T_IS_UTF16) | |
360 bool ContainsOnlyChars(const string16& input, const string16& characters) { | |
361 return ContainsOnlyCharsT(input, characters); | |
362 } | |
363 #endif | |
364 | |
365 bool ContainsOnlyChars(const std::string& input, | |
366 const std::string& characters) { | |
367 return ContainsOnlyCharsT(input, characters); | |
368 } | |
369 | |
370 std::string WideToASCII(const std::wstring& wide) { | |
371 DCHECK(IsStringASCII(wide)) << wide; | |
372 return std::string(wide.begin(), wide.end()); | |
373 } | |
374 | |
375 std::string UTF16ToASCII(const string16& utf16) { | |
376 DCHECK(IsStringASCII(utf16)) << utf16; | |
377 return std::string(utf16.begin(), utf16.end()); | |
378 } | |
379 | |
// Converts |wide| to Latin-1. Latin1 is just the low range of Unicode, so
// each code unit in [0, 255] is copied directly into one byte.
//
// Returns true and stores the converted bytes in |latin1| on success.
// Returns false and leaves |latin1| empty if any element of |wide| lies
// outside [0, 255].
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  latin1->clear();

  std::string converted;
  converted.reserve(wide.size());
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so a
    // negative element is rejected instead of slipping past the range check
    // and being silently truncated to a byte.
    if (static_cast<unsigned long>(*it) > 255UL)
      return false;
    converted.push_back(static_cast<char>(*it));
  }

  latin1->swap(converted);
  return true;
}
393 | |
394 template<class STR> | |
395 static bool DoIsStringASCII(const STR& str) { | |
396 for (size_t i = 0; i < str.length(); i++) { | |
397 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; | |
398 if (c > 0x7F) | |
399 return false; | |
400 } | |
401 return true; | |
402 } | |
403 | |
404 bool IsStringASCII(const std::wstring& str) { | |
405 return DoIsStringASCII(str); | |
406 } | |
407 | |
408 #if !defined(WCHAR_T_IS_UTF16) | |
409 bool IsStringASCII(const string16& str) { | |
410 return DoIsStringASCII(str); | |
411 } | |
412 #endif | |
413 | |
414 bool IsStringASCII(const base::StringPiece& str) { | |
415 return DoIsStringASCII(str); | |
416 } | |
417 | |
418 bool IsStringUTF8(const std::string& str) { | |
419 const char *src = str.data(); | |
420 int32 src_len = static_cast<int32>(str.length()); | |
421 int32 char_index = 0; | |
422 | |
423 while (char_index < src_len) { | |
424 int32 code_point; | |
425 CBU8_NEXT(src, char_index, src_len, code_point); | |
426 if (!base::IsValidCharacter(code_point)) | |
427 return false; | |
428 } | |
429 return true; | |
430 } | |
431 | |
432 template<typename Iter> | |
433 static inline bool DoLowerCaseEqualsASCII(Iter a_begin, | |
434 Iter a_end, | |
435 const char* b) { | |
436 for (Iter it = a_begin; it != a_end; ++it, ++b) { | |
437 if (!*b || base::ToLowerASCII(*it) != *b) | |
438 return false; | |
439 } | |
440 return *b == 0; | |
441 } | |
442 | |
443 // Front-ends for LowerCaseEqualsASCII. | |
444 bool LowerCaseEqualsASCII(const std::string& a, const char* b) { | |
445 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
446 } | |
447 | |
448 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { | |
449 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
450 } | |
451 | |
452 #if !defined(WCHAR_T_IS_UTF16) | |
453 bool LowerCaseEqualsASCII(const string16& a, const char* b) { | |
454 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); | |
455 } | |
456 #endif | |
457 | |
458 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
459 std::string::const_iterator a_end, | |
460 const char* b) { | |
461 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
462 } | |
463 | |
464 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
465 std::wstring::const_iterator a_end, | |
466 const char* b) { | |
467 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
468 } | |
469 | |
470 #if !defined(WCHAR_T_IS_UTF16) | |
471 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
472 string16::const_iterator a_end, | |
473 const char* b) { | |
474 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
475 } | |
476 #endif | |
477 | |
478 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. | |
479 #if !defined(OS_ANDROID) | |
480 bool LowerCaseEqualsASCII(const char* a_begin, | |
481 const char* a_end, | |
482 const char* b) { | |
483 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
484 } | |
485 | |
486 bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
487 const wchar_t* a_end, | |
488 const char* b) { | |
489 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
490 } | |
491 | |
492 #if !defined(WCHAR_T_IS_UTF16) | |
493 bool LowerCaseEqualsASCII(const char16* a_begin, | |
494 const char16* a_end, | |
495 const char* b) { | |
496 return DoLowerCaseEqualsASCII(a_begin, a_end, b); | |
497 } | |
498 #endif | |
499 | |
500 #endif // !defined(OS_ANDROID) | |
501 | |
502 bool EqualsASCII(const string16& a, const base::StringPiece& b) { | |
503 if (a.length() != b.length()) | |
504 return false; | |
505 return std::equal(b.begin(), b.end(), a.begin()); | |
506 } | |
507 | |
508 bool StartsWithASCII(const std::string& str, | |
509 const std::string& search, | |
510 bool case_sensitive) { | |
511 if (case_sensitive) | |
512 return str.compare(0, search.length(), search) == 0; | |
513 else | |
514 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; | |
515 } | |
516 | |
517 template <typename STR> | |
518 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
519 if (case_sensitive) { | |
520 return str.compare(0, search.length(), search) == 0; | |
521 } else { | |
522 if (search.size() > str.size()) | |
523 return false; | |
524 return std::equal(search.begin(), search.end(), str.begin(), | |
525 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
526 } | |
527 } | |
528 | |
529 bool StartsWith(const std::wstring& str, const std::wstring& search, | |
530 bool case_sensitive) { | |
531 return StartsWithT(str, search, case_sensitive); | |
532 } | |
533 | |
534 #if !defined(WCHAR_T_IS_UTF16) | |
535 bool StartsWith(const string16& str, const string16& search, | |
536 bool case_sensitive) { | |
537 return StartsWithT(str, search, case_sensitive); | |
538 } | |
539 #endif | |
540 | |
541 template <typename STR> | |
542 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { | |
543 typename STR::size_type str_length = str.length(); | |
544 typename STR::size_type search_length = search.length(); | |
545 if (search_length > str_length) | |
546 return false; | |
547 if (case_sensitive) { | |
548 return str.compare(str_length - search_length, search_length, search) == 0; | |
549 } else { | |
550 return std::equal(search.begin(), search.end(), | |
551 str.begin() + (str_length - search_length), | |
552 base::CaseInsensitiveCompare<typename STR::value_type>()); | |
553 } | |
554 } | |
555 | |
556 bool EndsWith(const std::string& str, const std::string& search, | |
557 bool case_sensitive) { | |
558 return EndsWithT(str, search, case_sensitive); | |
559 } | |
560 | |
561 bool EndsWith(const std::wstring& str, const std::wstring& search, | |
562 bool case_sensitive) { | |
563 return EndsWithT(str, search, case_sensitive); | |
564 } | |
565 | |
566 #if !defined(WCHAR_T_IS_UTF16) | |
567 bool EndsWith(const string16& str, const string16& search, | |
568 bool case_sensitive) { | |
569 return EndsWithT(str, search, case_sensitive); | |
570 } | |
571 #endif | |
572 | |
573 static const char* const kByteStringsUnlocalized[] = { | |
574 " B", | |
575 " kB", | |
576 " MB", | |
577 " GB", | |
578 " TB", | |
579 " PB" | |
580 }; | |
581 | |
582 string16 FormatBytesUnlocalized(int64 bytes) { | |
583 double unit_amount = static_cast<double>(bytes); | |
584 size_t dimension = 0; | |
585 const int kKilo = 1024; | |
586 while (unit_amount >= kKilo && | |
587 dimension < arraysize(kByteStringsUnlocalized) - 1) { | |
588 unit_amount /= kKilo; | |
589 dimension++; | |
590 } | |
591 | |
592 char buf[64]; | |
593 if (bytes != 0 && dimension > 0 && unit_amount < 100) { | |
594 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, | |
595 kByteStringsUnlocalized[dimension]); | |
596 } else { | |
597 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, | |
598 kByteStringsUnlocalized[dimension]); | |
599 } | |
600 | |
601 return ASCIIToUTF16(buf); | |
602 } | |
603 | |
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. The search resumes after each inserted replacement,
// so text introduced by |replace_with| is never rescanned.
//
// Does nothing if |start_offset| is npos or not inside the string, or if
// |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty search string matches at every offset, so the loop below would
  // never terminate (and would grow |*str| without bound when |replace_with|
  // is non-empty). DCHECK is presumably compiled out of release builds, so
  // bail out explicitly rather than relying on it.
  if (find_this.empty())
    return;

  typename StringType::size_type offs = str->find(find_this, start_offset);
  while (offs != StringType::npos) {
    str->replace(offs, find_this.length(), replace_with);
    // Continue searching after the text that was just inserted.
    offs += replace_with.length();

    if (!replace_all)
      break;
    offs = str->find(find_this, offs);
  }
}
623 | |
624 void ReplaceFirstSubstringAfterOffset(string16* str, | |
625 string16::size_type start_offset, | |
626 const string16& find_this, | |
627 const string16& replace_with) { | |
628 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
629 false); // replace first instance | |
630 } | |
631 | |
632 void ReplaceFirstSubstringAfterOffset(std::string* str, | |
633 std::string::size_type start_offset, | |
634 const std::string& find_this, | |
635 const std::string& replace_with) { | |
636 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
637 false); // replace first instance | |
638 } | |
639 | |
640 void ReplaceSubstringsAfterOffset(string16* str, | |
641 string16::size_type start_offset, | |
642 const string16& find_this, | |
643 const string16& replace_with) { | |
644 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
645 true); // replace all instances | |
646 } | |
647 | |
648 void ReplaceSubstringsAfterOffset(std::string* str, | |
649 std::string::size_type start_offset, | |
650 const std::string& find_this, | |
651 const std::string& replace_with) { | |
652 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, | |
653 true); // replace all instances | |
654 } | |
655 | |
656 | |
// Splits |str| into the maximal runs of characters that are not in
// |delimiters| and stores them in |tokens| (which is cleared first). Empty
// tokens are never produced. Returns the number of tokens found.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Pos;

  tokens->clear();

  for (Pos token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos;) {
    const Pos token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // Last token: it runs to the end of the string.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    // Skip the delimiter run to find where the next token starts.
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
677 | |
678 size_t Tokenize(const std::wstring& str, | |
679 const std::wstring& delimiters, | |
680 std::vector<std::wstring>* tokens) { | |
681 return TokenizeT(str, delimiters, tokens); | |
682 } | |
683 | |
684 #if !defined(WCHAR_T_IS_UTF16) | |
685 size_t Tokenize(const string16& str, | |
686 const string16& delimiters, | |
687 std::vector<string16>* tokens) { | |
688 return TokenizeT(str, delimiters, tokens); | |
689 } | |
690 #endif | |
691 | |
692 size_t Tokenize(const std::string& str, | |
693 const std::string& delimiters, | |
694 std::vector<std::string>* tokens) { | |
695 return TokenizeT(str, delimiters, tokens); | |
696 } | |
697 | |
698 size_t Tokenize(const base::StringPiece& str, | |
699 const base::StringPiece& delimiters, | |
700 std::vector<base::StringPiece>* tokens) { | |
701 return TokenizeT(str, delimiters, tokens); | |
702 } | |
703 | |
// Concatenates the elements of |parts|, inserting |sep| between consecutive
// elements. Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  if (parts.empty())
    return STR();

  // Seed with the first element, then append "separator + element" for the
  // remaining ones.
  STR joined(parts[0]);
  for (size_t index = 1; index < parts.size(); ++index) {
    joined += sep;
    joined += parts[index];
  }

  return joined;
}
720 | |
721 std::string JoinString(const std::vector<std::string>& parts, char sep) { | |
722 return JoinStringT(parts, std::string(1, sep)); | |
723 } | |
724 | |
725 string16 JoinString(const std::vector<string16>& parts, char16 sep) { | |
726 return JoinStringT(parts, string16(1, sep)); | |
727 } | |
728 | |
729 std::string JoinString(const std::vector<std::string>& parts, | |
730 const std::string& separator) { | |
731 return JoinStringT(parts, separator); | |
732 } | |
733 | |
734 string16 JoinString(const std::vector<string16>& parts, | |
735 const string16& separator) { | |
736 return JoinStringT(parts, separator); | |
737 } | |
738 | |
739 template<class FormatStringType, class OutStringType> | |
740 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, | |
741 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { | |
742 size_t substitutions = subst.size(); | |
743 | |
744 size_t sub_length = 0; | |
745 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); | |
746 iter != subst.end(); ++iter) { | |
747 sub_length += iter->length(); | |
748 } | |
749 | |
750 OutStringType formatted; | |
751 formatted.reserve(format_string.length() + sub_length); | |
752 | |
753 std::vector<ReplacementOffset> r_offsets; | |
754 for (typename FormatStringType::const_iterator i = format_string.begin(); | |
755 i != format_string.end(); ++i) { | |
756 if ('$' == *i) { | |
757 if (i + 1 != format_string.end()) { | |
758 ++i; | |
759 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; | |
760 if ('$' == *i) { | |
761 while (i != format_string.end() && '$' == *i) { | |
762 formatted.push_back('$'); | |
763 ++i; | |
764 } | |
765 --i; | |
766 } else { | |
767 uintptr_t index = 0; | |
768 while (i != format_string.end() && '0' <= *i && *i <= '9') { | |
769 index *= 10; | |
770 index += *i - '0'; | |
771 ++i; | |
772 } | |
773 --i; | |
774 index -= 1; | |
775 if (offsets) { | |
776 ReplacementOffset r_offset(index, | |
777 static_cast<int>(formatted.size())); | |
778 r_offsets.insert(std::lower_bound(r_offsets.begin(), | |
779 r_offsets.end(), | |
780 r_offset, | |
781 &CompareParameter), | |
782 r_offset); | |
783 } | |
784 if (index < substitutions) | |
785 formatted.append(subst.at(index)); | |
786 } | |
787 } | |
788 } else { | |
789 formatted.push_back(*i); | |
790 } | |
791 } | |
792 if (offsets) { | |
793 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); | |
794 i != r_offsets.end(); ++i) { | |
795 offsets->push_back(i->offset); | |
796 } | |
797 } | |
798 return formatted; | |
799 } | |
800 | |
801 string16 ReplaceStringPlaceholders(const string16& format_string, | |
802 const std::vector<string16>& subst, | |
803 std::vector<size_t>* offsets) { | |
804 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
805 } | |
806 | |
807 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, | |
808 const std::vector<std::string>& subst, | |
809 std::vector<size_t>* offsets) { | |
810 return DoReplaceStringPlaceholders(format_string, subst, offsets); | |
811 } | |
812 | |
813 string16 ReplaceStringPlaceholders(const string16& format_string, | |
814 const string16& a, | |
815 size_t* offset) { | |
816 std::vector<size_t> offsets; | |
817 std::vector<string16> subst; | |
818 subst.push_back(a); | |
819 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); | |
820 | |
821 DCHECK(offsets.size() == 1); | |
822 if (offset) { | |
823 *offset = offsets[0]; | |
824 } | |
825 return result; | |
826 } | |
827 | |
828 static bool IsWildcard(base_icu::UChar32 character) { | |
829 return character == '*' || character == '?'; | |
830 } | |
831 | |
832 // Move the strings pointers to the point where they start to differ. | |
833 template <typename CHAR, typename NEXT> | |
834 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, | |
835 const CHAR** string, const CHAR* string_end, | |
836 NEXT next) { | |
837 const CHAR* escape = NULL; | |
838 while (*pattern != pattern_end && *string != string_end) { | |
839 if (!escape && IsWildcard(**pattern)) { | |
840 // We don't want to match wildcard here, except if it's escaped. | |
841 return; | |
842 } | |
843 | |
844 // Check if the escapement char is found. If so, skip it and move to the | |
845 // next character. | |
846 if (!escape && **pattern == '\\') { | |
847 escape = *pattern; | |
848 next(pattern, pattern_end); | |
849 continue; | |
850 } | |
851 | |
852 // Check if the chars match, if so, increment the ptrs. | |
853 const CHAR* pattern_next = *pattern; | |
854 const CHAR* string_next = *string; | |
855 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | |
856 if (pattern_char == next(&string_next, string_end) && | |
857 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { | |
858 *pattern = pattern_next; | |
859 *string = string_next; | |
860 } else { | |
861 // Uh ho, it did not match, we are done. If the last char was an | |
862 // escapement, that means that it was an error to advance the ptr here, | |
863 // let's put it back where it was. This also mean that the MatchPattern | |
864 // function will return false because if we can't match an escape char | |
865 // here, then no one will. | |
866 if (escape) { | |
867 *pattern = escape; | |
868 } | |
869 return; | |
870 } | |
871 | |
872 escape = NULL; | |
873 } | |
874 } | |
875 | |
// Advances |*pattern| past any run of consecutive wildcard characters,
// stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
884 | |
// Recursive wildcard matcher. Returns true if [eval, eval_end) matches
// [pattern, pattern_end), where '*' matches any run of characters and '?'
// matches zero or one character. Recursion deeper than kMaxDepth is treated
// as a failed match. |next| decodes one character and advances a pointer.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix shared by both inputs.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // String exhausted: it is a match only if whatever is left of the pattern
  // consists solely of wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern exhausted while the string still has characters: no match.
  if (pattern == pattern_end)
    return false;

  // Position of the pattern just after its current character.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  if (pattern[0] == '?') {
    // First try letting '?' match nothing...
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next)) {
      return true;
    }
    // ...then try letting it swallow exactly one character.
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    return MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next);
  }

  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Try the remainder of the pattern against every suffix of the string.
    for (; eval != eval_end; eval++) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next)) {
        return true;
      }
    }

    // The string is used up; succeed only if the pattern holds nothing but
    // wildcards from here on.
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  return false;
}
949 | |
950 struct NextCharUTF8 { | |
951 base_icu::UChar32 operator()(const char** p, const char* end) { | |
952 base_icu::UChar32 c; | |
953 int offset = 0; | |
954 CBU8_NEXT(*p, offset, end - *p, c); | |
955 *p += offset; | |
956 return c; | |
957 } | |
958 }; | |
959 | |
960 struct NextCharUTF16 { | |
961 base_icu::UChar32 operator()(const char16** p, const char16* end) { | |
962 base_icu::UChar32 c; | |
963 int offset = 0; | |
964 CBU16_NEXT(*p, offset, end - *p, c); | |
965 *p += offset; | |
966 return c; | |
967 } | |
968 }; | |
969 | |
970 bool MatchPattern(const base::StringPiece& eval, | |
971 const base::StringPiece& pattern) { | |
972 return MatchPatternT(eval.data(), eval.data() + eval.size(), | |
973 pattern.data(), pattern.data() + pattern.size(), | |
974 0, NextCharUTF8()); | |
975 } | |
976 | |
977 bool MatchPattern(const string16& eval, const string16& pattern) { | |
978 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), | |
979 pattern.c_str(), pattern.c_str() + pattern.size(), | |
980 0, NextCharUTF16()); | |
981 } | |
982 | |
983 // The following code is compatible with the OpenBSD lcpy interface. See: | |
984 // http://www.gratisoft.us/todd/papers/strlcpy.html | |
985 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c | |
986 | |
namespace {

// Copies the NUL-terminated string |src| into |dst|, writing at most
// |dst_size| characters including the terminator. When |dst_size| is
// non-zero the result is always terminated, truncating if necessary.
// Returns the length of |src| in characters, so a return value that is
// >= |dst_size| tells the caller that truncation happened.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR ch = src[copied];
    dst[copied] = ch;
    if (ch == 0)  // The terminator was reached and copied.
      return copied;
    ++copied;
  }

  // The buffer filled up before the terminator was seen. Overwrite the last
  // slot with a terminator (there is no slot to write when dst_size is 0).
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of |src| and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length])
    ++src_length;
  return src_length;
}

}  // namespace
1006 | |
1007 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { | |
1008 return lcpyT<char>(dst, src, dst_size); | |
1009 } | |
1010 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | |
1011 return lcpyT<wchar_t>(dst, src, dst_size); | |
1012 } | |
OLD | NEW |