OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | |
5 // This file defines utility functions for working with strings. | |
6 | 4 |
7 #ifndef BASE_STRING_UTIL_H_ | 5 // This file has moved, please use the new location. |
8 #define BASE_STRING_UTIL_H_ | 6 // TODO(avi) remove this file when all users have been updated. |
9 | 7 #include "base/strings/string_util.h" |
10 #include <ctype.h> | |
11 #include <stdarg.h> // va_list | |
12 | |
13 #include <string> | |
14 #include <vector> | |
15 | |
16 #include "base/base_export.h" | |
17 #include "base/basictypes.h" | |
18 #include "base/compiler_specific.h" | |
19 #include "base/string16.h" | |
20 #include "base/strings/string_piece.h" // For implicit conversions. | |
21 | |
22 // Safe standard library wrappers for all platforms. | |
23 | |
24 namespace base { | |
25 | |
26 // C standard-library functions like "strncasecmp" and "snprintf" that aren't | |
27 // cross-platform are provided as "base::strncasecmp", and their prototypes | |
28 // are listed below. These functions are then implemented as inline calls | |
29 // to the platform-specific equivalents in the platform-specific headers. | |
30 | |
31 // Compares the two strings s1 and s2 without regard to case using | |
32 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
33 // s2 > s1 according to a lexicographic comparison. | |
34 int strcasecmp(const char* s1, const char* s2); | |
35 | |
36 // Compares up to count characters of s1 and s2 without regard to case using | |
37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
38 // s2 > s1 according to a lexicographic comparison. | |
39 int strncasecmp(const char* s1, const char* s2, size_t count); | |
40 | |
41 // Same as strncmp but for char16 strings. | |
42 int strncmp16(const char16* s1, const char16* s2, size_t count); | |
43 | |
44 // Wrapper for vsnprintf that always null-terminates and always returns the | |
45 // number of characters that would be in an untruncated formatted | |
46 // string, even when truncation occurs. | |
47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) | |
48 PRINTF_FORMAT(3, 0); | |
49 | |
50 // vswprintf always null-terminates, but when truncation occurs, it will either | |
51 // return -1 or the number of characters that would be in an untruncated | |
52 // formatted string. The actual return value depends on the underlying | |
53 // C library's vswprintf implementation. | |
54 int vswprintf(wchar_t* buffer, size_t size, | |
55 const wchar_t* format, va_list arguments) | |
56 WPRINTF_FORMAT(3, 0); | |
57 | |
58 // Some of these implementations need to be inlined. | |
59 | |
60 // We separate the declaration from the implementation of this inline | |
61 // function just so the PRINTF_FORMAT works. | |
62 inline int snprintf(char* buffer, size_t size, const char* format, ...) | |
63 PRINTF_FORMAT(3, 4); | |
64 inline int snprintf(char* buffer, size_t size, const char* format, ...) { | |
65 va_list arguments; | |
66 va_start(arguments, format); | |
67 int result = vsnprintf(buffer, size, format, arguments); | |
68 va_end(arguments); | |
69 return result; | |
70 } | |
71 | |
72 // We separate the declaration from the implementation of this inline | |
73 // function just so the WPRINTF_FORMAT works. | |
74 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) | |
75 WPRINTF_FORMAT(3, 4); | |
76 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { | |
77 va_list arguments; | |
78 va_start(arguments, format); | |
79 int result = vswprintf(buffer, size, format, arguments); | |
80 va_end(arguments); | |
81 return result; | |
82 } | |
83 | |
84 // BSD-style safe and consistent string copy functions. | |
85 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | |
86 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as | |
87 // long as |dst_size| is not 0. Returns the length of |src| in characters. | |
88 // If the return value is >= dst_size, then the output was truncated. | |
89 // NOTE: All sizes are in number of characters, NOT in bytes. | |
90 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); | |
91 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | |
92 | |
93 // Scan a wprintf format string to determine whether it's portable across a | |
94 // variety of systems. This function only checks that the conversion | |
95 // specifiers used by the format string are supported and have the same meaning | |
96 // on a variety of systems. It doesn't check for other errors that might occur | |
97 // within a format string. | |
98 // | |
99 // Nonportable conversion specifiers for wprintf are: | |
100 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char | |
101 // data on all systems except Windows, which treat them as wchar_t data. | |
102 // Use %ls and %lc for wchar_t data instead. | |
103 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, | |
104 // which treat them as char data. Use %ls and %lc for wchar_t data | |
105 // instead. | |
106 // - 'F', which is not identified by Windows wprintf documentation. | |
107 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. | |
108 // Use %ld, %lo, and %lu instead. | |
109 // | |
110 // Note that there is no portable conversion specifier for char data when | |
111 // working with wprintf. | |
112 // | |
113 // This function is intended to be called from base::vswprintf. | |
114 BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); | |
115 | |
// Lower-cases |c| when it is an ASCII capital letter ('A'..'Z'); every other
// value is returned unchanged. The standard library's tolower is locale
// sensitive, so it is deliberately not used here.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return c + ('a' - 'A');
}
121 | |
// Upper-cases |c| when it is an ASCII small letter ('a'..'z'); every other
// value is returned unchanged. The standard library's toupper is locale
// sensitive, so it is deliberately not used here.
template <class Char> inline Char ToUpperASCII(Char c) {
  if (c < 'a' || c > 'z')
    return c;
  return c + ('A' - 'a');
}
127 | |
128 // Function objects to aid in comparing/searching strings. | |
129 | |
// Equality predicate that treats two characters as equal when tolower()
// maps them to the same value.
template<typename Char> struct CaseInsensitiveCompare {
 public:
  bool operator()(Char x, Char y) const {
    // TODO(darin): Do we really want to do locale sensitive comparisons here?
    // See http://crbug.com/24917
    const int folded_x = tolower(x);
    const int folded_y = tolower(y);
    return folded_x == folded_y;
  }
};
138 | |
// Equality predicate that treats two characters as equal when ToLowerASCII()
// maps them to the same value.
template<typename Char> struct CaseInsensitiveCompareASCII {
 public:
  bool operator()(Char x, Char y) const {
    const Char folded_x = ToLowerASCII(x);
    const Char folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
145 | |
146 } // namespace base | |
147 | |
148 #if defined(OS_WIN) | |
149 #include "base/string_util_win.h" | |
150 #elif defined(OS_POSIX) | |
151 #include "base/string_util_posix.h" | |
152 #else | |
153 #error Define string operations appropriately for your platform | |
154 #endif | |
155 | |
156 // These threadsafe functions return references to globally unique empty | |
157 // strings. | |
158 // | |
159 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | |
160 // There is only one case where you should use these: functions which need to | |
161 // return a string by reference (e.g. as a class member accessor), and don't | |
162 // have an empty string to use (e.g. in an error case). These should not be | |
163 // used as initializers, function arguments, or return values for functions | |
164 // which return by value or outparam. | |
165 BASE_EXPORT const std::string& EmptyString(); | |
166 BASE_EXPORT const std::wstring& EmptyWString(); | |
167 BASE_EXPORT const string16& EmptyString16(); | |
168 | |
169 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; | |
170 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; | |
171 BASE_EXPORT extern const char kWhitespaceASCII[]; | |
172 | |
173 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; | |
174 | |
175 // Removes characters in |remove_chars| from anywhere in |input|. Returns true | |
176 // if any characters were removed. |remove_chars| must be null-terminated. | |
177 // NOTE: Safe to use the same variable for both |input| and |output|. | |
178 BASE_EXPORT bool RemoveChars(const string16& input, | |
179 const char16 remove_chars[], | |
180 string16* output); | |
181 BASE_EXPORT bool RemoveChars(const std::string& input, | |
182 const char remove_chars[], | |
183 std::string* output); | |
184 | |
185 // Replaces characters in |replace_chars| from anywhere in |input| with | |
186 // |replace_with|. Each character in |replace_chars| will be replaced with | |
187 // the |replace_with| string. Returns true if any characters were replaced. | |
188 // |replace_chars| must be null-terminated. | |
189 // NOTE: Safe to use the same variable for both |input| and |output|. | |
190 BASE_EXPORT bool ReplaceChars(const string16& input, | |
191 const char16 replace_chars[], | |
192 const string16& replace_with, | |
193 string16* output); | |
194 BASE_EXPORT bool ReplaceChars(const std::string& input, | |
195 const char replace_chars[], | |
196 const std::string& replace_with, | |
197 std::string* output); | |
198 | |
199 // Removes characters in |trim_chars| from the beginning and end of |input|. | |
200 // |trim_chars| must be null-terminated. | |
201 // NOTE: Safe to use the same variable for both |input| and |output|. | |
202 BASE_EXPORT bool TrimString(const std::wstring& input, | |
203 const wchar_t trim_chars[], | |
204 std::wstring* output); | |
205 BASE_EXPORT bool TrimString(const string16& input, | |
206 const char16 trim_chars[], | |
207 string16* output); | |
208 BASE_EXPORT bool TrimString(const std::string& input, | |
209 const char trim_chars[], | |
210 std::string* output); | |
211 | |
212 // Truncates a string to the nearest UTF-8 character that will leave | |
213 // the string less than or equal to the specified byte size. | |
214 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, | |
215 const size_t byte_size, | |
216 std::string* output); | |
217 | |
218 // Trims any whitespace from either end of the input string. Returns where | |
219 // whitespace was found. | |
220 // The non-wide (std::string) version is: | |
221 // * TrimWhitespaceASCII() | |
222 // This function is for ASCII strings and only looks for ASCII whitespace. | |
223 // Please choose the function that best matches your usage. | |
224 // NOTE: Safe to use the same variable for both input and output. | |
// Bit flags for the leading and trailing positions; TRIM_ALL is the bitwise
// OR of TRIM_LEADING and TRIM_TRAILING.
enum TrimPositions {
  TRIM_NONE     = 0,
  TRIM_LEADING  = 0x1,
  TRIM_TRAILING = 0x2,
  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING,
};
231 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, | |
232 TrimPositions positions, | |
233 string16* output); | |
234 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, | |
235 TrimPositions positions, | |
236 std::string* output); | |
237 | |
238 // Deprecated. This function is only for backward compatibility and calls | |
239 // TrimWhitespaceASCII(). | |
240 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, | |
241 TrimPositions positions, | |
242 std::string* output); | |
243 | |
244 // Searches for CR or LF characters. Removes all contiguous whitespace | |
245 // strings that contain them. This is useful when trying to deal with text | |
246 // copied from terminals. | |
247 // Returns |text|, with the following three transformations: | |
248 // (1) Leading and trailing whitespace is trimmed. | |
249 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | |
250 // sequences containing a CR or LF are trimmed. | |
251 // (3) All other whitespace sequences are converted to single spaces. | |
252 BASE_EXPORT std::wstring CollapseWhitespace( | |
253 const std::wstring& text, | |
254 bool trim_sequences_with_line_breaks); | |
255 BASE_EXPORT string16 CollapseWhitespace( | |
256 const string16& text, | |
257 bool trim_sequences_with_line_breaks); | |
258 BASE_EXPORT std::string CollapseWhitespaceASCII( | |
259 const std::string& text, | |
260 bool trim_sequences_with_line_breaks); | |
261 | |
262 // Returns true if the passed string is empty or contains only white-space | |
263 // characters. | |
264 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); | |
265 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); | |
266 | |
267 // Returns true if |input| is empty or contains only characters found in | |
268 // |characters|. | |
269 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, | |
270 const std::wstring& characters); | |
271 BASE_EXPORT bool ContainsOnlyChars(const string16& input, | |
272 const string16& characters); | |
273 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, | |
274 const std::string& characters); | |
275 | |
276 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | |
277 // beforehand. | |
278 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); | |
279 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); | |
280 | |
281 // Converts the given wide string to the corresponding Latin1. This will fail | |
282 // (return false) if any characters are more than 255. | |
283 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); | |
284 | |
285 // Returns true if the specified string matches the criteria. How can a wide | |
286 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | |
287 // first case) or characters that use only 8-bits and whose 8-bit | |
288 // representation looks like a UTF-8 string (the second case). | |
289 // | |
290 // Note that IsStringUTF8 checks not only if the input is structurally | |
291 // valid but also if it doesn't contain any non-character codepoint | |
292 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | |
293 // to have the maximum 'discriminating' power from other encodings. If | |
294 // there's a use case for just checking the structural validity, we have to | |
295 // add a new function for that. | |
296 BASE_EXPORT bool IsStringUTF8(const std::string& str); | |
297 BASE_EXPORT bool IsStringASCII(const std::wstring& str); | |
298 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); | |
299 BASE_EXPORT bool IsStringASCII(const string16& str); | |
300 | |
301 // Converts the elements of the given string. This version uses a pointer to | |
302 // clearly differentiate it from the non-pointer variant. | |
303 template <class str> inline void StringToLowerASCII(str* s) { | |
304 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
305 *i = base::ToLowerASCII(*i); | |
306 } | |
307 | |
308 template <class str> inline str StringToLowerASCII(const str& s) { | |
309 // for std::string and std::wstring | |
310 str output(s); | |
311 StringToLowerASCII(&output); | |
312 return output; | |
313 } | |
314 | |
315 // Converts the elements of the given string. This version uses a pointer to | |
316 // clearly differentiate it from the non-pointer variant. | |
317 template <class str> inline void StringToUpperASCII(str* s) { | |
318 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
319 *i = base::ToUpperASCII(*i); | |
320 } | |
321 | |
322 template <class str> inline str StringToUpperASCII(const str& s) { | |
323 // for std::string and std::wstring | |
324 str output(s); | |
325 StringToUpperASCII(&output); | |
326 return output; | |
327 } | |
328 | |
329 // Compare the lower-case form of the given string against the given ASCII | |
330 // string. This is useful for checking whether an input string matches some | |
331 // token, and it is optimized to avoid intermediate string copies. This API is | |
332 // borrowed from the equivalent APIs in Mozilla. | |
333 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); | |
334 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | |
335 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); | |
336 | |
337 // Same thing, but with string iterators instead. | |
338 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
339 std::string::const_iterator a_end, | |
340 const char* b); | |
341 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
342 std::wstring::const_iterator a_end, | |
343 const char* b); | |
344 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
345 string16::const_iterator a_end, | |
346 const char* b); | |
347 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, | |
348 const char* a_end, | |
349 const char* b); | |
350 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
351 const wchar_t* a_end, | |
352 const char* b); | |
353 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, | |
354 const char16* a_end, | |
355 const char* b); | |
356 | |
357 // Performs a case-sensitive string compare. The behavior is undefined if both | |
358 // strings are not ASCII. | |
359 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); | |
360 | |
361 // Returns true if str starts with search, or false otherwise. | |
362 BASE_EXPORT bool StartsWithASCII(const std::string& str, | |
363 const std::string& search, | |
364 bool case_sensitive); | |
365 BASE_EXPORT bool StartsWith(const std::wstring& str, | |
366 const std::wstring& search, | |
367 bool case_sensitive); | |
368 BASE_EXPORT bool StartsWith(const string16& str, | |
369 const string16& search, | |
370 bool case_sensitive); | |
371 | |
372 // Returns true if str ends with search, or false otherwise. | |
373 BASE_EXPORT bool EndsWith(const std::string& str, | |
374 const std::string& search, | |
375 bool case_sensitive); | |
376 BASE_EXPORT bool EndsWith(const std::wstring& str, | |
377 const std::wstring& search, | |
378 bool case_sensitive); | |
379 BASE_EXPORT bool EndsWith(const string16& str, | |
380 const string16& search, | |
381 bool case_sensitive); | |
382 | |
383 | |
// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).

// True for space, tab, carriage return and line feed only.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\r' || c == '\n';
}
// True for 'a'..'z' and 'A'..'Z' only, independent of locale.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
// True for '0'..'9' only, independent of locale.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  return !(c < '0' || c > '9');
}
398 | |
// True for '0'..'9', 'A'..'F' and 'a'..'f' only.
template <typename Char>
inline bool IsHexDigit(Char c) {
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'A' && c <= 'F')
    return true;
  return c >= 'a' && c <= 'f';
}
405 | |
// Converts a hex digit character to its numeric value (0..15), returned as a
// |Char|. Debug-checks that |c| satisfies IsHexDigit(); any other input falls
// through to 0.
template <typename Char>
inline Char HexDigitToInt(Char c) {
  DCHECK(IsHexDigit(c));
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= '0' && c <= '9')
    return c - '0';
  return 0;
}
417 | |
418 // Returns true if it's a whitespace character. | |
419 inline bool IsWhitespace(wchar_t c) { | |
420 return wcschr(kWhitespaceWide, c) != NULL; | |
421 } | |
422 | |
423 // Return a byte string in human-readable format with a unit suffix. Not | |
424 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is | |
425 // highly recommended instead. TODO(avi): Figure out how to get callers to use | |
426 // FormatBytes instead; remove this. | |
427 BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); | |
428 | |
429 // Starting at |start_offset| (usually 0), replace the first instance of | |
430 // |find_this| with |replace_with|. | |
431 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( | |
432 string16* str, | |
433 string16::size_type start_offset, | |
434 const string16& find_this, | |
435 const string16& replace_with); | |
436 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( | |
437 std::string* str, | |
438 std::string::size_type start_offset, | |
439 const std::string& find_this, | |
440 const std::string& replace_with); | |
441 | |
442 // Starting at |start_offset| (usually 0), look through |str| and replace all | |
443 // instances of |find_this| with |replace_with|. | |
444 // | |
445 // This does entire substrings; use std::replace in <algorithm> for single | |
446 // characters, for example: | |
447 // std::replace(str.begin(), str.end(), 'a', 'b'); | |
448 BASE_EXPORT void ReplaceSubstringsAfterOffset( | |
449 string16* str, | |
450 string16::size_type start_offset, | |
451 const string16& find_this, | |
452 const string16& replace_with); | |
453 BASE_EXPORT void ReplaceSubstringsAfterOffset( | |
454 std::string* str, | |
455 std::string::size_type start_offset, | |
456 const std::string& find_this, | |
457 const std::string& replace_with); | |
458 | |
459 // Reserves enough memory in |str| to accommodate |length_with_null| characters, | |
460 // sets the size of |str| to |length_with_null - 1| characters, and returns a | |
461 // pointer to the underlying contiguous array of characters. This is typically | |
462 // used when calling a function that writes results into a character array, but | |
463 // the caller wants the data to be managed by a string-like object. It is | |
464 // convenient in that is can be used inline in the call, and fast in that it | |
465 // avoids copying the results of the call from a char* into a string. | |
466 // | |
467 // |length_with_null| must be at least 2, since otherwise the underlying string | |
468 // would have size 0, and trying to access &((*str)[0]) in that case can result | |
469 // in a number of problems. | |
470 // | |
471 // Internally, this takes linear time because the resize() call 0-fills the | |
472 // underlying array for potentially all | |
473 // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we | |
474 // could avoid this aspect of the resize() call, as we expect the caller to | |
475 // immediately write over this memory, but there is no other way to set the size | |
476 // of the string, and not doing that will mean people who access |str| rather | |
477 // than str.c_str() will get back a string of whatever size |str| had on entry | |
478 // to this function (probably 0). | |
479 template <class string_type> | |
480 inline typename string_type::value_type* WriteInto(string_type* str, | |
481 size_t length_with_null) { | |
482 DCHECK_GT(length_with_null, 1u); | |
483 str->reserve(length_with_null); | |
484 str->resize(length_with_null - 1); | |
485 return &((*str)[0]); | |
486 } | |
487 | |
488 //----------------------------------------------------------------------------- | |
489 | |
490 // Splits a string into its fields delimited by any of the characters in | |
491 // |delimiters|. Each field is added to the |tokens| vector. Returns the | |
492 // number of tokens found. | |
493 BASE_EXPORT size_t Tokenize(const std::wstring& str, | |
494 const std::wstring& delimiters, | |
495 std::vector<std::wstring>* tokens); | |
496 BASE_EXPORT size_t Tokenize(const string16& str, | |
497 const string16& delimiters, | |
498 std::vector<string16>* tokens); | |
499 BASE_EXPORT size_t Tokenize(const std::string& str, | |
500 const std::string& delimiters, | |
501 std::vector<std::string>* tokens); | |
502 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, | |
503 const base::StringPiece& delimiters, | |
504 std::vector<base::StringPiece>* tokens); | |
505 | |
506 // Does the opposite of SplitString(). | |
507 BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); | |
508 BASE_EXPORT std::string JoinString( | |
509 const std::vector<std::string>& parts, char s); | |
510 | |
511 // Join |parts| using |separator|. | |
512 BASE_EXPORT std::string JoinString( | |
513 const std::vector<std::string>& parts, | |
514 const std::string& separator); | |
515 BASE_EXPORT string16 JoinString( | |
516 const std::vector<string16>& parts, | |
517 const string16& separator); | |
518 | |
519 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. | |
520 // Additionally, any number of consecutive '$' characters is replaced by that | |
521 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be | |
522 // NULL. This only allows you to use up to nine replacements. | |
523 BASE_EXPORT string16 ReplaceStringPlaceholders( | |
524 const string16& format_string, | |
525 const std::vector<string16>& subst, | |
526 std::vector<size_t>* offsets); | |
527 | |
528 BASE_EXPORT std::string ReplaceStringPlaceholders( | |
529 const base::StringPiece& format_string, | |
530 const std::vector<std::string>& subst, | |
531 std::vector<size_t>* offsets); | |
532 | |
533 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. | |
534 BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, | |
535 const string16& a, | |
536 size_t* offset); | |
537 | |
538 // Returns true if the string passed in matches the pattern. The pattern | |
539 // string can contain wildcards like * and ? | |
540 // The backslash character (\) is an escape character for * and ? | |
541 // We limit the patterns to having a max of 16 * or ? characters. | |
542 // ? matches 0 or 1 character, while * matches 0 or more characters. | |
543 BASE_EXPORT bool MatchPattern(const base::StringPiece& string, | |
544 const base::StringPiece& pattern); | |
545 BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); | |
546 | |
// Hack that maps a char-like type to its unsigned counterpart, exposed as the
// nested typedef |Unsigned|. For example, char, signed char and unsigned char
// all map to unsigned char. A type without a specialization below maps to
// itself.
template<typename T>
struct ToUnsigned {
  typedef T Unsigned;
};

// 8-bit character types.
template<>
struct ToUnsigned<signed char> {
  typedef unsigned char Unsigned;
};
template<>
struct ToUnsigned<char> {
  typedef unsigned char Unsigned;
};

// 16-bit signed integer.
template<>
struct ToUnsigned<short> {
  typedef unsigned short Unsigned;
};

// wchar_t: the unsigned type is selected by WCHAR_T_IS_UTF16 or
// WCHAR_T_IS_UTF32. When neither macro is defined this specialization has no
// |Unsigned| member.
template<>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};
575 | |
576 #endif // BASE_STRING_UTIL_H_ | |
OLD | NEW |