OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/icu_string_conversions.h" | 5 #include "base/i18n/icu_string_conversions.h" |
6 | 6 |
7 #include <vector> | 7 #include <vector> |
8 | 8 |
9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
181 // output would be at most the same as the number of bytes in input. There | 181 // output would be at most the same as the number of bytes in input. There |
182 // is no single-byte encoding in which a character is mapped to a | 182 // is no single-byte encoding in which a character is mapped to a |
183 // non-BMP character requiring two 2-byte units. | 183 // non-BMP character requiring two 2-byte units. |
184 // | 184 // |
185 // Moreover, non-BMP characters in legacy multibyte encodings | 185 // Moreover, non-BMP characters in legacy multibyte encodings |
186 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are | 186 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are |
187 // BOCU and SCSU, but we don't care about them. | 187 // BOCU and SCSU, but we don't care about them. |
188 size_t uchar_max_length = encoded.length() + 1; | 188 size_t uchar_max_length = encoded.length() + 1; |
189 | 189 |
190 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 190 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
191 scoped_array<char16> buffer(new char16[uchar_max_length]); | 191 scoped_ptr<char16[]> buffer(new char16[uchar_max_length]); |
192 int actual_size = ucnv_toUChars(converter, buffer.get(), | 192 int actual_size = ucnv_toUChars(converter, buffer.get(), |
193 static_cast<int>(uchar_max_length), encoded.data(), | 193 static_cast<int>(uchar_max_length), encoded.data(), |
194 static_cast<int>(encoded.length()), &status); | 194 static_cast<int>(encoded.length()), &status); |
195 ucnv_close(converter); | 195 ucnv_close(converter); |
196 if (!U_SUCCESS(status)) { | 196 if (!U_SUCCESS(status)) { |
197 utf16->clear(); // Make sure the output is empty on error. | 197 utf16->clear(); // Make sure the output is empty on error. |
198 return false; | 198 return false; |
199 } | 199 } |
200 | 200 |
201 utf16->assign(buffer.get(), actual_size); | 201 utf16->assign(buffer.get(), actual_size); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
246 if (!U_SUCCESS(status)) | 246 if (!U_SUCCESS(status)) |
247 return false; | 247 return false; |
248 | 248 |
249 // The maximum length in 4 byte unit of UTF-32 output would be | 249 // The maximum length in 4 byte unit of UTF-32 output would be |
250 // at most the same as the number of bytes in input. In the worst | 250 // at most the same as the number of bytes in input. In the worst |
251 // case of GB18030 (excluding escaped-based encodings like ISO-2022-JP), | 251 // case of GB18030 (excluding escaped-based encodings like ISO-2022-JP), |
252 // this can be 4 times larger than actually needed. | 252 // this can be 4 times larger than actually needed. |
253 size_t wchar_max_length = encoded.length() + 1; | 253 size_t wchar_max_length = encoded.length() + 1; |
254 | 254 |
255 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 255 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
256 scoped_array<wchar_t> buffer(new wchar_t[wchar_max_length]); | 256 scoped_ptr<wchar_t[]> buffer(new wchar_t[wchar_max_length]); |
257 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, | 257 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, |
258 reinterpret_cast<char*>(buffer.get()), | 258 reinterpret_cast<char*>(buffer.get()), |
259 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), | 259 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), |
260 static_cast<int>(encoded.length()), &status); | 260 static_cast<int>(encoded.length()), &status); |
261 ucnv_close(converter); | 261 ucnv_close(converter); |
262 if (!U_SUCCESS(status)) { | 262 if (!U_SUCCESS(status)) { |
263 wide->clear(); // Make sure the output is empty on error. | 263 wide->clear(); // Make sure the output is empty on error. |
264 return false; | 264 return false; |
265 } | 265 } |
266 | 266 |
267 // actual_size is # of bytes. | 267 // actual_size is # of bytes. |
268 wide->assign(buffer.get(), actual_size / sizeof(wchar_t)); | 268 wide->assign(buffer.get(), actual_size / sizeof(wchar_t)); |
269 return true; | 269 return true; |
270 #endif // defined(WCHAR_T_IS_UTF32) | 270 #endif // defined(WCHAR_T_IS_UTF32) |
271 } | 271 } |
272 | 272 |
273 bool ConvertToUtf8AndNormalize(const std::string& text, | 273 bool ConvertToUtf8AndNormalize(const std::string& text, |
274 const std::string& charset, | 274 const std::string& charset, |
275 std::string* result) { | 275 std::string* result) { |
276 result->clear(); | 276 result->clear(); |
277 string16 utf16; | 277 string16 utf16; |
278 if (!CodepageToUTF16( | 278 if (!CodepageToUTF16( |
279 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) | 279 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) |
280 return false; | 280 return false; |
281 | 281 |
282 UErrorCode status = U_ZERO_ERROR; | 282 UErrorCode status = U_ZERO_ERROR; |
283 size_t max_length = utf16.length() + 1; | 283 size_t max_length = utf16.length() + 1; |
284 string16 normalized_utf16; | 284 string16 normalized_utf16; |
285 scoped_array<char16> buffer(new char16[max_length]); | 285 scoped_ptr<char16[]> buffer(new char16[max_length]); |
286 int actual_length = unorm_normalize( | 286 int actual_length = unorm_normalize( |
287 utf16.c_str(), utf16.length(), UNORM_NFC, 0, | 287 utf16.c_str(), utf16.length(), UNORM_NFC, 0, |
288 buffer.get(), static_cast<int>(max_length), &status); | 288 buffer.get(), static_cast<int>(max_length), &status); |
289 if (!U_SUCCESS(status)) | 289 if (!U_SUCCESS(status)) |
290 return false; | 290 return false; |
291 normalized_utf16.assign(buffer.get(), actual_length); | 291 normalized_utf16.assign(buffer.get(), actual_length); |
292 | 292 |
293 return UTF16ToUTF8(normalized_utf16.data(), | 293 return UTF16ToUTF8(normalized_utf16.data(), |
294 normalized_utf16.length(), result); | 294 normalized_utf16.length(), result); |
295 } | 295 } |
296 | 296 |
297 } // namespace base | 297 } // namespace base |
OLD | NEW |