OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
4 * | 4 * |
5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
7 * are met: | 7 * are met: |
8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
11 * notice, this list of conditions and the following disclaimer in the | 11 * notice, this list of conditions and the following disclaimer in the |
12 * documentation and/or other materials provided with the distribution. | 12 * documentation and/or other materials provided with the distribution. |
13 * | 13 * |
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 */ | 25 */ |
26 | 26 |
27 #include "config.h" | 27 #include "config.h" |
28 #include "wtf/text/TextCodecICU.h" | 28 #include "wtf/text/TextCodecICU.h" |
29 | 29 |
30 #include <unicode/ucnv.h> | 30 #include <unicode/ucnv.h> |
31 #include <unicode/ucnv_cb.h> | 31 #include <unicode/ucnv_cb.h> |
32 #include "wtf/Assertions.h" | 32 #include "wtf/Assertions.h" |
33 #include "wtf/StringExtras.h" | 33 #include "wtf/StringExtras.h" |
34 #include "wtf/Threading.h" | 34 #include "wtf/Threading.h" |
(...skipping 30 matching lines...) Expand all Loading... |
65 // Otherwise, this would share the same canonical name as the | 65 // Otherwise, this would share the same canonical name as the |
66 // visual ordering case, and then TextEncoding could not tell them | 66 // visual ordering case, and then TextEncoding could not tell them |
67 // apart; ICU treats these names as synonyms. | 67 // apart; ICU treats these names as synonyms. |
68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); | 68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); |
69 | 69 |
70 int32_t numEncodings = ucnv_countAvailable(); | 70 int32_t numEncodings = ucnv_countAvailable(); |
71 for (int32_t i = 0; i < numEncodings; ++i) { | 71 for (int32_t i = 0; i < numEncodings; ++i) { |
72 const char* name = ucnv_getAvailableName(i); | 72 const char* name = ucnv_getAvailableName(i); |
73 UErrorCode error = U_ZERO_ERROR; | 73 UErrorCode error = U_ZERO_ERROR; |
74 // Try MIME before trying IANA to pick up commonly used names like | 74 // Try MIME before trying IANA to pick up commonly used names like |
75 // 'EUC-JP' instead of horrendously long names like | 75 // 'EUC-JP' instead of horrendously long names like |
76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. | 76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. |
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); | 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); |
78 if (!U_SUCCESS(error) || !standardName) { | 78 if (!U_SUCCESS(error) || !standardName) { |
79 error = U_ZERO_ERROR; | 79 error = U_ZERO_ERROR; |
80 // Try IANA to pick up 'windows-12xx' and other names | 80 // Try IANA to pick up 'windows-12xx' and other names |
81 // which are not preferred MIME names but are widely used. | 81 // which are not preferred MIME names but are widely used. |
82 standardName = ucnv_getStandardName(name, "IANA", &error); | 82 standardName = ucnv_getStandardName(name, "IANA", &error); |
83 if (!U_SUCCESS(error) || !standardName) | 83 if (!U_SUCCESS(error) || !standardName) |
84 continue; | 84 continue; |
85 } | 85 } |
86 | 86 |
87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. | 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. |
88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding | 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding |
89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. | 89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. |
90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) | 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) |
91 standardName = "GBK"; | 91 standardName = "GBK"; |
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 { | 297 { |
298 // Get a converter for the passed-in encoding. | 298 // Get a converter for the passed-in encoding. |
299 if (!m_converterICU) { | 299 if (!m_converterICU) { |
300 createICUConverter(); | 300 createICUConverter(); |
301 ASSERT(m_converterICU); | 301 ASSERT(m_converterICU); |
302 if (!m_converterICU) { | 302 if (!m_converterICU) { |
303 LOG_ERROR("error creating ICU encoder even though encoding was in ta
ble"); | 303 LOG_ERROR("error creating ICU encoder even though encoding was in ta
ble"); |
304 return String(); | 304 return String(); |
305 } | 305 } |
306 } | 306 } |
307 | 307 |
308 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); | 308 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); |
309 | 309 |
310 StringBuilder result; | 310 StringBuilder result; |
311 | 311 |
312 UChar buffer[ConversionBufferSize]; | 312 UChar buffer[ConversionBufferSize]; |
313 UChar* bufferLimit = buffer + ConversionBufferSize; | 313 UChar* bufferLimit = buffer + ConversionBufferSize; |
314 const char* source = reinterpret_cast<const char*>(bytes); | 314 const char* source = reinterpret_cast<const char*>(bytes); |
315 const char* sourceLimit = source + length; | 315 const char* sourceLimit = source + length; |
316 int32_t* offsets = NULL; | 316 int32_t* offsets = NULL; |
317 UErrorCode err = U_ZERO_ERROR; | 317 UErrorCode err = U_ZERO_ERROR; |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
366 | 366 |
367 UnencodableReplacementArray entity; | 367 UnencodableReplacementArray entity; |
368 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod
edEntitiesForUnencodables, entity); | 368 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod
edEntitiesForUnencodables, entity); |
369 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); | 369 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); |
370 } else | 370 } else |
371 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); | 371 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); |
372 } | 372 } |
373 | 373 |
374 // Substitutes special GBK characters, escaping all other unassigned entities. | 374 // Substitutes special GBK characters, escaping all other unassigned entities. |
375 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr
omUArgs, const UChar* codeUnits, int32_t length, | 375 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr
omUArgs, const UChar* codeUnits, int32_t length, |
376 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 376 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
377 { | 377 { |
378 UChar outChar; | 378 UChar outChar; |
379 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 379 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { |
380 const UChar* source = &outChar; | 380 const UChar* source = &outChar; |
381 *err = U_ZERO_ERROR; | 381 *err = U_ZERO_ERROR; |
382 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 382 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
383 return; | 383 return; |
384 } | 384 } |
385 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); | 385 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); |
386 } | 386 } |
387 | 387 |
388 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. | 388 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. |
389 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicod
eArgs* fromUArgs, const UChar* codeUnits, int32_t length, | 389 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicod
eArgs* fromUArgs, const UChar* codeUnits, int32_t length, |
390 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 390 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
391 { | 391 { |
392 if (reason == UCNV_UNASSIGNED) { | 392 if (reason == UCNV_UNASSIGNED) { |
393 if (UChar outChar = fallbackForGBK(codePoint)) { | 393 if (UChar outChar = fallbackForGBK(codePoint)) { |
394 const UChar* source = &outChar; | 394 const UChar* source = &outChar; |
395 *err = U_ZERO_ERROR; | 395 *err = U_ZERO_ERROR; |
396 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 396 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
397 return; | 397 return; |
398 } | 398 } |
399 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoin
t, reason, err); | 399 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoin
t, reason, err); |
400 return; | 400 return; |
401 } | 401 } |
402 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); | 402 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); |
403 } | 403 } |
404 | 404 |
405 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
* fromUArgs, const UChar* codeUnits, int32_t length, | 405 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
* fromUArgs, const UChar* codeUnits, int32_t length, |
406 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 406 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
407 { | 407 { |
408 UChar outChar; | 408 UChar outChar; |
409 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 409 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { |
410 const UChar* source = &outChar; | 410 const UChar* source = &outChar; |
411 *err = U_ZERO_ERROR; | 411 *err = U_ZERO_ERROR; |
412 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 412 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
413 return; | 413 return; |
414 } | 414 } |
415 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); | 415 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); |
416 } | 416 } |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
515 { | 515 { |
516 return encodeCommon(characters, length, handling); | 516 return encodeCommon(characters, length, handling); |
517 } | 517 } |
518 | 518 |
519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) | 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) |
520 { | 520 { |
521 return encodeCommon(characters, length, handling); | 521 return encodeCommon(characters, length, handling); |
522 } | 522 } |
523 | 523 |
524 } // namespace WTF | 524 } // namespace WTF |
OLD | NEW |