OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
11 * documentation and/or other materials provided with the distribution. | 11 * documentation and/or other materials provided with the distribution. |
12 * | 12 * |
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 */ | 24 */ |
25 | 25 |
26 #include "config.h" | 26 #include "config.h" |
27 #include "wtf/text/TextCodecUTF8.h" | 27 #include "wtf/text/TextCodecUTF8.h" |
28 | 28 |
29 #include "wtf/text/TextCodecASCIIFastPath.h" | 29 #include "wtf/text/TextCodecASCIIFastPath.h" |
30 #include "wtf/text/CString.h" | 30 #include "wtf/text/CString.h" |
31 #include "wtf/text/StringBuffer.h" | 31 #include "wtf/text/StringBuffer.h" |
32 #include "wtf/unicode/CharacterNames.h" | 32 #include "wtf/unicode/CharacterNames.h" |
33 | 33 |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
253 return false; | 253 return false; |
254 continue; | 254 continue; |
255 } | 255 } |
256 | 256 |
257 m_partialSequenceSize -= count; | 257 m_partialSequenceSize -= count; |
258 destination = appendCharacter(destination, character); | 258 destination = appendCharacter(destination, character); |
259 } while (m_partialSequenceSize); | 259 } while (m_partialSequenceSize); |
260 | 260 |
261 return false; | 261 return false; |
262 } | 262 } |
263 | 263 |
264 String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool
stopOnError, bool& sawError) | 264 String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool
stopOnError, bool& sawError) |
265 { | 265 { |
266 // Each input byte might turn into a character. | 266 // Each input byte might turn into a character. |
267 // That includes all bytes in the partial-sequence buffer because | 267 // That includes all bytes in the partial-sequence buffer because |
268 // each byte in an invalid sequence will turn into a replacement character. | 268 // each byte in an invalid sequence will turn into a replacement character. |
269 StringBuffer<LChar> buffer(m_partialSequenceSize + length); | 269 StringBuffer<LChar> buffer(m_partialSequenceSize + length); |
270 | 270 |
271 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes); | 271 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes); |
272 const uint8_t* end = source + length; | 272 const uint8_t* end = source + length; |
273 const uint8_t* alignedEnd = alignToMachineWord(end); | 273 const uint8_t* alignedEnd = alignToMachineWord(end); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
322 memcpy(m_partialSequence, source, m_partialSequenceSize); | 322 memcpy(m_partialSequence, source, m_partialSequenceSize); |
323 source = end; | 323 source = end; |
324 break; | 324 break; |
325 } | 325 } |
326 character = decodeNonASCIISequence(source, count); | 326 character = decodeNonASCIISequence(source, count); |
327 } | 327 } |
328 if (character == nonCharacter) { | 328 if (character == nonCharacter) { |
329 sawError = true; | 329 sawError = true; |
330 if (stopOnError) | 330 if (stopOnError) |
331 break; | 331 break; |
332 | 332 |
333 goto upConvertTo16Bit; | 333 goto upConvertTo16Bit; |
334 } | 334 } |
335 if (character > 0xff) | 335 if (character > 0xff) |
336 goto upConvertTo16Bit; | 336 goto upConvertTo16Bit; |
337 | 337 |
338 source += count; | 338 source += count; |
339 *destination++ = character; | 339 *destination++ = character; |
340 } | 340 } |
341 } while (flush && m_partialSequenceSize); | 341 } while (flush && m_partialSequenceSize); |
342 | 342 |
(...skipping 16 matching lines...) Expand all Loading... |
359 // local variables, which may harm code generation by disabling some
optimizations | 359 // local variables, which may harm code generation by disabling some
optimizations |
360 // in some compilers. | 360 // in some compilers. |
361 UChar* destinationForHandlePartialSequence = destination16; | 361 UChar* destinationForHandlePartialSequence = destination16; |
362 const uint8_t* sourceForHandlePartialSequence = source; | 362 const uint8_t* sourceForHandlePartialSequence = source; |
363 handlePartialSequence(destinationForHandlePartialSequence, sourceFor
HandlePartialSequence, end, flush, stopOnError, sawError); | 363 handlePartialSequence(destinationForHandlePartialSequence, sourceFor
HandlePartialSequence, end, flush, stopOnError, sawError); |
364 destination16 = destinationForHandlePartialSequence; | 364 destination16 = destinationForHandlePartialSequence; |
365 source = sourceForHandlePartialSequence; | 365 source = sourceForHandlePartialSequence; |
366 if (m_partialSequenceSize) | 366 if (m_partialSequenceSize) |
367 break; | 367 break; |
368 } | 368 } |
369 | 369 |
370 while (source < end) { | 370 while (source < end) { |
371 if (isASCII(*source)) { | 371 if (isASCII(*source)) { |
372 // Fast path for ASCII. Most UTF-8 text will be ASCII. | 372 // Fast path for ASCII. Most UTF-8 text will be ASCII. |
373 if (isAlignedToMachineWord(source)) { | 373 if (isAlignedToMachineWord(source)) { |
374 while (source < alignedEnd) { | 374 while (source < alignedEnd) { |
375 MachineWord chunk = *reinterpret_cast_ptr<const MachineW
ord*>(source); | 375 MachineWord chunk = *reinterpret_cast_ptr<const MachineW
ord*>(source); |
376 if (!isAllASCII<LChar>(chunk)) | 376 if (!isAllASCII<LChar>(chunk)) |
377 break; | 377 break; |
378 copyASCIIMachineWord(destination16, source); | 378 copyASCIIMachineWord(destination16, source); |
379 source += sizeof(MachineWord); | 379 source += sizeof(MachineWord); |
(...skipping 28 matching lines...) Expand all Loading... |
408 break; | 408 break; |
409 // Each error generates a replacement character and consumes one
byte. | 409 // Each error generates a replacement character and consumes one
byte. |
410 *destination16++ = replacementCharacter; | 410 *destination16++ = replacementCharacter; |
411 ++source; | 411 ++source; |
412 continue; | 412 continue; |
413 } | 413 } |
414 source += count; | 414 source += count; |
415 destination16 = appendCharacter(destination16, character); | 415 destination16 = appendCharacter(destination16, character); |
416 } | 416 } |
417 } while (flush && m_partialSequenceSize); | 417 } while (flush && m_partialSequenceSize); |
418 | 418 |
419 buffer16.shrink(destination16 - buffer16.characters()); | 419 buffer16.shrink(destination16 - buffer16.characters()); |
420 | 420 |
421 return String::adopt(buffer16); | 421 return String::adopt(buffer16); |
422 } | 422 } |
423 | 423 |
424 template<typename CharType> | 424 template<typename CharType> |
425 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) | 425 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) |
426 { | 426 { |
427 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. | 427 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. |
428 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). | 428 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). |
429 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). | 429 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). |
430 if (length > numeric_limits<size_t>::max() / 3) | 430 if (length > numeric_limits<size_t>::max() / 3) |
(...skipping 15 matching lines...) Expand all Loading... |
446 { | 446 { |
447 return encodeCommon(characters, length); | 447 return encodeCommon(characters, length); |
448 } | 448 } |
449 | 449 |
450 CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) | 450 CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) |
451 { | 451 { |
452 return encodeCommon(characters, length); | 452 return encodeCommon(characters, length); |
453 } | 453 } |
454 | 454 |
455 } // namespace WTF | 455 } // namespace WTF |
OLD | NEW |