| OLD | NEW |
| 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 60 matching lines...) | (...skipping 60 matching lines...) |
| 71 } else { | 71 } else { |
| 72 entries_[c & kMask] = CacheEntry(c, 0); | 72 entries_[c & kMask] = CacheEntry(c, 0); |
| 73 return 0; | 73 return 0; |
| 74 } | 74 } |
| 75 } else { | 75 } else { |
| 76 return length; | 76 return length; |
| 77 } | 77 } |
| 78 } | 78 } |
| 79 | 79 |
| 80 | 80 |
| 81 unsigned Utf8::Encode(char* str, uchar c) { | 81 unsigned Utf8::Encode(char* str, uchar c, int previous) { |
| 82 static const int kMask = ~(1 << 6); | 82 static const int kMask = ~(1 << 6); |
| 83 if (c <= kMaxOneByteChar) { | 83 if (c <= kMaxOneByteChar) { |
| 84 str[0] = c; | 84 str[0] = c; |
| 85 return 1; | 85 return 1; |
| 86 } else if (c <= kMaxTwoByteChar) { | 86 } else if (c <= kMaxTwoByteChar) { |
| 87 str[0] = 0xC0 | (c >> 6); | 87 str[0] = 0xC0 | (c >> 6); |
| 88 str[1] = 0x80 | (c & kMask); | 88 str[1] = 0x80 | (c & kMask); |
| 89 return 2; | 89 return 2; |
| 90 } else if (c <= kMaxThreeByteChar) { | 90 } else if (c <= kMaxThreeByteChar) { |
| | 91 if (Utf16::IsTrailSurrogate(c) && |
| | 92 Utf16::IsLeadSurrogate(previous)) { |
| | 93 const int kUnmatchedSize = kSizeOfUnmatchedSurrogate; |
| | 94 return Encode(str - kUnmatchedSize, |
| | 95 Utf16::CombineSurrogatePair(previous, c), |
| | 96 Utf16::kNoPreviousCharacter) - kUnmatchedSize; |
| | 97 } |
| 91 str[0] = 0xE0 | (c >> 12); | 98 str[0] = 0xE0 | (c >> 12); |
| 92 str[1] = 0x80 | ((c >> 6) & kMask); | 99 str[1] = 0x80 | ((c >> 6) & kMask); |
| 93 str[2] = 0x80 | (c & kMask); | 100 str[2] = 0x80 | (c & kMask); |
| 94 return 3; | 101 return 3; |
| 95 } else { | 102 } else { |
| 96 str[0] = 0xF0 | (c >> 18); | 103 str[0] = 0xF0 | (c >> 18); |
| 97 str[1] = 0x80 | ((c >> 12) & kMask); | 104 str[1] = 0x80 | ((c >> 12) & kMask); |
| 98 str[2] = 0x80 | ((c >> 6) & kMask); | 105 str[2] = 0x80 | ((c >> 6) & kMask); |
| 99 str[3] = 0x80 | (c & kMask); | 106 str[3] = 0x80 | (c & kMask); |
| 100 return 4; | 107 return 4; |
| 101 } | 108 } |
| 102 } | 109 } |
| 103 | 110 |
| 104 | 111 |
| 105 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { | 112 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { |
| 106 if (length <= 0) return kBadChar; | 113 if (length <= 0) return kBadChar; |
| 107 byte first = bytes[0]; | 114 byte first = bytes[0]; |
| 108 // Characters between 0000 and 0007F are encoded as a single character | 115 // Characters between 0000 and 0007F are encoded as a single character |
| 109 if (first <= kMaxOneByteChar) { | 116 if (first <= kMaxOneByteChar) { |
| 110 *cursor += 1; | 117 *cursor += 1; |
| 111 return first; | 118 return first; |
| 112 } | 119 } |
| 113 return CalculateValue(bytes, length, cursor); | 120 return CalculateValue(bytes, length, cursor); |
| 114 } | 121 } |
| 115 | 122 |
| 116 unsigned Utf8::Length(uchar c) { | 123 unsigned Utf8::Length(uchar c, int previous) { |
| 117 if (c <= kMaxOneByteChar) { | 124 if (c <= kMaxOneByteChar) { |
| 118 return 1; | 125 return 1; |
| 119 } else if (c <= kMaxTwoByteChar) { | 126 } else if (c <= kMaxTwoByteChar) { |
| 120 return 2; | 127 return 2; |
| 121 } else if (c <= kMaxThreeByteChar) { | 128 } else if (c <= kMaxThreeByteChar) { |
| | 129 if (Utf16::IsTrailSurrogate(c) && |
| | 130 Utf16::IsLeadSurrogate(previous)) { |
| | 131 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; |
| | 132 } |
| 122 return 3; | 133 return 3; |
| 123 } else { | 134 } else { |
| 124 return 4; | 135 return 4; |
| 125 } | 136 } |
| 126 } | 137 } |
| 127 | 138 |
| 128 uchar CharacterStream::GetNext() { | 139 uchar CharacterStream::GetNext() { |
| 129 uchar result = DecodeCharacter(buffer_, &cursor_); | 140 uchar result = DecodeCharacter(buffer_, &cursor_); |
| 130 if (remaining_ == 1) { | 141 if (remaining_ == 1) { |
| 131 cursor_ = 0; | 142 cursor_ = 0; |
| (...skipping 97 matching lines...) | (...skipping 97 matching lines...) |
| 229 | 240 |
| 230 template <unsigned s> | 241 template <unsigned s> |
| 231 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) | 242 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) |
| 232 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, | 243 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, |
| 233 length)) { | 244 length)) { |
| 234 } | 245 } |
| 235 | 246 |
| 236 } // namespace unibrow | 247 } // namespace unibrow |
| 237 | 248 |
| 238 #endif // V8_UNICODE_INL_H_ | 249 #endif // V8_UNICODE_INL_H_ |
| OLD | NEW |