| OLD | NEW |
| 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 130 if (Utf16::IsTrailSurrogate(c) && | 130 if (Utf16::IsTrailSurrogate(c) && |
| 131 Utf16::IsLeadSurrogate(previous)) { | 131 Utf16::IsLeadSurrogate(previous)) { |
| 132 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; | 132 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; |
| 133 } | 133 } |
| 134 return 3; | 134 return 3; |
| 135 } else { | 135 } else { |
| 136 return 4; | 136 return 4; |
| 137 } | 137 } |
| 138 } | 138 } |
| 139 | 139 |
| 140 uchar CharacterStream::GetNext() { | |
| 141 uchar result = DecodeCharacter(buffer_, &cursor_); | |
| 142 if (remaining_ == 1) { | |
| 143 cursor_ = 0; | |
| 144 FillBuffer(); | |
| 145 } else { | |
| 146 remaining_--; | |
| 147 } | |
| 148 ASSERT(BoundsCheck(cursor_)); | |
| 149 return result; | |
| 150 } | |
| 151 | |
| 152 #if __BYTE_ORDER == __LITTLE_ENDIAN | |
| 153 #define IF_LITTLE(expr) expr | |
| 154 #define IF_BIG(expr) ((void) 0) | |
| 155 #elif __BYTE_ORDER == __BIG_ENDIAN | |
| 156 #define IF_LITTLE(expr) ((void) 0) | |
| 157 #define IF_BIG(expr) expr | |
| 158 #else | |
| 159 #warning Unknown byte ordering | |
| 160 #endif | |
| 161 | |
| 162 bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer, | |
| 163 unsigned capacity, unsigned& offset) { | |
| 164 if (offset >= capacity) return false; | |
| 165 buffer[offset] = c; | |
| 166 offset += 1; | |
| 167 return true; | |
| 168 } | |
| 169 | |
| 170 bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer, | |
| 171 unsigned capacity, unsigned& offset) { | |
| 172 unsigned aligned = (offset + 0x3) & ~0x3; | |
| 173 if ((aligned + sizeof(uchar)) > capacity) | |
| 174 return false; | |
| 175 if (offset == aligned) { | |
| 176 IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = (c << 8) | 0x80); | |
| 177 IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c | (1 << 31)); | |
| 178 } else { | |
| 179 buffer[offset] = 0x80; | |
| 180 IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = c << 8); | |
| 181 IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c); | |
| 182 } | |
| 183 offset = aligned + sizeof(uchar); | |
| 184 return true; | |
| 185 } | |
| 186 | |
| 187 bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity, | |
| 188 unsigned& offset) { | |
| 189 if (c <= Utf8::kMaxOneByteChar) { | |
| 190 return EncodeAsciiCharacter(c, buffer, capacity, offset); | |
| 191 } else { | |
| 192 return EncodeNonAsciiCharacter(c, buffer, capacity, offset); | |
| 193 } | |
| 194 } | |
| 195 | |
| 196 uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) { | |
| 197 byte b = buffer[*offset]; | |
| 198 if (b <= Utf8::kMaxOneByteChar) { | |
| 199 (*offset)++; | |
| 200 return b; | |
| 201 } else { | |
| 202 unsigned aligned = (*offset + 0x3) & ~0x3; | |
| 203 *offset = aligned + sizeof(uchar); | |
| 204 IF_LITTLE(return *reinterpret_cast<const uchar*>(buffer + aligned) >> 8); | |
| 205 IF_BIG(return *reinterpret_cast<const uchar*>(buffer + aligned) & | |
| 206 ~(1 << 31)); | |
| 207 } | |
| 208 } | |
| 209 | |
| 210 #undef IF_LITTLE | |
| 211 #undef IF_BIG | |
| 212 | |
| 213 template <class R, class I, unsigned s> | |
| 214 void InputBuffer<R, I, s>::FillBuffer() { | |
| 215 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
| 216 } | |
| 217 | |
| 218 template <class R, class I, unsigned s> | |
| 219 void InputBuffer<R, I, s>::Rewind() { | |
| 220 Reset(input_); | |
| 221 } | |
| 222 | |
| 223 template <class R, class I, unsigned s> | |
| 224 void InputBuffer<R, I, s>::Reset(unsigned position, I input) { | |
| 225 input_ = input; | |
| 226 remaining_ = 0; | |
| 227 cursor_ = 0; | |
| 228 offset_ = position; | |
| 229 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
| 230 } | |
| 231 | |
| 232 template <class R, class I, unsigned s> | |
| 233 void InputBuffer<R, I, s>::Reset(I input) { | |
| 234 Reset(0, input); | |
| 235 } | |
| 236 | |
| 237 template <class R, class I, unsigned s> | |
| 238 void InputBuffer<R, I, s>::Seek(unsigned position) { | |
| 239 offset_ = position; | |
| 240 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
| 241 } | |
| 242 | |
| 243 Utf8DecoderBase::Utf8DecoderBase() | 140 Utf8DecoderBase::Utf8DecoderBase() |
| 244 : unbuffered_start_(NULL), | 141 : unbuffered_start_(NULL), |
| 245 utf16_length_(0), | 142 utf16_length_(0), |
| 246 last_byte_of_buffer_unused_(false) {} | 143 last_byte_of_buffer_unused_(false) {} |
| 247 | 144 |
| 248 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, | 145 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, |
| 249 unsigned buffer_length, | 146 unsigned buffer_length, |
| 250 const uint8_t* stream, | 147 const uint8_t* stream, |
| 251 unsigned stream_length) { | 148 unsigned stream_length) { |
| 252 Reset(buffer, buffer_length, stream, stream_length); | 149 Reset(buffer, buffer_length, stream, stream_length); |
| (...skipping 30 matching lines...) Expand all Loading... |
| 283 // Copy the rest the slow way. | 180 // Copy the rest the slow way. |
| 284 WriteUtf16Slow(unbuffered_start_, | 181 WriteUtf16Slow(unbuffered_start_, |
| 285 data + buffer_length, | 182 data + buffer_length, |
| 286 length - buffer_length); | 183 length - buffer_length); |
| 287 return length; | 184 return length; |
| 288 } | 185 } |
| 289 | 186 |
| 290 } // namespace unibrow | 187 } // namespace unibrow |
| 291 | 188 |
| 292 #endif // V8_UNICODE_INL_H_ | 189 #endif // V8_UNICODE_INL_H_ |
| OLD | NEW |