OLD | NEW |
1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
130 if (Utf16::IsTrailSurrogate(c) && | 130 if (Utf16::IsTrailSurrogate(c) && |
131 Utf16::IsLeadSurrogate(previous)) { | 131 Utf16::IsLeadSurrogate(previous)) { |
132 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; | 132 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; |
133 } | 133 } |
134 return 3; | 134 return 3; |
135 } else { | 135 } else { |
136 return 4; | 136 return 4; |
137 } | 137 } |
138 } | 138 } |
139 | 139 |
140 uchar CharacterStream::GetNext() { | |
141 uchar result = DecodeCharacter(buffer_, &cursor_); | |
142 if (remaining_ == 1) { | |
143 cursor_ = 0; | |
144 FillBuffer(); | |
145 } else { | |
146 remaining_--; | |
147 } | |
148 ASSERT(BoundsCheck(cursor_)); | |
149 return result; | |
150 } | |
151 | |
// Endianness-dependent helpers. IF_LITTLE(expr) expands to expr only when
// compiling for a little-endian target and IF_BIG(expr) only for a big-endian
// one; the inactive macro expands to a no-op expression.
//
// NOTE(review): inside #if an undefined identifier evaluates to 0, so if
// neither __BYTE_ORDER nor __LITTLE_ENDIAN is defined by the time this is
// reached, the first branch is selected silently. Confirm that every
// translation unit including this code gets these macros from its platform
// headers.
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IF_LITTLE(expr) expr
#define IF_BIG(expr) ((void) 0)
#elif __BYTE_ORDER == __BIG_ENDIAN
#define IF_LITTLE(expr) ((void) 0)
#define IF_BIG(expr) expr
#else
// Neither helper is defined on this path, so any later use would fail with an
// unrelated-looking diagnostic. Stop here with a clear message instead of the
// non-standard #warning.
#error Unknown byte ordering
#endif
161 | |
162 bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer, | |
163 unsigned capacity, unsigned& offset) { | |
164 if (offset >= capacity) return false; | |
165 buffer[offset] = c; | |
166 offset += 1; | |
167 return true; | |
168 } | |
169 | |
170 bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer, | |
171 unsigned capacity, unsigned& offset) { | |
172 unsigned aligned = (offset + 0x3) & ~0x3; | |
173 if ((aligned + sizeof(uchar)) > capacity) | |
174 return false; | |
175 if (offset == aligned) { | |
176 IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = (c << 8) | 0x80); | |
177 IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c | (1 << 31)); | |
178 } else { | |
179 buffer[offset] = 0x80; | |
180 IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = c << 8); | |
181 IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c); | |
182 } | |
183 offset = aligned + sizeof(uchar); | |
184 return true; | |
185 } | |
186 | |
187 bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity, | |
188 unsigned& offset) { | |
189 if (c <= Utf8::kMaxOneByteChar) { | |
190 return EncodeAsciiCharacter(c, buffer, capacity, offset); | |
191 } else { | |
192 return EncodeNonAsciiCharacter(c, buffer, capacity, offset); | |
193 } | |
194 } | |
195 | |
196 uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) { | |
197 byte b = buffer[*offset]; | |
198 if (b <= Utf8::kMaxOneByteChar) { | |
199 (*offset)++; | |
200 return b; | |
201 } else { | |
202 unsigned aligned = (*offset + 0x3) & ~0x3; | |
203 *offset = aligned + sizeof(uchar); | |
204 IF_LITTLE(return *reinterpret_cast<const uchar*>(buffer + aligned) >> 8); | |
205 IF_BIG(return *reinterpret_cast<const uchar*>(buffer + aligned) & | |
206 ~(1 << 31)); | |
207 } | |
208 } | |
209 | |
// Remove the endianness helper macros so that they are not visible to any
// code that follows this point.
210 #undef IF_LITTLE | |
211 #undef IF_BIG | |
212 | |
213 template <class R, class I, unsigned s> | |
214 void InputBuffer<R, I, s>::FillBuffer() { | |
215 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
216 } | |
217 | |
218 template <class R, class I, unsigned s> | |
219 void InputBuffer<R, I, s>::Rewind() { | |
220 Reset(input_); | |
221 } | |
222 | |
223 template <class R, class I, unsigned s> | |
224 void InputBuffer<R, I, s>::Reset(unsigned position, I input) { | |
225 input_ = input; | |
226 remaining_ = 0; | |
227 cursor_ = 0; | |
228 offset_ = position; | |
229 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
230 } | |
231 | |
232 template <class R, class I, unsigned s> | |
233 void InputBuffer<R, I, s>::Reset(I input) { | |
234 Reset(0, input); | |
235 } | |
236 | |
237 template <class R, class I, unsigned s> | |
238 void InputBuffer<R, I, s>::Seek(unsigned position) { | |
239 offset_ = position; | |
240 buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_); | |
241 } | |
242 | |
// Default constructor: creates a decoder with no input attached. The
// unbuffered-start pointer is NULL, the recorded UTF-16 length is zero and
// the "last byte of buffer unused" flag is cleared.
243 Utf8DecoderBase::Utf8DecoderBase() | 140 Utf8DecoderBase::Utf8DecoderBase() |
244 : unbuffered_start_(NULL), | 141 : unbuffered_start_(NULL), |
245 utf16_length_(0), | 142 utf16_length_(0), |
246 last_byte_of_buffer_unused_(false) {} | 143 last_byte_of_buffer_unused_(false) {} |
247 | 144 |
248 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, | 145 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, |
249 unsigned buffer_length, | 146 unsigned buffer_length, |
250 const uint8_t* stream, | 147 const uint8_t* stream, |
251 unsigned stream_length) { | 148 unsigned stream_length) { |
252 Reset(buffer, buffer_length, stream, stream_length); | 149 Reset(buffer, buffer_length, stream, stream_length); |
(...skipping 30 matching lines...) Expand all Loading... |
283 // Copy the rest the slow way. | 180 // Copy the rest the slow way. |
284 WriteUtf16Slow(unbuffered_start_, | 181 WriteUtf16Slow(unbuffered_start_, |
285 data + buffer_length, | 182 data + buffer_length, |
286 length - buffer_length); | 183 length - buffer_length); |
287 return length; | 184 return length; |
288 } | 185 } |
289 | 186 |
290 } // namespace unibrow | 187 } // namespace unibrow |
291 | 188 |
292 #endif // V8_UNICODE_INL_H_ | 189 #endif // V8_UNICODE_INL_H_ |
OLD | NEW |