Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: src/unicode.h

Issue 11727004: Remove InputBuffer (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.h ('k') | src/unicode.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 CacheEntry entries_[kSize]; 93 CacheEntry entries_[kSize];
94 }; 94 };
95 95
96 class UnicodeData { 96 class UnicodeData {
97 private: 97 private:
98 friend class Test; 98 friend class Test;
99 static int GetByteCount(); 99 static int GetByteCount();
100 static const uchar kMaxCodePoint; 100 static const uchar kMaxCodePoint;
101 }; 101 };
102 102
103 // --- U t f 8 a n d 16 ---
104
105 template <typename Data>
106 class Buffer {
107 public:
108 inline Buffer(Data data, unsigned length) : data_(data), length_(length) { }
109 inline Buffer() : data_(0), length_(0) { }
110 Data data() { return data_; }
111 unsigned length() { return length_; }
112 private:
113 Data data_;
114 unsigned length_;
115 };
116
117
118 class Utf16 { 103 class Utf16 {
119 public: 104 public:
120 static inline bool IsLeadSurrogate(int code) { 105 static inline bool IsLeadSurrogate(int code) {
121 if (code == kNoPreviousCharacter) return false; 106 if (code == kNoPreviousCharacter) return false;
122 return (code & 0xfc00) == 0xd800; 107 return (code & 0xfc00) == 0xd800;
123 } 108 }
124 static inline bool IsTrailSurrogate(int code) { 109 static inline bool IsTrailSurrogate(int code) {
125 if (code == kNoPreviousCharacter) return false; 110 if (code == kNoPreviousCharacter) return false;
126 return (code & 0xfc00) == 0xdc00; 111 return (code & 0xfc00) == 0xdc00;
127 } 112 }
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 151
167 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together 152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together
168 // that match are coded as a 4 byte UTF-8 sequence. 153 // that match are coded as a 4 byte UTF-8 sequence.
169 static const unsigned kBytesSavedByCombiningSurrogates = 2; 154 static const unsigned kBytesSavedByCombiningSurrogates = 2;
170 static const unsigned kSizeOfUnmatchedSurrogate = 3; 155 static const unsigned kSizeOfUnmatchedSurrogate = 3;
171 static inline uchar ValueOf(const byte* str, 156 static inline uchar ValueOf(const byte* str,
172 unsigned length, 157 unsigned length,
173 unsigned* cursor); 158 unsigned* cursor);
174 }; 159 };
175 160
176 // --- C h a r a c t e r S t r e a m ---
177
178 class CharacterStream {
179 public:
180 inline uchar GetNext();
181 inline bool has_more() { return remaining_ != 0; }
182 // Note that default implementation is not efficient.
183 virtual void Seek(unsigned);
184 unsigned Length();
185 unsigned Utf16Length();
186 virtual ~CharacterStream() { }
187 static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
188 unsigned& offset);
189 static inline bool EncodeAsciiCharacter(uchar c, byte* buffer,
190 unsigned capacity, unsigned& offset);
191 static inline bool EncodeNonAsciiCharacter(uchar c, byte* buffer,
192 unsigned capacity, unsigned& offset);
193 static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
194 virtual void Rewind() = 0;
195
196 protected:
197 virtual void FillBuffer() = 0;
198 virtual bool BoundsCheck(unsigned offset) = 0;
199 // The number of characters left in the current buffer
200 unsigned remaining_;
201 // The current offset within the buffer
202 unsigned cursor_;
203 // The buffer containing the decoded characters.
204 const byte* buffer_;
205 };
206
207 // --- I n p u t B u f f e r ---
208
209 /**
210 * Provides efficient access to encoded characters in strings. It
211 * does so by reading characters one block at a time, rather than one
212 * character at a time, which gives string implementations an
213 * opportunity to optimize the decoding.
214 */
215 template <class Reader, class Input = Reader*, unsigned kSize = 256>
216 class InputBuffer : public CharacterStream {
217 public:
218 virtual void Rewind();
219 inline void Reset(Input input);
220 void Seek(unsigned position);
221 inline void Reset(unsigned position, Input input);
222 protected:
223 InputBuffer() { }
224 explicit InputBuffer(Input input) { Reset(input); }
225 virtual void FillBuffer();
226 virtual bool BoundsCheck(unsigned offset) {
227 return (buffer_ != util_buffer_) || (offset < kSize);
228 }
229
230 // A custom offset that can be used by the string implementation to
231 // mark progress within the encoded string.
232 unsigned offset_;
233 // The input string
234 Input input_;
235 // To avoid heap allocation, we keep an internal buffer to which
236 // the encoded string can write its characters. The string
237 // implementation is free to decide whether it wants to use this
238 // buffer or not.
239 byte util_buffer_[kSize];
240 };
241
242 161
243 class Utf8DecoderBase { 162 class Utf8DecoderBase {
244 public: 163 public:
245 // Initialization done in subclass. 164 // Initialization done in subclass.
246 inline Utf8DecoderBase(); 165 inline Utf8DecoderBase();
247 inline Utf8DecoderBase(uint16_t* buffer, 166 inline Utf8DecoderBase(uint16_t* buffer,
248 unsigned buffer_length, 167 unsigned buffer_length,
249 const uint8_t* stream, 168 const uint8_t* stream,
250 unsigned stream_length); 169 unsigned stream_length);
251 inline unsigned Utf16Length() const { return utf16_length_; } 170 inline unsigned Utf16Length() const { return utf16_length_; }
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
337 static const int kMaxWidth = 1; 256 static const int kMaxWidth = 1;
338 static int Convert(uchar c, 257 static int Convert(uchar c,
339 uchar n, 258 uchar n,
340 uchar* result, 259 uchar* result,
341 bool* allow_caching_ptr); 260 bool* allow_caching_ptr);
342 }; 261 };
343 262
344 } // namespace unibrow 263 } // namespace unibrow
345 264
346 #endif // V8_UNICODE_H_ 265 #endif // V8_UNICODE_H_
OLD | NEW
« no previous file with comments | « src/scanner.h ('k') | src/unicode.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698