Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(210)

Side by Side Diff: src/preparser-api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 28 matching lines...) Expand all
39 #include "list.h" 39 #include "list.h"
40 #include "hashmap.h" 40 #include "hashmap.h"
41 #include "preparse-data-format.h" 41 #include "preparse-data-format.h"
42 #include "preparse-data.h" 42 #include "preparse-data.h"
43 #include "preparser.h" 43 #include "preparser.h"
44 44
45 namespace v8 { 45 namespace v8 {
46 namespace internal { 46 namespace internal {
47 47
48 // UTF16Buffer based on a v8::UnicodeInputStream. 48 // UTF16Buffer based on a v8::UnicodeInputStream.
49 class InputStreamUTF16Buffer : public UC16CharacterStream { 49 class InputStreamUtf16Buffer : public Utf16CharacterStream {
50 public: 50 public:
51 /* The InputStreamUTF16Buffer maintains an internal buffer 51 /* The InputStreamUtf16Buffer maintains an internal buffer
52 * that is filled in chunks from the UC16CharacterStream. 52 * that is filled in chunks from the Utf16CharacterStream.
53 * It also maintains unlimited pushback capability, but optimized 53 * It also maintains unlimited pushback capability, but optimized
54 * for small pushbacks. 54 * for small pushbacks.
55 * The pushback_buffer_ pointer points to the limit of pushbacks 55 * The pushback_buffer_ pointer points to the limit of pushbacks
56 * in the current buffer. There is room for a few pushback'ed chars before 56 * in the current buffer. There is room for a few pushback'ed chars before
57 * the buffer containing the most recently read chunk. If this is overflowed, 57 * the buffer containing the most recently read chunk. If this is overflowed,
58 * an external buffer is allocated/reused to hold further pushbacks, and 58 * an external buffer is allocated/reused to hold further pushbacks, and
59 * pushback_buffer_ and buffer_cursor_/buffer_end_ now point to the 59 * pushback_buffer_ and buffer_cursor_/buffer_end_ now point to the
60 * new buffer. When this buffer is read to the end again, the cursor is 60 * new buffer. When this buffer is read to the end again, the cursor is
61 * switched back to the internal buffer. 61 * switched back to the internal buffer.
62 */ 62 */
63 explicit InputStreamUTF16Buffer(v8::UnicodeInputStream* stream) 63 explicit InputStreamUtf16Buffer(v8::UnicodeInputStream* stream)
64 : UC16CharacterStream(), 64 : Utf16CharacterStream(),
65 stream_(stream), 65 stream_(stream),
66 pushback_buffer_(buffer_), 66 pushback_buffer_(buffer_),
67 pushback_buffer_end_cache_(NULL), 67 pushback_buffer_end_cache_(NULL),
68 pushback_buffer_backing_(NULL), 68 pushback_buffer_backing_(NULL),
69 pushback_buffer_backing_size_(0) { 69 pushback_buffer_backing_size_(0) {
70 buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize; 70 buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize;
71 } 71 }
72 72
73 virtual ~InputStreamUTF16Buffer() { 73 virtual ~InputStreamUtf16Buffer() {
74 if (pushback_buffer_backing_ != NULL) { 74 if (pushback_buffer_backing_ != NULL) {
75 DeleteArray(pushback_buffer_backing_); 75 DeleteArray(pushback_buffer_backing_);
76 } 76 }
77 } 77 }
78 78
79 virtual void PushBack(uc32 ch) { 79 virtual void PushBack(uc32 ch) {
80 ASSERT(pos_ > 0); 80 ASSERT(pos_ > 0);
81 if (ch == kEndOfInput) { 81 if (ch == kEndOfInput) {
82 pos_--; 82 pos_--;
83 return; 83 return;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 buffer_cursor_ = buffer_; 120 buffer_cursor_ = buffer_;
121 buffer_end_ = pushback_buffer_end_cache_; 121 buffer_end_ = pushback_buffer_end_cache_;
122 pushback_buffer_end_cache_ = NULL; 122 pushback_buffer_end_cache_ = NULL;
123 return buffer_end_ > buffer_cursor_; 123 return buffer_end_ > buffer_cursor_;
124 } 124 }
125 // Copy the top of the buffer into the pushback area. 125 // Copy the top of the buffer into the pushback area.
126 int32_t value; 126 int32_t value;
127 uc16* buffer_start = buffer_ + kPushBackSize; 127 uc16* buffer_start = buffer_ + kPushBackSize;
128 buffer_cursor_ = buffer_end_ = buffer_start; 128 buffer_cursor_ = buffer_end_ = buffer_start;
129 while ((value = stream_->Next()) >= 0) { 129 while ((value = stream_->Next()) >= 0) {
130 if (value > static_cast<int32_t>(unibrow::Utf8::kMaxThreeByteChar)) { 130 if (value >
131 value = unibrow::Utf8::kBadChar; 131 static_cast<int32_t>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
132 buffer_start[buffer_end_++ - buffer_start] =
133 unibrow::Utf16::LeadSurrogate(value);
134 buffer_start[buffer_end_++ - buffer_start] =
135 unibrow::Utf16::TrailSurrogate(value);
136 } else {
137 // buffer_end_ is a const pointer, but buffer_ is writable.
138 buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value);
132 } 139 }
133 // buffer_end_ is a const pointer, but buffer_ is writable. 140 // Stop one before the end of the buffer in case we get a surrogate pair.
134 buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value); 141 if (buffer_end_ <= buffer_ + 1 + kPushBackSize + kBufferSize) break;
135 if (buffer_end_ == buffer_ + kPushBackSize + kBufferSize) break;
136 } 142 }
137 return buffer_end_ > buffer_start; 143 return buffer_end_ > buffer_start;
138 } 144 }
139 145
140 virtual unsigned SlowSeekForward(unsigned pos) { 146 virtual unsigned SlowSeekForward(unsigned pos) {
141 // Seeking in the input is not used by preparsing. 147 // Seeking in the input is not used by preparsing.
142 // It's only used by the real parser based on preparser data. 148 // It's only used by the real parser based on preparser data.
143 UNIMPLEMENTED(); 149 UNIMPLEMENTED();
144 return 0; 150 return 0;
145 } 151 }
(...skipping 26 matching lines...) Expand all
172 178
173 bool EnableSlowAsserts() { return true; } 179 bool EnableSlowAsserts() { return true; }
174 180
175 } // namespace internal. 181 } // namespace internal.
176 182
177 183
178 UnicodeInputStream::~UnicodeInputStream() { } 184 UnicodeInputStream::~UnicodeInputStream() { }
179 185
180 186
181 PreParserData Preparse(UnicodeInputStream* input, size_t max_stack) { 187 PreParserData Preparse(UnicodeInputStream* input, size_t max_stack) {
182 internal::InputStreamUTF16Buffer buffer(input); 188 internal::InputStreamUtf16Buffer buffer(input);
183 uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack; 189 uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack;
184 internal::UnicodeCache unicode_cache; 190 internal::UnicodeCache unicode_cache;
185 internal::Scanner scanner(&unicode_cache); 191 internal::Scanner scanner(&unicode_cache);
186 scanner.Initialize(&buffer); 192 scanner.Initialize(&buffer);
187 internal::CompleteParserRecorder recorder; 193 internal::CompleteParserRecorder recorder;
188 preparser::PreParser::PreParseResult result = 194 preparser::PreParser::PreParseResult result =
189 preparser::PreParser::PreParseProgram(&scanner, 195 preparser::PreParser::PreParseProgram(&scanner,
190 &recorder, 196 &recorder,
191 internal::kAllowLazy, 197 internal::kAllowLazy,
192 stack_limit); 198 stack_limit);
193 if (result == preparser::PreParser::kPreParseStackOverflow) { 199 if (result == preparser::PreParser::kPreParseStackOverflow) {
194 return PreParserData::StackOverflow(); 200 return PreParserData::StackOverflow();
195 } 201 }
196 internal::Vector<unsigned> pre_data = recorder.ExtractData(); 202 internal::Vector<unsigned> pre_data = recorder.ExtractData();
197 size_t size = pre_data.length() * sizeof(pre_data[0]); 203 size_t size = pre_data.length() * sizeof(pre_data[0]);
198 unsigned char* data = reinterpret_cast<unsigned char*>(pre_data.start()); 204 unsigned char* data = reinterpret_cast<unsigned char*>(pre_data.start());
199 return PreParserData(size, data); 205 return PreParserData(size, data);
200 } 206 }
201 207
202 } // namespace v8. 208 } // namespace v8.
203 209
204 210
205 // Used by ASSERT macros and other immediate exits. 211 // Used by ASSERT macros and other immediate exits.
206 extern "C" void V8_Fatal(const char* file, int line, const char* format, ...) { 212 extern "C" void V8_Fatal(const char* file, int line, const char* format, ...) {
207 exit(EXIT_FAILURE); 213 exit(EXIT_FAILURE);
208 } 214 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698