Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(939)

Unified Diff: src/unicode.cc

Issue 11649018: Remove Utf8InputBuffer (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/unicode.cc
diff --git a/src/unicode.cc b/src/unicode.cc
index 14f380642a7a8dcd8286f93a3dbd176b59a68b99..3897f8f8931957d93ca184931c927215af1b94c9 100644
--- a/src/unicode.cc
+++ b/src/unicode.cc
@@ -277,58 +277,6 @@ uchar Utf8::CalculateValue(const byte* str,
}
-const byte* Utf8::ReadBlock(Buffer<const char*> str, byte* buffer,
- unsigned capacity, unsigned* chars_read_ptr, unsigned* offset_ptr) {
- unsigned offset = *offset_ptr;
- // Returns the next block of characters of str starting at *offset_ptr and advances *offset_ptr past it. Bail out early (zero chars read, NULL result) if we've reached the end of the string.
- if (offset == str.length()) {
- *chars_read_ptr = 0;
- return NULL;
- }
- const byte* data = reinterpret_cast<const byte*>(str.data());
- if (data[offset] <= kMaxOneByteChar) {
- // The next character is an ASCII char so we scan forward over
- // the following ASCII characters and return the next pure ASCII
- // substring as a pointer into str's own data; buffer is not written.
- const byte* result = data + offset;
- offset++;
- while ((offset < str.length()) && (data[offset] <= kMaxOneByteChar))
- offset++;
- *chars_read_ptr = offset - *offset_ptr;  // Number of bytes scanned in this run.
- *offset_ptr = offset;
- return result;
- } else {
- // The next character is non-ASCII so we just fill the buffer, stopping when str is exhausted or an Encode* helper returns false (presumably when capacity is reached -- confirm in CharacterStream).
- unsigned cursor = 0;
- unsigned chars_read = 0;
- while (offset < str.length()) {
- uchar c = data[offset];
- if (c <= kMaxOneByteChar) {
- // Fast case for ASCII characters
- if (!CharacterStream::EncodeAsciiCharacter(c,
- buffer,
- capacity,
- cursor))
- break;
- offset += 1;
- } else {
- unsigned chars = 0;
- c = Utf8::ValueOf(data + offset, str.length() - offset, &chars);  // offset is advanced by chars below, once the character has been encoded.
- if (!CharacterStream::EncodeNonAsciiCharacter(c,
- buffer,
- capacity,
- cursor))
- break;
- offset += chars;
- }
- chars_read++;  // Counts characters, not input bytes.
- }
- *offset_ptr = offset;
- *chars_read_ptr = chars_read;
- return buffer;
- }
-}
-
unsigned CharacterStream::Length() {
unsigned result = 0;
while (has_more()) {
@@ -356,6 +304,75 @@ void CharacterStream::Seek(unsigned position) {
}
}
+void Utf8DecoderBase::Reset(uint16_t* buffer,
+ unsigned buffer_length,
+ const uint8_t* stream,
+ unsigned stream_length) {
+ // Decodes the whole UTF-8 stream once: fills buffer with UTF-16 code units while they fit and records the total UTF-16 length. Start by assuming everything will fit in the buffer and stream won't be needed.
+ last_byte_of_buffer_unused_ = false;
+ unbuffered_start_ = NULL;
+ bool writing_to_buffer = true;
+ // Loop until stream is read, writing to buffer as long as buffer has space.
+ unsigned utf16_length = 0;
+ while (stream_length != 0) {
+ unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
+ ASSERT(cursor > 0 && cursor <= stream_length);  // cursor is the number of bytes consumed; it must make progress and stay in bounds.
+ stream += cursor;
+ stream_length -= cursor;
+ bool is_two_byte = character > Utf16::kMaxNonSurrogateCharCode;  // True when the character is written as a lead/trail surrogate pair, i.e. two uint16_t units.
+ utf16_length += is_two_byte ? 2 : 1;
+ // Once the buffer is full we no longer write to it, but still need utf16_length.
+ if (!writing_to_buffer) continue;
+ // Write out the characters to the buffer.
+ // utf16_length already includes this character, so it fits exactly when utf16_length <= buffer_length (equality means it fills the last slot).
+ if (utf16_length <= buffer_length) {
+ if (is_two_byte) {
Yang 2012/12/20 09:20:27 misnomer?
+ *buffer++ = Utf16::LeadSurrogate(character);
+ *buffer++ = Utf16::TrailSurrogate(character);
+ } else {
+ *buffer++ = character;
+ }
+ if (utf16_length == buffer_length) {
+ // Just wrote last character of buffer; anything left in stream starts the unbuffered part.
+ writing_to_buffer = false;
+ unbuffered_start_ = stream;
+ }
+ continue;
+ }
+ // Have gone over buffer: a surrogate pair needs two slots but only one is left.
+ // The last uint16_t slot of buffer stays unused; point unbuffered_start_ back at the start of this character.
+ ASSERT(is_two_byte);  // NOTE(review): a one-unit character could only reach here if buffer_length were 0 -- confirm callers never pass that.
+ writing_to_buffer = false;
+ last_byte_of_buffer_unused_ = true;
+ unbuffered_start_ = stream - cursor;
+ }
+ utf16_length_ = utf16_length;
+}
+
+
+// Decodes UTF-8 characters from |stream| and writes them to |data| as UTF-16
+// until |data_length| code units have been produced. Characters above
+// Utf16::kMaxNonSurrogateCharCode are written as a lead/trail surrogate pair
+// and consume two code units of |data_length|.
+void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
+ uint16_t* data,
+ unsigned data_length) {
+ while (data_length != 0) {
+ unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
+ // There's a total lack of bounds checking for stream
+ // as it was already done in Reset.
+ stream += cursor;
+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ // Check that both surrogates fit before storing them, so the assertion
+ // fires ahead of any out-of-bounds write rather than after it.
+ ASSERT(data_length > 1);
+ *data++ = Utf16::LeadSurrogate(character);
+ *data++ = Utf16::TrailSurrogate(character);
+ data_length -= 2;
+ } else {
+ *data++ = character;
+ data_length -= 1;
+ }
+ }
+}
+
+
// Uppercase: point.category == 'Lu'
static const uint16_t kUppercaseTable0Size = 450;

Powered by Google App Engine
This is Rietveld 408576698