Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: src/api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/debug-agent.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/api.cc
===================================================================
--- src/api.cc (revision 10944)
+++ src/api.cc (working copy)
@@ -1429,7 +1429,7 @@
ScriptData* ScriptData::PreCompile(const char* input, int length) {
- i::Utf8ToUC16CharacterStream stream(
+ i::Utf8ToUtf16CharacterStream stream(
reinterpret_cast<const unsigned char*>(input), length);
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
}
@@ -1438,11 +1438,11 @@
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
i::Handle<i::String> str = Utils::OpenHandle(*source);
if (str->IsExternalTwoByteString()) {
- i::ExternalTwoByteStringUC16CharacterStream stream(
+ i::ExternalTwoByteStringUtf16CharacterStream stream(
i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
} else {
- i::GenericStringUC16CharacterStream stream(str, 0, str->length());
+ i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
}
}
@@ -3689,7 +3689,13 @@
int String::Utf8Length() const {
i::Handle<i::String> str = Utils::OpenHandle(this);
if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
- return str->Utf8Length();
+ int length = str->Utf8Length();
+ if (length < 0) {
+ FlattenString(str);
+ length = str->Utf8Length();
+ }
+ ASSERT(length >= 0);
+ return length;
}
@@ -3735,11 +3741,13 @@
int i;
int pos = 0;
int nchars = 0;
+ int previous = unibrow::Utf8::kNoPreviousCharacter;
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
i::uc32 c = write_input_buffer.GetNext();
- int written = unibrow::Utf8::Encode(buffer + pos, c);
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
pos += written;
nchars++;
+ previous = c;
}
if (i < len) {
// For the last characters we need to check the length for each one
@@ -3748,16 +3756,34 @@
char intermediate[unibrow::Utf8::kMaxEncodedSize];
for (; i < len && pos < capacity; i++) {
i::uc32 c = write_input_buffer.GetNext();
- int written = unibrow::Utf8::Encode(intermediate, c);
- if (pos + written <= capacity) {
- for (int j = 0; j < written; j++)
- buffer[pos + j] = intermediate[j];
+ if (unibrow::Utf16::IsTrailSurrogate(c) &&
+ previous != unibrow::Utf8::kNoPreviousCharacter &&
+ unibrow::Utf16::IsLeadSurrogate(previous)) {
+ // We can't use the intermediate buffer here because the encoding
+ // of surrogate pairs is done under the assumption that you can step
+ // back and fix the UTF-8 stream. Luckily we only need space for one
+ // more byte, so there is always space.
+ ASSERT(pos < capacity);
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
+ ASSERT(written == 1);
pos += written;
nchars++;
} else {
- // We've reached the end of the buffer
- break;
+ int written =
+ unibrow::Utf8::Encode(intermediate,
+ c,
+ unibrow::Utf8::kNoPreviousCharacter);
+ if (pos + written <= capacity) {
+ for (int j = 0; j < written; j++)
+ buffer[pos + j] = intermediate[j];
+ pos += written;
+ nchars++;
+ } else {
+ // We've reached the end of the buffer
+ break;
+ }
}
+ previous = c;
}
}
if (nchars_ref != NULL) *nchars_ref = nchars;
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/debug-agent.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698