Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: src/api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/handles.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/api.cc
===================================================================
--- src/api.cc (revision 10944)
+++ src/api.cc (working copy)
@@ -1429,7 +1429,7 @@
ScriptData* ScriptData::PreCompile(const char* input, int length) {
- i::Utf8ToUC16CharacterStream stream(
+ i::Utf8ToUtf16CharacterStream stream(
reinterpret_cast<const unsigned char*>(input), length);
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
}
@@ -1438,11 +1438,11 @@
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
i::Handle<i::String> str = Utils::OpenHandle(*source);
if (str->IsExternalTwoByteString()) {
- i::ExternalTwoByteStringUC16CharacterStream stream(
+ i::ExternalTwoByteStringUtf16CharacterStream stream(
i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
} else {
- i::GenericStringUC16CharacterStream stream(str, 0, str->length());
+ i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
}
}
@@ -3689,7 +3689,7 @@
int String::Utf8Length() const {
i::Handle<i::String> str = Utils::OpenHandle(this);
if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
- return str->Utf8Length();
+ return i::Utf8Length(str);
}
@@ -3735,11 +3735,13 @@
int i;
int pos = 0;
int nchars = 0;
+ int previous = unibrow::Utf16::kNoPreviousCharacter;
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
i::uc32 c = write_input_buffer.GetNext();
- int written = unibrow::Utf8::Encode(buffer + pos, c);
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
pos += written;
nchars++;
+ previous = c;
}
if (i < len) {
// For the last characters we need to check the length for each one
@@ -3748,16 +3750,33 @@
char intermediate[unibrow::Utf8::kMaxEncodedSize];
for (; i < len && pos < capacity; i++) {
i::uc32 c = write_input_buffer.GetNext();
- int written = unibrow::Utf8::Encode(intermediate, c);
- if (pos + written <= capacity) {
- for (int j = 0; j < written; j++)
- buffer[pos + j] = intermediate[j];
+ if (unibrow::Utf16::IsTrailSurrogate(c) &&
+ unibrow::Utf16::IsLeadSurrogate(previous)) {
+ // We can't use the intermediate buffer here because the encoding
+ // of surrogate pairs is done under the assumption that you can step
+ // back and fix the UTF-8 stream. Luckily we only need space for one
+ // more byte, so there is always space.
+ ASSERT(pos < capacity);
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
+ ASSERT(written == 1);
pos += written;
nchars++;
} else {
- // We've reached the end of the buffer
- break;
+ int written =
+ unibrow::Utf8::Encode(intermediate,
+ c,
+ unibrow::Utf16::kNoPreviousCharacter);
+ if (pos + written <= capacity) {
+ for (int j = 0; j < written; j++)
+ buffer[pos + j] = intermediate[j];
+ pos += written;
+ nchars++;
+ } else {
+ // We've reached the end of the buffer
+ break;
+ }
}
+ previous = c;
}
}
if (nchars_ref != NULL) *nchars_ref = nchars;
@@ -5237,7 +5256,8 @@
TryCatch try_catch;
Handle<String> str = obj->ToString();
if (str.IsEmpty()) return;
- length_ = str->Utf8Length();
+ i::Handle<i::String> i_str = Utils::OpenHandle(*str);
+ length_ = i::Utf8Length(i_str);
str_ = i::NewArray<char>(length_ + 1);
str->WriteUtf8(str_);
}
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/handles.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698