| Index: src/api.cc
|
| ===================================================================
|
| --- src/api.cc (revision 10944)
|
| +++ src/api.cc (working copy)
|
| @@ -1429,7 +1429,7 @@
|
|
|
|
|
| ScriptData* ScriptData::PreCompile(const char* input, int length) {
|
| - i::Utf8ToUC16CharacterStream stream(
|
| + i::Utf8ToUtf16CharacterStream stream(
|
| reinterpret_cast<const unsigned char*>(input), length);
|
| return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
| }
|
| @@ -1438,11 +1438,11 @@
|
| ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
|
| i::Handle<i::String> str = Utils::OpenHandle(*source);
|
| if (str->IsExternalTwoByteString()) {
|
| - i::ExternalTwoByteStringUC16CharacterStream stream(
|
| + i::ExternalTwoByteStringUtf16CharacterStream stream(
|
| i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
|
| return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
| } else {
|
| - i::GenericStringUC16CharacterStream stream(str, 0, str->length());
|
| + i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
|
| return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
|
| }
|
| }
|
| @@ -3689,7 +3689,13 @@
|
| int String::Utf8Length() const {
|
| i::Handle<i::String> str = Utils::OpenHandle(this);
|
| if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
|
| - return str->Utf8Length();
|
| + int length = str->Utf8Length();
|
| + if (length < 0) {
|
| + FlattenString(str);
|
| + length = str->Utf8Length();
|
| + }
|
| + ASSERT(length >= 0);
|
| + return length;
|
| }
|
|
|
|
|
| @@ -3735,11 +3741,13 @@
|
| int i;
|
| int pos = 0;
|
| int nchars = 0;
|
| + int previous = unibrow::Utf8::kNoPreviousCharacter;
|
| for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
|
| i::uc32 c = write_input_buffer.GetNext();
|
| - int written = unibrow::Utf8::Encode(buffer + pos, c);
|
| + int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
| pos += written;
|
| nchars++;
|
| + previous = c;
|
| }
|
| if (i < len) {
|
| // For the last characters we need to check the length for each one
|
| @@ -3748,16 +3756,34 @@
|
| char intermediate[unibrow::Utf8::kMaxEncodedSize];
|
| for (; i < len && pos < capacity; i++) {
|
| i::uc32 c = write_input_buffer.GetNext();
|
| - int written = unibrow::Utf8::Encode(intermediate, c);
|
| - if (pos + written <= capacity) {
|
| - for (int j = 0; j < written; j++)
|
| - buffer[pos + j] = intermediate[j];
|
| + if (unibrow::Utf16::IsTrailSurrogate(c) &&
|
| + previous != unibrow::Utf8::kNoPreviousCharacter &&
|
| + unibrow::Utf16::IsLeadSurrogate(previous)) {
|
| + // We can't use the intermediate buffer here because the encoding
|
| + // of surrogate pairs is done under the assumption that you can step
|
| + // back and fix the UTF-8 stream. Luckily we only need space for one
|
| + // more byte, so there is always space.
|
| + ASSERT(pos < capacity);
|
| + int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
| + ASSERT(written == 1);
|
| pos += written;
|
| nchars++;
|
| } else {
|
| - // We've reached the end of the buffer
|
| - break;
|
| + int written =
|
| + unibrow::Utf8::Encode(intermediate,
|
| + c,
|
| + unibrow::Utf8::kNoPreviousCharacter);
|
| + if (pos + written <= capacity) {
|
| + for (int j = 0; j < written; j++)
|
| + buffer[pos + j] = intermediate[j];
|
| + pos += written;
|
| + nchars++;
|
| + } else {
|
| + // We've reached the end of the buffer
|
| + break;
|
| + }
|
| }
|
| + previous = c;
|
| }
|
| }
|
| if (nchars_ref != NULL) *nchars_ref = nchars;
|
|
|