Index: src/heap.cc |
=================================================================== |
--- src/heap.cc (revision 10944) |
+++ src/heap.cc (working copy) |
@@ -4175,8 +4175,6 @@ |
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string, |
PretenureFlag pretenure) { |
- // V8 only supports characters in the Basic Multilingual Plane. |
- const uc32 kMaxSupportedChar = 0xFFFF; |
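- // Count the number of characters in the UTF-8 string and check if |
- // it is an ASCII string. |
+ // Count the number of UTF-16 code units needed for the UTF-8 string and |
+ // check if it is an ASCII string. |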
Access<UnicodeCache::Utf8Decoder> |
@@ -4184,8 +4182,12 @@ |
decoder->Reset(string.start(), string.length()); |
int chars = 0; |
while (decoder->has_more()) { |
- decoder->GetNext(); |
- chars++; |
+ uint32_t r = decoder->GetNext(); |
+ if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+ chars++; |
+ } else { |
+ chars += 2; |
+ } |
} |
Object* result; |
@@ -4196,10 +4198,15 @@ |
// Convert and copy the characters into the new object. |
String* string_result = String::cast(result); |
decoder->Reset(string.start(), string.length()); |
- for (int i = 0; i < chars; i++) { |
- uc32 r = decoder->GetNext(); |
- if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; } |
- string_result->Set(i, r); |
+ int i = 0; |
+ while (i < chars) { |
+ uint32_t r = decoder->GetNext(); |
+ if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+ string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r)); |
+ string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r)); |
+ } else { |
+ string_result->Set(i++, r); |
+ } |
} |
return result; |
} |
@@ -4256,7 +4263,7 @@ |
uint32_t hash_field) { |
ASSERT(chars >= 0); |
- // Ensure the chars matches the number of characters in the buffer. |
+ // Ensure that chars matches the number of UTF-16 code units in the buffer. |
- ASSERT(static_cast<unsigned>(chars) == buffer->Length()); |
+ ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length()); |
// Determine whether the string is ASCII. |
bool is_ascii = true; |
while (buffer->has_more()) { |
@@ -4302,8 +4309,15 @@ |
ASSERT_EQ(size, answer->Size()); |
// Fill in the characters. |
- for (int i = 0; i < chars; i++) { |
- answer->Set(i, buffer->GetNext()); |
+ int i = 0; |
+ while (i < chars) { |
+ uint32_t character = buffer->GetNext(); |
+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+ answer->Set(i++, unibrow::Utf16::LeadSurrogate(character)); |
+ answer->Set(i++, unibrow::Utf16::TrailSurrogate(character)); |
+ } else { |
+ answer->Set(i++, character); |
+ } |
} |
return answer; |
} |