Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(966)

Unified Diff: src/heap.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/heap.cc
===================================================================
--- src/heap.cc (revision 10944)
+++ src/heap.cc (working copy)
@@ -4175,8 +4175,6 @@
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
PretenureFlag pretenure) {
- // V8 only supports characters in the Basic Multilingual Plane.
- const uc32 kMaxSupportedChar = 0xFFFF;
// Count the number of characters in the UTF-8 string and check if
// it is an ASCII string.
Access<UnicodeCache::Utf8Decoder>
@@ -4184,8 +4182,12 @@
decoder->Reset(string.start(), string.length());
int chars = 0;
while (decoder->has_more()) {
- decoder->GetNext();
- chars++;
+ uint32_t r = decoder->GetNext();
+ if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ chars++;
+ } else {
+ chars += 2;
+ }
}
Object* result;
@@ -4196,10 +4198,15 @@
// Convert and copy the characters into the new object.
String* string_result = String::cast(result);
decoder->Reset(string.start(), string.length());
- for (int i = 0; i < chars; i++) {
- uc32 r = decoder->GetNext();
- if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; }
- string_result->Set(i, r);
+ int i = 0;
+ while (i < chars) {
+ uint32_t r = decoder->GetNext();
+ if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
+ string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
+ } else {
+ string_result->Set(i++, r);
+ }
}
return result;
}
@@ -4256,7 +4263,7 @@
uint32_t hash_field) {
ASSERT(chars >= 0);
// Ensure the chars matches the number of characters in the buffer.
- ASSERT(static_cast<unsigned>(chars) == buffer->Length());
+ ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
// Determine whether the string is ASCII.
bool is_ascii = true;
while (buffer->has_more()) {
@@ -4302,8 +4309,15 @@
ASSERT_EQ(size, answer->Size());
// Fill in the characters.
- for (int i = 0; i < chars; i++) {
- answer->Set(i, buffer->GetNext());
+ int i = 0;
+ while (i < chars) {
+ uint32_t character = buffer->GetNext();
+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
+ answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
+ } else {
+ answer->Set(i++, character);
+ }
}
return answer;
}

Powered by Google App Engine
This is Rietveld 408576698