Index: src/objects.cc |
=================================================================== |
--- src/objects.cc (revision 10944) |
+++ src/objects.cc (working copy) |
@@ -6043,9 +6043,11 @@ |
buffer->Reset(offset, this); |
int character_position = offset; |
int utf8_bytes = 0; |
+ int last = unibrow::Utf16::kNoPreviousCharacter; |
while (buffer->has_more() && character_position++ < offset + length) { |
uint16_t character = buffer->GetNext(); |
- utf8_bytes += unibrow::Utf8::Length(character); |
+ utf8_bytes += unibrow::Utf8::Length(character, last); |
+ last = character; |
} |
if (length_return) { |
@@ -6059,13 +6061,15 @@ |
buffer->Seek(offset); |
character_position = offset; |
int utf8_byte_position = 0; |
+ last = unibrow::Utf16::kNoPreviousCharacter; |
while (buffer->has_more() && character_position++ < offset + length) { |
uint16_t character = buffer->GetNext(); |
if (allow_nulls == DISALLOW_NULLS && character == 0) { |
character = ' '; |
} |
utf8_byte_position += |
- unibrow::Utf8::Encode(result + utf8_byte_position, character); |
+ unibrow::Utf8::Encode(result + utf8_byte_position, character, last); |
+ last = character; |
} |
result[utf8_byte_position] = 0; |
return SmartArrayPointer<char>(result); |
@@ -6379,73 +6383,6 @@ |
} |
-// This method determines the type of string involved and then gets the UTF8 |
-// length of the string. It doesn't flatten the string and has log(n) recursion |
-// for a string of length n. |
-int String::Utf8Length(String* input, int from, int to) { |
- if (from == to) return 0; |
- int total = 0; |
- while (true) { |
- if (input->IsAsciiRepresentation()) return total + to - from; |
- switch (StringShape(input).representation_tag()) { |
- case kConsStringTag: { |
- ConsString* str = ConsString::cast(input); |
- String* first = str->first(); |
- String* second = str->second(); |
- int first_length = first->length(); |
- if (first_length - from < to - first_length) { |
- if (first_length > from) { |
- // Left hand side is shorter. |
- total += Utf8Length(first, from, first_length); |
- input = second; |
- from = 0; |
- to -= first_length; |
- } else { |
- // We only need the right hand side. |
- input = second; |
- from -= first_length; |
- to -= first_length; |
- } |
- } else { |
- if (first_length <= to) { |
- // Right hand side is shorter. |
- total += Utf8Length(second, 0, to - first_length); |
- input = first; |
- to = first_length; |
- } else { |
- // We only need the left hand side. |
- input = first; |
- } |
- } |
- continue; |
- } |
- case kExternalStringTag: |
- case kSeqStringTag: { |
- Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); |
- const uc16* p = vector.start(); |
- for (int i = from; i < to; i++) { |
- total += unibrow::Utf8::Length(p[i]); |
- } |
- return total; |
- } |
- case kSlicedStringTag: { |
- SlicedString* str = SlicedString::cast(input); |
- int offset = str->offset(); |
- input = str->parent(); |
- from += offset; |
- to += offset; |
- continue; |
- } |
- default: |
- break; |
- } |
- UNREACHABLE(); |
- return 0; |
- } |
- return 0; |
-} |
- |
- |
void Relocatable::PostGarbageCollectionProcessing() { |
Isolate* isolate = Isolate::Current(); |
Relocatable* current = isolate->relocatable_top(); |
@@ -6839,8 +6776,10 @@ |
// General slow case check. We know that the ia and ib iterators |
// have the same length. |
while (ia->has_more()) { |
- uc32 ca = ia->GetNext(); |
- uc32 cb = ib->GetNext(); |
+ uint32_t ca = ia->GetNext(); |
+ uint32_t cb = ib->GetNext(); |
+ ASSERT(ca <= unibrow::Utf16::kMaxNonSurrogateCharCode); |
+ ASSERT(cb <= unibrow::Utf16::kMaxNonSurrogateCharCode); |
if (ca != cb) |
return false; |
} |
@@ -7023,8 +6962,14 @@ |
decoder->Reset(str.start(), str.length()); |
int i; |
for (i = 0; i < slen && decoder->has_more(); i++) { |
- uc32 r = decoder->GetNext(); |
- if (Get(i) != r) return false; |
+ uint32_t r = decoder->GetNext(); |
+ if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+ if (i >= slen - 1) return false;  // Pair needs units i and i + 1. |
+ if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false; |
+ if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false; |
+ } else { |
+ if (Get(i) != r) return false; |
+ } |
} |
return i == slen && !decoder->has_more(); |
} |
@@ -7154,6 +7099,22 @@ |
} |
+void StringHasher::AddSurrogatePair(uc32 c) {  // Lead unit, then trail. |
+ const uint16_t lead_unit = unibrow::Utf16::LeadSurrogate(c); |
+ const uint16_t trail_unit = unibrow::Utf16::TrailSurrogate(c); |
+ AddCharacter(lead_unit); |
+ AddCharacter(trail_unit); |
+} |
+ |
+ |
+void StringHasher::AddSurrogatePairNoIndex(uc32 c) {  // Lead, then trail. |
+ const uint16_t lead_unit = unibrow::Utf16::LeadSurrogate(c); |
+ const uint16_t trail_unit = unibrow::Utf16::TrailSurrogate(c); |
+ AddCharacterNoIndex(lead_unit); |
+ AddCharacterNoIndex(trail_unit); |
+} |
+ |
+ |
uint32_t StringHasher::GetHashField() { |
ASSERT(is_valid()); |
if (length_ <= String::kMaxHashCalcLength) { |
@@ -10746,7 +10707,7 @@ |
if (hash_field_ != 0) return hash_field_ >> String::kHashShift; |
unibrow::Utf8InputBuffer<> buffer(string_.start(), |
static_cast<unsigned>(string_.length())); |
- chars_ = buffer.Length(); |
+ chars_ = buffer.Utf16Length(); |
hash_field_ = String::ComputeHashField(&buffer, chars_, seed_); |
uint32_t result = hash_field_ >> String::kHashShift; |
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. |