| Index: src/handles.cc
|
| diff --git a/src/handles.cc b/src/handles.cc
|
| index 3bc1f4bea03d831cf802c453eb69c15e6a13f49f..16fe0c795c0271d6be8711261925fc96db017445 100644
|
| --- a/src/handles.cc
|
| +++ b/src/handles.cc
|
| @@ -883,165 +883,6 @@ Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table,
|
| }
|
|
|
|
|
| -// This method determines the type of string involved and then gets the UTF8
|
| -// length of the string. It doesn't flatten the string and has log(n) recursion
|
| -// for a string of length n. If the failure flag gets set, then we have to
|
| -// flatten the string and retry. Failures are caused by surrogate pairs in deep
|
| -// cons strings.
|
| -
|
| -// Single surrogate characters that are encountered in the UTF-16 character
|
| -// sequence of the input string get counted as 3 UTF-8 bytes, because that
|
| -// is the way that WriteUtf8 will encode them. Surrogate pairs are counted and
|
| -// encoded as one 4-byte UTF-8 sequence.
|
| -
|
| -// This function conceptually uses recursion on the two halves of cons strings.
|
| -// However, in order to avoid the recursion going too deep it recurses on the
|
| -// second string of the cons, but iterates on the first substring (by manually
|
| -// eliminating it as a tail recursion). This means it counts the UTF-8 length
|
| -// from the end to the start, which makes no difference to the total.
|
| -
|
| -// Surrogate pairs are recognized even if they are split across two sides of a
|
| -// cons, which complicates the implementation somewhat. Therefore, too deep
|
| -// recursion cannot always be avoided. This case is detected, and the failure
|
| -// flag is set, a signal to the caller that the string should be flattened and
|
| -// the operation retried.
|
| -int Utf8LengthHelper(String* input,
|
| - int from,
|
| - int to,
|
| - bool followed_by_surrogate,
|
| - int max_recursion,
|
| - bool* failure,
|
| - bool* starts_with_surrogate) {
|
| - if (from == to) return 0;
|
| - int total = 0;
|
| - bool dummy;
|
| - while (true) {
|
| - if (input->IsOneByteRepresentation()) {
|
| - *starts_with_surrogate = false;
|
| - return total + to - from;
|
| - }
|
| - switch (StringShape(input).representation_tag()) {
|
| - case kConsStringTag: {
|
| - ConsString* str = ConsString::cast(input);
|
| - String* first = str->first();
|
| - String* second = str->second();
|
| - int first_length = first->length();
|
| - if (first_length - from > to - first_length) {
|
| - if (first_length < to) {
|
| - // Right hand side is shorter. No need to check the recursion depth
|
| - // since this can only happen log(n) times.
|
| - bool right_starts_with_surrogate = false;
|
| - total += Utf8LengthHelper(second,
|
| - 0,
|
| - to - first_length,
|
| - followed_by_surrogate,
|
| - max_recursion - 1,
|
| - failure,
|
| - &right_starts_with_surrogate);
|
| - if (*failure) return 0;
|
| - followed_by_surrogate = right_starts_with_surrogate;
|
| - input = first;
|
| - to = first_length;
|
| - } else {
|
| - // We only need the left hand side.
|
| - input = first;
|
| - }
|
| - } else {
|
| - if (first_length > from) {
|
| - // Left hand side is shorter.
|
| - if (first->IsOneByteRepresentation()) {
|
| - total += first_length - from;
|
| - *starts_with_surrogate = false;
|
| - starts_with_surrogate = &dummy;
|
| - input = second;
|
| - from = 0;
|
| - to -= first_length;
|
| - } else if (second->IsOneByteRepresentation()) {
|
| - followed_by_surrogate = false;
|
| - total += to - first_length;
|
| - input = first;
|
| - to = first_length;
|
| - } else if (max_recursion > 0) {
|
| - bool right_starts_with_surrogate = false;
|
| - // Recursing on the long one. This may fail.
|
| - total += Utf8LengthHelper(second,
|
| - 0,
|
| - to - first_length,
|
| - followed_by_surrogate,
|
| - max_recursion - 1,
|
| - failure,
|
| - &right_starts_with_surrogate);
|
| - if (*failure) return 0;
|
| - input = first;
|
| - to = first_length;
|
| - followed_by_surrogate = right_starts_with_surrogate;
|
| - } else {
|
| - *failure = true;
|
| - return 0;
|
| - }
|
| - } else {
|
| - // We only need the right hand side.
|
| - input = second;
|
| - from = 0;
|
| - to -= first_length;
|
| - }
|
| - }
|
| - continue;
|
| - }
|
| - case kExternalStringTag:
|
| - case kSeqStringTag: {
|
| - Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector();
|
| - const uc16* p = vector.start();
|
| - int previous = unibrow::Utf16::kNoPreviousCharacter;
|
| - for (int i = from; i < to; i++) {
|
| - uc16 c = p[i];
|
| - total += unibrow::Utf8::Length(c, previous);
|
| - previous = c;
|
| - }
|
| - if (to - from > 0) {
|
| - if (unibrow::Utf16::IsLeadSurrogate(previous) &&
|
| - followed_by_surrogate) {
|
| - total -= unibrow::Utf8::kBytesSavedByCombiningSurrogates;
|
| - }
|
| - if (unibrow::Utf16::IsTrailSurrogate(p[from])) {
|
| - *starts_with_surrogate = true;
|
| - }
|
| - }
|
| - return total;
|
| - }
|
| - case kSlicedStringTag: {
|
| - SlicedString* str = SlicedString::cast(input);
|
| - int offset = str->offset();
|
| - input = str->parent();
|
| - from += offset;
|
| - to += offset;
|
| - continue;
|
| - }
|
| - default:
|
| - break;
|
| - }
|
| - UNREACHABLE();
|
| - return 0;
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -
|
| -int Utf8Length(Handle<String> str) {
|
| - bool dummy;
|
| - bool failure;
|
| - int len;
|
| - const int kRecursionBudget = 100;
|
| - do {
|
| - failure = false;
|
| - len = Utf8LengthHelper(
|
| - *str, 0, str->length(), false, kRecursionBudget, &failure, &dummy);
|
| - if (failure) FlattenString(str);
|
| - } while (failure);
|
| - return len;
|
| -}
|
| -
|
| -
|
| DeferredHandleScope::DeferredHandleScope(Isolate* isolate)
|
| : impl_(isolate->handle_scope_implementer()) {
|
| ASSERT(impl_->isolate() == Isolate::Current());
|
|
|