Chromium Code Reviews| Index: src/runtime.cc |
| diff --git a/src/runtime.cc b/src/runtime.cc |
| index 3017f3f1e50710a3650032b9f78297435a64d18f..64226e9d44451596c6032777779a44770d406e38 100644 |
| --- a/src/runtime.cc |
| +++ b/src/runtime.cc |
| @@ -2574,24 +2574,28 @@ class ReplacementStringBuilder { |
| class CompiledReplacement { |
| public: |
| explicit CompiledReplacement(Zone* zone) |
| - : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} |
| + : parts_(1, zone), replacement_substrings_(0, zone), |
| + simple_hint_(false), |
| + zone_(zone) {} |
| - // Return whether the replacement is simple. |
| - bool Compile(Handle<String> replacement, |
| + void Compile(Handle<String> replacement, |
| int capture_count, |
| int subject_length); |
| - // Use Apply only if Compile returned false. |
| void Apply(ReplacementStringBuilder* builder, |
| int match_from, |
| int match_to, |
| - int32_t* match); |
| + Handle<JSArray> last_match_info); |
| // Number of distinct parts of the replacement pattern. |
| int parts() { |
| return parts_.length(); |
| } |
| + bool simple_hint() { |
| + return simple_hint_; |
| + } |
| + |
| Zone* zone() const { return zone_; } |
| private: |
| @@ -2652,11 +2656,11 @@ class CompiledReplacement { |
| }; |
| template<typename Char> |
| - bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
| - Vector<Char> characters, |
| - int capture_count, |
| - int subject_length, |
| - Zone* zone) { |
| + static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
| + Vector<Char> characters, |
| + int capture_count, |
| + int subject_length, |
| + Zone* zone) { |
| int length = characters.length(); |
| int last = 0; |
| for (int i = 0; i < length; i++) { |
| @@ -2750,7 +2754,7 @@ class CompiledReplacement { |
| } |
| if (length > last) { |
| if (last == 0) { |
| - // Replacement is simple. Do not use Apply to do the replacement. |
| + parts->Add(ReplacementPart::ReplacementString(), zone); |
| return true; |
| } else { |
| parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); |
| @@ -2761,35 +2765,33 @@ class CompiledReplacement { |
| ZoneList<ReplacementPart> parts_; |
| ZoneList<Handle<String> > replacement_substrings_; |
| + bool simple_hint_; |
| Zone* zone_; |
| }; |
| -bool CompiledReplacement::Compile(Handle<String> replacement, |
| +void CompiledReplacement::Compile(Handle<String> replacement, |
| int capture_count, |
| int subject_length) { |
| { |
| AssertNoAllocation no_alloc; |
| String::FlatContent content = replacement->GetFlatContent(); |
| ASSERT(content.IsFlat()); |
| - bool simple = false; |
| if (content.IsAscii()) { |
| - simple = ParseReplacementPattern(&parts_, |
| - content.ToAsciiVector(), |
| - capture_count, |
| - subject_length, |
| - zone()); |
| + simple_hint_ = ParseReplacementPattern(&parts_, |
| + content.ToAsciiVector(), |
| + capture_count, |
| + subject_length, |
| + zone()); |
| } else { |
| ASSERT(content.IsTwoByte()); |
| - simple = ParseReplacementPattern(&parts_, |
| - content.ToUC16Vector(), |
| - capture_count, |
| - subject_length, |
| - zone()); |
| + simple_hint_ = ParseReplacementPattern(&parts_, |
| + content.ToUC16Vector(), |
| + capture_count, |
| + subject_length, |
| + zone()); |
| } |
| - if (simple) return true; |
| } |
| - |
| Isolate* isolate = replacement->GetIsolate(); |
| // Find substrings of replacement string and create them as String objects. |
| int substring_index = 0; |
| @@ -2809,15 +2811,13 @@ bool CompiledReplacement::Compile(Handle<String> replacement, |
| substring_index++; |
| } |
| } |
| - return false; |
| } |
| void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
| int match_from, |
| int match_to, |
| - int32_t* match) { |
| - ASSERT_LT(0, parts_.length()); |
| + Handle<JSArray> last_match_info) { |
| for (int i = 0, n = parts_.length(); i < n; i++) { |
| ReplacementPart part = parts_[i]; |
| switch (part.tag) { |
| @@ -2833,8 +2833,9 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
| } |
| case SUBJECT_CAPTURE: { |
| int capture = part.data; |
| - int from = match[capture * 2]; |
| - int to = match[capture * 2 + 1]; |
| + FixedArray* match_info = FixedArray::cast(last_match_info->elements()); |
| + int from = RegExpImpl::GetCapture(match_info, capture * 2); |
| + int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1); |
| if (from >= 0 && to > from) { |
| builder->AddSubjectSlice(from, to); |
| } |
| @@ -2956,19 +2957,85 @@ void FindStringIndicesDispatch(Isolate* isolate, |
| } |
| +// Two smis before and after the match, for very long strings. |
| +const int kMaxBuilderEntriesPerRegExpMatch = 5; |
| + |
| + |
| +static void SetLastMatchInfoNoCaptures(Handle<String> subject, |
| + Handle<JSArray> last_match_info, |
| + int match_start, |
| + int match_end) { |
| + // Fill last_match_info with a single capture. |
| + last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead); |
| + AssertNoAllocation no_gc; |
| + FixedArray* elements = FixedArray::cast(last_match_info->elements()); |
| + RegExpImpl::SetLastCaptureCount(elements, 2); |
| + RegExpImpl::SetLastInput(elements, *subject); |
| + RegExpImpl::SetLastSubject(elements, *subject); |
| + RegExpImpl::SetCapture(elements, 0, match_start); |
| + RegExpImpl::SetCapture(elements, 1, match_end); |
| +} |
| + |
| + |
| +template <typename SubjectChar, typename PatternChar> |
| +static bool SearchStringMultiple(Isolate* isolate, |
| + Vector<const SubjectChar> subject, |
| + Vector<const PatternChar> pattern, |
| + String* pattern_string, |
| + FixedArrayBuilder* builder, |
| + int* match_pos) { |
| + int pos = *match_pos; |
| + int subject_length = subject.length(); |
| + int pattern_length = pattern.length(); |
| + int max_search_start = subject_length - pattern_length; |
| + StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| + while (pos <= max_search_start) { |
| + if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { |
| + *match_pos = pos; |
| + return false; |
| + } |
| + // Position of end of previous match. |
| + int match_end = pos + pattern_length; |
| + int new_pos = search.Search(subject, match_end); |
| + if (new_pos >= 0) { |
| + // A match. |
| + if (new_pos > match_end) { |
| + ReplacementStringBuilder::AddSubjectSlice(builder, |
| + match_end, |
| + new_pos); |
| + } |
| + pos = new_pos; |
| + builder->Add(pattern_string); |
| + } else { |
| + break; |
| + } |
| + } |
| + |
| + if (pos < max_search_start) { |
| + ReplacementStringBuilder::AddSubjectSlice(builder, |
| + pos + pattern_length, |
| + subject_length); |
| + } |
| + *match_pos = pos; |
| + return true; |
| +} |
| + |
| + |
| + |
| + |
| template<typename ResultSeqString> |
| MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( |
| Isolate* isolate, |
| Handle<String> subject, |
| Handle<JSRegExp> pattern_regexp, |
| Handle<String> replacement, |
| - Handle<JSArray> last_match_info) { |
| + Handle<JSArray> last_match_info, |
| + Zone* zone) { |
| ASSERT(subject->IsFlat()); |
| ASSERT(replacement->IsFlat()); |
| - Zone* zone = isolate->runtime_zone(); |
| - ZoneScope zone_space(zone, DELETE_ON_EXIT); |
| - ZoneList<int> indices(8, zone); |
| + ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT); |
| + ZoneList<int> indices(8, isolate->runtime_zone()); |
| ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
| String* pattern = |
| String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
| @@ -2976,8 +3043,8 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( |
| int pattern_len = pattern->length(); |
| int replacement_len = replacement->length(); |
| - FindStringIndicesDispatch( |
| - isolate, *subject, pattern, &indices, 0xffffffff, zone); |
| + FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, |
| + zone); |
| int matches = indices.length(); |
| if (matches == 0) return *subject; |
| @@ -3032,9 +3099,10 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( |
| subject_len); |
| } |
| - int32_t match_indices[] = { indices.at(matches - 1), |
| - indices.at(matches - 1) + pattern_len }; |
| - RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
| + SetLastMatchInfoNoCaptures(subject, |
| + last_match_info, |
| + indices.at(matches - 1), |
| + indices.at(matches - 1) + pattern_len); |
| return *result; |
| } |
| @@ -3042,101 +3110,138 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( |
| MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( |
| Isolate* isolate, |
| - Handle<String> subject, |
| - Handle<JSRegExp> regexp, |
| - Handle<String> replacement, |
| - Handle<JSArray> last_match_info) { |
| + String* subject, |
| + JSRegExp* regexp, |
| + String* replacement, |
| + JSArray* last_match_info, |
| + Zone* zone) { |
| ASSERT(subject->IsFlat()); |
| ASSERT(replacement->IsFlat()); |
| - bool is_global = regexp->GetFlags().is_global(); |
| - int capture_count = regexp->CaptureCount(); |
| - int subject_length = subject->length(); |
| + HandleScope handles(isolate); |
|
ulan
2012/08/21 09:43:40
Empty line is missing here.
Yang
2012/08/21 09:46:10
Done.
|
| + int length = subject->length(); |
| + Handle<String> subject_handle(subject); |
| + Handle<JSRegExp> regexp_handle(regexp); |
| + Handle<String> replacement_handle(replacement); |
| + Handle<JSArray> last_match_info_handle(last_match_info); |
| + Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + 0, |
| + last_match_info_handle); |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| + } |
| + if (match->IsNull()) { |
| + return *subject_handle; |
| + } |
| + |
| + int capture_count = regexp_handle->CaptureCount(); |
| // CompiledReplacement uses zone allocation. |
| - Zone* zone = isolate->runtime_zone(); |
| ZoneScope zonescope(zone, DELETE_ON_EXIT); |
| CompiledReplacement compiled_replacement(zone); |
| - bool simple_replace = compiled_replacement.Compile(replacement, |
| - capture_count, |
| - subject_length); |
| + |
| + compiled_replacement.Compile(replacement_handle, |
| + capture_count, |
| + length); |
| + |
| + bool is_global = regexp_handle->GetFlags().is_global(); |
| // Shortcut for simple non-regexp global replacements |
| if (is_global && |
| - regexp->TypeTag() == JSRegExp::ATOM && |
| - simple_replace) { |
| - if (subject->HasOnlyAsciiChars()) { |
| + regexp_handle->TypeTag() == JSRegExp::ATOM && |
| + compiled_replacement.simple_hint()) { |
| + if (subject_handle->HasOnlyAsciiChars() && |
| + replacement_handle->HasOnlyAsciiChars()) { |
| return StringReplaceAtomRegExpWithString<SeqAsciiString>( |
| - isolate, subject, regexp, replacement, last_match_info); |
| - } else { |
| + isolate, |
| + subject_handle, |
| + regexp_handle, |
| + replacement_handle, |
| + last_match_info_handle, |
| + zone); |
| + } else { |
| return StringReplaceAtomRegExpWithString<SeqTwoByteString>( |
| - isolate, subject, regexp, replacement, last_match_info); |
| + isolate, |
| + subject_handle, |
| + regexp_handle, |
| + replacement_handle, |
| + last_match_info_handle, |
| + zone); |
| } |
| } |
| - RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - |
| - int32_t* current_match = global_cache.FetchNext(); |
| - if (current_match == NULL) { |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - return *subject; |
| - } |
| - |
| // Guessing the number of parts that the final result string is built |
| // from. Global regexps can match any number of times, so we guess |
| // conservatively. |
| int expected_parts = |
| (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; |
| ReplacementStringBuilder builder(isolate->heap(), |
| - subject, |
| + subject_handle, |
| expected_parts); |
| + // Index of end of last match. |
| + int prev = 0; |
| + |
| + |
| // Number of parts added by compiled replacement plus preceding |
| // string and possibly suffix after last match. It is possible for |
| // all components to use two elements when encoded as two smis. |
| const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); |
| - |
| - int prev = 0; |
| - |
| + bool matched = true; |
| do { |
| + ASSERT(last_match_info_handle->HasFastObjectElements()); |
| + // Increase the capacity of the builder before entering local handle-scope, |
| + // so its internal buffer can safely allocate a new handle if it grows. |
| builder.EnsureCapacity(parts_added_per_loop); |
| - int start = current_match[0]; |
| - int end = current_match[1]; |
| + HandleScope loop_scope(isolate); |
| + int start, end; |
| + { |
| + AssertNoAllocation match_info_array_is_not_in_a_handle; |
| + FixedArray* match_info_array = |
| + FixedArray::cast(last_match_info_handle->elements()); |
| + |
| + ASSERT_EQ(capture_count * 2 + 2, |
| + RegExpImpl::GetLastCaptureCount(match_info_array)); |
| + start = RegExpImpl::GetCapture(match_info_array, 0); |
| + end = RegExpImpl::GetCapture(match_info_array, 1); |
| + } |
| if (prev < start) { |
| builder.AddSubjectSlice(prev, start); |
| } |
| + compiled_replacement.Apply(&builder, |
| + start, |
| + end, |
| + last_match_info_handle); |
| - if (simple_replace) { |
| - builder.AddString(replacement); |
| - } else { |
| - compiled_replacement.Apply(&builder, |
| - start, |
| - end, |
| - current_match); |
| - } |
| prev = end; |
| // Only continue checking for global regexps. |
| if (!is_global) break; |
| - current_match = global_cache.FetchNext(); |
| - } while (current_match != NULL); |
| + // Continue from where the match ended, unless it was an empty match. |
| + int next = end; |
| + if (start == end) { |
| + next = end + 1; |
| + if (next > length) break; |
| + } |
| - if (global_cache.HasException()) return Failure::Exception(); |
| + match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + next, |
| + last_match_info_handle); |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| + } |
| + matched = !match->IsNull(); |
| + } while (matched); |
| - if (prev < subject_length) { |
| - builder.EnsureCapacity(2); |
| - builder.AddSubjectSlice(prev, subject_length); |
| + if (prev < length) { |
| + builder.AddSubjectSlice(prev, length); |
| } |
| - RegExpImpl::SetLastMatchInfo(last_match_info, |
| - subject, |
| - capture_count, |
| - global_cache.LastSuccessfulMatch()); |
| - |
| return *(builder.ToString()); |
| } |
| @@ -3144,51 +3249,69 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( |
| template <typename ResultSeqString> |
| MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( |
| Isolate* isolate, |
| - Handle<String> subject, |
| - Handle<JSRegExp> regexp, |
| - Handle<JSArray> last_match_info) { |
| + String* subject, |
| + JSRegExp* regexp, |
| + JSArray* last_match_info, |
| + Zone* zone) { |
| ASSERT(subject->IsFlat()); |
| - bool is_global = regexp->GetFlags().is_global(); |
| + HandleScope handles(isolate); |
| + |
| + Handle<String> subject_handle(subject); |
| + Handle<JSRegExp> regexp_handle(regexp); |
| + Handle<JSArray> last_match_info_handle(last_match_info); |
| // Shortcut for simple non-regexp global replacements |
| - if (is_global && |
| - regexp->TypeTag() == JSRegExp::ATOM) { |
| - Handle<String> empty_string(HEAP->empty_string()); |
| - if (subject->HasOnlyAsciiChars()) { |
| + if (regexp_handle->GetFlags().is_global() && |
| + regexp_handle->TypeTag() == JSRegExp::ATOM) { |
| + Handle<String> empty_string_handle(HEAP->empty_string()); |
| + if (subject_handle->HasOnlyAsciiChars()) { |
| return StringReplaceAtomRegExpWithString<SeqAsciiString>( |
| isolate, |
| - subject, |
| - regexp, |
| - empty_string, |
| - last_match_info); |
| + subject_handle, |
| + regexp_handle, |
| + empty_string_handle, |
| + last_match_info_handle, |
| + zone); |
| } else { |
| return StringReplaceAtomRegExpWithString<SeqTwoByteString>( |
| isolate, |
| - subject, |
| - regexp, |
| - empty_string, |
| - last_match_info); |
| + subject_handle, |
| + regexp_handle, |
| + empty_string_handle, |
| + last_match_info_handle, |
| + zone); |
| } |
| } |
| - RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); |
| - if (global_cache.HasException()) return Failure::Exception(); |
| + Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + 0, |
| + last_match_info_handle); |
| + if (match.is_null()) return Failure::Exception(); |
| + if (match->IsNull()) return *subject_handle; |
| + |
| + ASSERT(last_match_info_handle->HasFastObjectElements()); |
| - int32_t* current_match = global_cache.FetchNext(); |
| - if (current_match == NULL) { |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - return *subject; |
| + int start, end; |
| + { |
| + AssertNoAllocation match_info_array_is_not_in_a_handle; |
| + FixedArray* match_info_array = |
| + FixedArray::cast(last_match_info_handle->elements()); |
| + |
| + start = RegExpImpl::GetCapture(match_info_array, 0); |
| + end = RegExpImpl::GetCapture(match_info_array, 1); |
| } |
| - int start = current_match[0]; |
| - int end = current_match[1]; |
| - int capture_count = regexp->CaptureCount(); |
| - int subject_length = subject->length(); |
| + bool global = regexp_handle->GetFlags().is_global(); |
| - int new_length = subject_length - (end - start); |
| - if (new_length == 0) return isolate->heap()->empty_string(); |
| + if (start == end && !global) return *subject_handle; |
| + int length = subject_handle->length(); |
| + int new_length = length - (end - start); |
| + if (new_length == 0) { |
| + return isolate->heap()->empty_string(); |
| + } |
| Handle<ResultSeqString> answer; |
| if (ResultSeqString::kHasAsciiEncoding) { |
| answer = Handle<ResultSeqString>::cast( |
| @@ -3198,55 +3321,73 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( |
| isolate->factory()->NewRawTwoByteString(new_length)); |
| } |
| - if (!is_global) { |
| - RegExpImpl::SetLastMatchInfo( |
| - last_match_info, subject, capture_count, current_match); |
| - if (start == end) { |
| - return *subject; |
| - } else { |
| - if (start > 0) { |
| - String::WriteToFlat(*subject, answer->GetChars(), 0, start); |
| - } |
| - if (end < subject_length) { |
| - String::WriteToFlat( |
| - *subject, answer->GetChars() + start, end, subject_length); |
| - } |
| - return *answer; |
| + // If the regexp isn't global, only match once. |
| + if (!global) { |
| + if (start > 0) { |
| + String::WriteToFlat(*subject_handle, |
| + answer->GetChars(), |
| + 0, |
| + start); |
| } |
| + if (end < length) { |
| + String::WriteToFlat(*subject_handle, |
| + answer->GetChars() + start, |
| + end, |
| + length); |
| + } |
| + return *answer; |
| } |
| - int prev = 0; |
| + int prev = 0; // Index of end of last match. |
| + int next = 0; // Start of next search (prev unless last match was empty). |
| int position = 0; |
| do { |
| - start = current_match[0]; |
| - end = current_match[1]; |
| if (prev < start) { |
| // Add substring subject[prev;start] to answer string. |
| - String::WriteToFlat( |
| - *subject, answer->GetChars() + position, prev, start); |
| + String::WriteToFlat(*subject_handle, |
| + answer->GetChars() + position, |
| + prev, |
| + start); |
| position += start - prev; |
| } |
| prev = end; |
| + next = end; |
| + // Continue from where the match ended, unless it was an empty match. |
| + if (start == end) { |
| + next++; |
| + if (next > length) break; |
| + } |
| + match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + next, |
| + last_match_info_handle); |
| + if (match.is_null()) return Failure::Exception(); |
| + if (match->IsNull()) break; |
| + |
| + ASSERT(last_match_info_handle->HasFastObjectElements()); |
| + HandleScope loop_scope(isolate); |
| + { |
| + AssertNoAllocation match_info_array_is_not_in_a_handle; |
| + FixedArray* match_info_array = |
| + FixedArray::cast(last_match_info_handle->elements()); |
| + start = RegExpImpl::GetCapture(match_info_array, 0); |
| + end = RegExpImpl::GetCapture(match_info_array, 1); |
| + } |
| + } while (true); |
| - current_match = global_cache.FetchNext(); |
| - } while (current_match != NULL); |
| - |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - |
| - RegExpImpl::SetLastMatchInfo(last_match_info, |
| - subject, |
| - capture_count, |
| - global_cache.LastSuccessfulMatch()); |
| - |
| - if (prev < subject_length) { |
| + if (prev < length) { |
| // Add substring subject[prev;length] to answer string. |
| - String::WriteToFlat( |
| - *subject, answer->GetChars() + position, prev, subject_length); |
| - position += subject_length - prev; |
| + String::WriteToFlat(*subject_handle, |
| + answer->GetChars() + position, |
| + prev, |
| + length); |
| + position += length - prev; |
| } |
| - if (position == 0) return isolate->heap()->empty_string(); |
| + if (position == 0) { |
| + return isolate->heap()->empty_string(); |
| + } |
| // Shorten string and fill |
| int string_size = ResultSeqString::SizeFor(position); |
| @@ -3269,31 +3410,50 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( |
| RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { |
| ASSERT(args.length() == 4); |
| - HandleScope scope(isolate); |
| - |
| - CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| - CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); |
| - CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
| - CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| + CONVERT_ARG_CHECKED(String, subject, 0); |
| + if (!subject->IsFlat()) { |
| + Object* flat_subject; |
| + { MaybeObject* maybe_flat_subject = subject->TryFlatten(); |
| + if (!maybe_flat_subject->ToObject(&flat_subject)) { |
| + return maybe_flat_subject; |
| + } |
| + } |
| + subject = String::cast(flat_subject); |
| + } |
| - if (!subject->IsFlat()) subject = FlattenGetString(subject); |
| + CONVERT_ARG_CHECKED(String, replacement, 2); |
| + if (!replacement->IsFlat()) { |
| + Object* flat_replacement; |
| + { MaybeObject* maybe_flat_replacement = replacement->TryFlatten(); |
| + if (!maybe_flat_replacement->ToObject(&flat_replacement)) { |
| + return maybe_flat_replacement; |
| + } |
| + } |
| + replacement = String::cast(flat_replacement); |
| + } |
| - if (!replacement->IsFlat()) replacement = FlattenGetString(replacement); |
| + CONVERT_ARG_CHECKED(JSRegExp, regexp, 1); |
| + CONVERT_ARG_CHECKED(JSArray, last_match_info, 3); |
| ASSERT(last_match_info->HasFastObjectElements()); |
| + Zone* zone = isolate->runtime_zone(); |
| if (replacement->length() == 0) { |
| if (subject->HasOnlyAsciiChars()) { |
| return StringReplaceRegExpWithEmptyString<SeqAsciiString>( |
| - isolate, subject, regexp, last_match_info); |
| + isolate, subject, regexp, last_match_info, zone); |
| } else { |
| return StringReplaceRegExpWithEmptyString<SeqTwoByteString>( |
| - isolate, subject, regexp, last_match_info); |
| + isolate, subject, regexp, last_match_info, zone); |
| } |
| } |
| - return StringReplaceRegExpWithString( |
| - isolate, subject, regexp, replacement, last_match_info); |
| + return StringReplaceRegExpWithString(isolate, |
| + subject, |
| + regexp, |
| + replacement, |
| + last_match_info, |
| + zone); |
| } |
| @@ -3616,45 +3776,46 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { |
| CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2); |
| HandleScope handles; |
| - RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - |
| - int capture_count = regexp->CaptureCount(); |
| + Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); |
| - Zone* zone = isolate->runtime_zone(); |
| - ZoneScope zone_space(zone, DELETE_ON_EXIT); |
| - ZoneList<int> offsets(8, zone); |
| - |
| - while (true) { |
| - int32_t* match = global_cache.FetchNext(); |
| - if (match == NULL) break; |
| - offsets.Add(match[0], zone); // start |
| - offsets.Add(match[1], zone); // end |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| } |
| - |
| - if (global_cache.HasException()) return Failure::Exception(); |
| - |
| - if (offsets.length() == 0) { |
| - // Not a single match. |
| + if (match->IsNull()) { |
| return isolate->heap()->null_value(); |
| } |
| + int length = subject->length(); |
| - RegExpImpl::SetLastMatchInfo(regexp_info, |
| - subject, |
| - capture_count, |
| - global_cache.LastSuccessfulMatch()); |
| - |
| + Zone* zone = isolate->runtime_zone(); |
| + ZoneScope zone_space(zone, DELETE_ON_EXIT); |
| + ZoneList<int> offsets(8, zone); |
| + int start; |
| + int end; |
| + do { |
| + { |
| + AssertNoAllocation no_alloc; |
| + FixedArray* elements = FixedArray::cast(regexp_info->elements()); |
| + start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); |
| + end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); |
| + } |
| + offsets.Add(start, zone); |
| + offsets.Add(end, zone); |
| + if (start == end) if (++end > length) break; |
| + match = RegExpImpl::Exec(regexp, subject, end, regexp_info); |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| + } |
| + } while (!match->IsNull()); |
| int matches = offsets.length() / 2; |
| Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches); |
| - Handle<String> substring = |
| - isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1)); |
| + Handle<String> substring = isolate->factory()-> |
| + NewSubString(subject, offsets.at(0), offsets.at(1)); |
| elements->set(0, *substring); |
| - for (int i = 1; i < matches; i++) { |
| - HandleScope temp_scope(isolate); |
| + for (int i = 1; i < matches ; i++) { |
| int from = offsets.at(i * 2); |
| int to = offsets.at(i * 2 + 1); |
| - Handle<String> substring = |
| - isolate->factory()->NewProperSubString(subject, from, to); |
| + Handle<String> substring = isolate->factory()-> |
| + NewProperSubString(subject, from, to); |
| elements->set(i, *substring); |
| } |
| Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements); |
| @@ -3663,100 +3824,149 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { |
| } |
| -// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
| -// separate last match info. See comment on that function. |
| -template<bool has_capture> |
| -static int SearchRegExpMultiple( |
| +static bool SearchStringMultiple(Isolate* isolate, |
| + Handle<String> subject, |
| + Handle<String> pattern, |
| + Handle<JSArray> last_match_info, |
| + FixedArrayBuilder* builder) { |
| + ASSERT(subject->IsFlat()); |
| + ASSERT(pattern->IsFlat()); |
| + |
| + // Treating as if a previous match was before first character. |
| + int match_pos = -pattern->length(); |
| + |
| + for (;;) { // Break when search complete. |
| + builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| + AssertNoAllocation no_gc; |
| + String::FlatContent subject_content = subject->GetFlatContent(); |
| + String::FlatContent pattern_content = pattern->GetFlatContent(); |
| + if (subject_content.IsAscii()) { |
| + Vector<const char> subject_vector = subject_content.ToAsciiVector(); |
| + if (pattern_content.IsAscii()) { |
| + if (SearchStringMultiple(isolate, |
| + subject_vector, |
| + pattern_content.ToAsciiVector(), |
| + *pattern, |
| + builder, |
| + &match_pos)) break; |
| + } else { |
| + if (SearchStringMultiple(isolate, |
| + subject_vector, |
| + pattern_content.ToUC16Vector(), |
| + *pattern, |
| + builder, |
| + &match_pos)) break; |
| + } |
| + } else { |
| + Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
| + if (pattern_content.IsAscii()) { |
| + if (SearchStringMultiple(isolate, |
| + subject_vector, |
| + pattern_content.ToAsciiVector(), |
| + *pattern, |
| + builder, |
| + &match_pos)) break; |
| + } else { |
| + if (SearchStringMultiple(isolate, |
| + subject_vector, |
| + pattern_content.ToUC16Vector(), |
| + *pattern, |
| + builder, |
| + &match_pos)) break; |
| + } |
| + } |
| + } |
| + |
| + if (match_pos >= 0) { |
| + SetLastMatchInfoNoCaptures(subject, |
| + last_match_info, |
| + match_pos, |
| + match_pos + pattern->length()); |
| + return true; |
| + } |
| + return false; // No matches at all. |
| +} |
| + |
| + |
| +static int SearchRegExpNoCaptureMultiple( |
| Isolate* isolate, |
| Handle<String> subject, |
| Handle<JSRegExp> regexp, |
| Handle<JSArray> last_match_array, |
| FixedArrayBuilder* builder) { |
| ASSERT(subject->IsFlat()); |
| - ASSERT_NE(has_capture, regexp->CaptureCount() == 0); |
| - |
| - RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| - if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; |
| - |
| - int capture_count = regexp->CaptureCount(); |
| - int subject_length = subject->length(); |
| - |
| - // Position to search from. |
| + ASSERT(regexp->CaptureCount() == 0); |
| int match_start = -1; |
| int match_end = 0; |
| + int pos = 0; |
| + int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject); |
| + if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION; |
| + |
| + int max_matches; |
| + int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, |
| + registers_per_match, |
| + &max_matches); |
| + OffsetsVector registers(num_registers, isolate); |
| + Vector<int32_t> register_vector(registers.vector(), registers.length()); |
| + int subject_length = subject->length(); |
| bool first = true; |
| - |
| - // Two smis before and after the match, for very long strings. |
| - static const int kMaxBuilderEntriesPerRegExpMatch = 5; |
| - |
| - while (true) { |
| - int32_t* current_match = global_cache.FetchNext(); |
| - if (current_match == NULL) break; |
| - match_start = current_match[0]; |
| - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| - if (match_end < match_start) { |
| - ReplacementStringBuilder::AddSubjectSlice(builder, |
| - match_end, |
| - match_start); |
| - } |
| - match_end = current_match[1]; |
| - { |
| - // Avoid accumulating new handles inside loop. |
| - HandleScope temp_scope(isolate); |
| - Handle<String> match; |
| - if (!first) { |
| - match = isolate->factory()->NewProperSubString(subject, |
| - match_start, |
| - match_end); |
| - } else { |
| - match = isolate->factory()->NewSubString(subject, |
| - match_start, |
| - match_end); |
| - first = false; |
| + for (;;) { // Break on failure, return on exception. |
| + int num_matches = RegExpImpl::IrregexpExecRaw(regexp, |
| + subject, |
| + pos, |
| + register_vector); |
| + if (num_matches > 0) { |
| + for (int match_index = 0; match_index < num_matches; match_index++) { |
| + int32_t* current_match = &register_vector[match_index * 2]; |
| + match_start = current_match[0]; |
| + builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| + if (match_end < match_start) { |
| + ReplacementStringBuilder::AddSubjectSlice(builder, |
| + match_end, |
| + match_start); |
| + } |
| + match_end = current_match[1]; |
| + HandleScope loop_scope(isolate); |
| + if (!first) { |
| + builder->Add(*isolate->factory()->NewProperSubString(subject, |
| + match_start, |
| + match_end)); |
| + } else { |
| + builder->Add(*isolate->factory()->NewSubString(subject, |
| + match_start, |
| + match_end)); |
| + first = false; |
| + } |
| } |
| - if (has_capture) { |
| - // Arguments array to replace function is match, captures, index and |
| - // subject, i.e., 3 + capture count in total. |
| - Handle<FixedArray> elements = |
| - isolate->factory()->NewFixedArray(3 + capture_count); |
| - |
| - elements->set(0, *match); |
| - for (int i = 1; i <= capture_count; i++) { |
| - int start = current_match[i * 2]; |
| - if (start >= 0) { |
| - int end = current_match[i * 2 + 1]; |
| - ASSERT(start <= end); |
| - Handle<String> substring = |
| - isolate->factory()->NewSubString(subject, start, end); |
| - elements->set(i, *substring); |
| - } else { |
| - ASSERT(current_match[i * 2 + 1] < 0); |
| - elements->set(i, isolate->heap()->undefined_value()); |
| - } |
| - } |
| - elements->set(capture_count + 1, Smi::FromInt(match_start)); |
| - elements->set(capture_count + 2, *subject); |
| - builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
| + // If we did not get the maximum number of matches, we can stop here |
| + // since there are no matches left. |
| + if (num_matches < max_matches) break; |
| + |
| + if (match_start != match_end) { |
| + pos = match_end; |
| } else { |
| - builder->Add(*match); |
| + pos = match_end + 1; |
| + if (pos > subject_length) break; |
| } |
| + } else if (num_matches == 0) { |
| + break; |
| + } else { |
| + ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION); |
| + return RegExpImpl::RE_EXCEPTION; |
| } |
| } |
| - if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; |
| - |
| if (match_start >= 0) { |
| - // Finished matching, with at least one match. |
| if (match_end < subject_length) { |
| ReplacementStringBuilder::AddSubjectSlice(builder, |
| match_end, |
| subject_length); |
| } |
| - |
| - RegExpImpl::SetLastMatchInfo( |
| - last_match_array, subject, capture_count, NULL); |
| - |
| + SetLastMatchInfoNoCaptures(subject, |
| + last_match_array, |
| + match_start, |
| + match_end); |
| return RegExpImpl::RE_SUCCESS; |
| } else { |
| return RegExpImpl::RE_FAILURE; // No matches at all. |
| @@ -3764,6 +3974,147 @@ static int SearchRegExpMultiple( |
| } |
| +// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
| +// separate last match info. See comment on that function. |
| +static int SearchRegExpMultiple( |
| + Isolate* isolate, |
| + Handle<String> subject, |
| + Handle<JSRegExp> regexp, |
| + Handle<JSArray> last_match_array, |
| + FixedArrayBuilder* builder, |
| + Zone* zone) { |
| + |
| + ASSERT(subject->IsFlat()); |
| + int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject); |
| + if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION; |
| + |
| + int max_matches; |
| + int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, |
| + registers_per_match, |
| + &max_matches); |
| + OffsetsVector registers(num_registers, isolate); |
| + Vector<int32_t> register_vector(registers.vector(), registers.length()); |
| + |
| + int num_matches = RegExpImpl::IrregexpExecRaw(regexp, |
| + subject, |
| + 0, |
| + register_vector); |
| + |
| + int capture_count = regexp->CaptureCount(); |
| + int subject_length = subject->length(); |
| + |
| + // Position to search from. |
| + int pos = 0; |
| + // End of previous match. Differs from pos if match was empty. |
| + int match_end = 0; |
| + bool first = true; |
| + |
| + if (num_matches > 0) { |
| + do { |
| + int match_start = 0; |
| + for (int match_index = 0; match_index < num_matches; match_index++) { |
| + int32_t* current_match = |
| + &register_vector[match_index * registers_per_match]; |
| + match_start = current_match[0]; |
| + builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| + if (match_end < match_start) { |
| + ReplacementStringBuilder::AddSubjectSlice(builder, |
| + match_end, |
| + match_start); |
| + } |
| + match_end = current_match[1]; |
| + |
| + { |
| + // Avoid accumulating new handles inside loop. |
| + HandleScope temp_scope(isolate); |
| + // Arguments array to replace function is match, captures, index and |
| + // subject, i.e., 3 + capture count in total. |
| + Handle<FixedArray> elements = |
| + isolate->factory()->NewFixedArray(3 + capture_count); |
| + Handle<String> match; |
| + if (!first) { |
| + match = isolate->factory()->NewProperSubString(subject, |
| + match_start, |
| + match_end); |
| + } else { |
| + match = isolate->factory()->NewSubString(subject, |
| + match_start, |
| + match_end); |
| + } |
| + elements->set(0, *match); |
| + for (int i = 1; i <= capture_count; i++) { |
| + int start = current_match[i * 2]; |
| + if (start >= 0) { |
| + int end = current_match[i * 2 + 1]; |
| + ASSERT(start <= end); |
| + Handle<String> substring; |
| + if (!first) { |
| + substring = |
| + isolate->factory()->NewProperSubString(subject, start, end); |
| + } else { |
| + substring = |
| + isolate->factory()->NewSubString(subject, start, end); |
| + } |
| + elements->set(i, *substring); |
| + } else { |
| + ASSERT(current_match[i * 2 + 1] < 0); |
| + elements->set(i, isolate->heap()->undefined_value()); |
| + } |
| + } |
| + elements->set(capture_count + 1, Smi::FromInt(match_start)); |
| + elements->set(capture_count + 2, *subject); |
| + builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
| + } |
| + first = false; |
| + } |
| + |
| + // If we did not get the maximum number of matches, we can stop here |
| + // since there are no matches left. |
| + if (num_matches < max_matches) break; |
| + |
| + if (match_end > match_start) { |
| + pos = match_end; |
| + } else { |
| + pos = match_end + 1; |
| + if (pos > subject_length) { |
| + break; |
| + } |
| + } |
| + |
| + num_matches = RegExpImpl::IrregexpExecRaw(regexp, |
| + subject, |
| + pos, |
| + register_vector); |
| + } while (num_matches > 0); |
| + |
| + if (num_matches != RegExpImpl::RE_EXCEPTION) { |
| + // Finished matching, with at least one match. |
| + if (match_end < subject_length) { |
| + ReplacementStringBuilder::AddSubjectSlice(builder, |
| + match_end, |
| + subject_length); |
| + } |
| + |
| + int last_match_capture_count = (capture_count + 1) * 2; |
| + int last_match_array_size = |
| + last_match_capture_count + RegExpImpl::kLastMatchOverhead; |
| + last_match_array->EnsureSize(last_match_array_size); |
| + AssertNoAllocation no_gc; |
| + FixedArray* elements = FixedArray::cast(last_match_array->elements()); |
| + // We have to set this even though the rest of the last match array is |
| + // ignored. |
| + RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); |
| + // These are also read without consulting the override. |
| + RegExpImpl::SetLastSubject(elements, *subject); |
| + RegExpImpl::SetLastInput(elements, *subject); |
| + return RegExpImpl::RE_SUCCESS; |
| + } |
| + } |
| + // No matches at all, return failure or exception result directly. |
| + return num_matches; |
| +} |
| + |
| + |
| // This is only called for StringReplaceGlobalRegExpWithFunction. This sets |
| // lastMatchInfoOverride to maintain the last match info, so we don't need to |
| // set any other last match array info. |
| @@ -3789,15 +4140,34 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { |
| } |
| FixedArrayBuilder builder(result_elements); |
| + if (regexp->TypeTag() == JSRegExp::ATOM) { |
| + Handle<String> pattern( |
| + String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); |
| + ASSERT(pattern->IsFlat()); |
| + if (SearchStringMultiple(isolate, subject, pattern, |
| + last_match_info, &builder)) { |
| + return *builder.ToJSArray(result_array); |
| + } |
| + return isolate->heap()->null_value(); |
| + } |
| + |
| + ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| + |
| int result; |
| if (regexp->CaptureCount() == 0) { |
| - result = SearchRegExpMultiple<false>( |
| - isolate, subject, regexp, last_match_info, &builder); |
| + result = SearchRegExpNoCaptureMultiple(isolate, |
| + subject, |
| + regexp, |
| + last_match_info, |
| + &builder); |
| } else { |
| - result = SearchRegExpMultiple<true>( |
| - isolate, subject, regexp, last_match_info, &builder); |
| + result = SearchRegExpMultiple(isolate, |
| + subject, |
| + regexp, |
| + last_match_info, |
| + &builder, |
| + isolate->runtime_zone()); |
| } |
| - |
| if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); |
| if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); |
| ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); |