Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/runtime.cc

Issue 10872010: Reland regexp global optimizations. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/mips/code-stubs-mips.cc ('k') | src/unicode.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2556 matching lines...) Expand 10 before | Expand all | Expand 10 after
2567 FixedArrayBuilder array_builder_; 2567 FixedArrayBuilder array_builder_;
2568 Handle<String> subject_; 2568 Handle<String> subject_;
2569 int character_count_; 2569 int character_count_;
2570 bool is_ascii_; 2570 bool is_ascii_;
2571 }; 2571 };
2572 2572
2573 2573
2574 class CompiledReplacement { 2574 class CompiledReplacement {
2575 public: 2575 public:
2576 explicit CompiledReplacement(Zone* zone) 2576 explicit CompiledReplacement(Zone* zone)
2577 : parts_(1, zone), replacement_substrings_(0, zone), 2577 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
2578 simple_hint_(false),
2579 zone_(zone) {}
2580 2578
2581 void Compile(Handle<String> replacement, 2579 // Return whether the replacement is simple.
2580 bool Compile(Handle<String> replacement,
2582 int capture_count, 2581 int capture_count,
2583 int subject_length); 2582 int subject_length);
2584 2583
2584 // Use Apply only if Compile returned false.
2585 void Apply(ReplacementStringBuilder* builder, 2585 void Apply(ReplacementStringBuilder* builder,
2586 int match_from, 2586 int match_from,
2587 int match_to, 2587 int match_to,
2588 Handle<JSArray> last_match_info); 2588 int32_t* match);
2589 2589
2590 // Number of distinct parts of the replacement pattern. 2590 // Number of distinct parts of the replacement pattern.
2591 int parts() { 2591 int parts() {
2592 return parts_.length(); 2592 return parts_.length();
2593 } 2593 }
2594 2594
2595 bool simple_hint() {
2596 return simple_hint_;
2597 }
2598
2599 Zone* zone() const { return zone_; } 2595 Zone* zone() const { return zone_; }
2600 2596
2601 private: 2597 private:
2602 enum PartType { 2598 enum PartType {
2603 SUBJECT_PREFIX = 1, 2599 SUBJECT_PREFIX = 1,
2604 SUBJECT_SUFFIX, 2600 SUBJECT_SUFFIX,
2605 SUBJECT_CAPTURE, 2601 SUBJECT_CAPTURE,
2606 REPLACEMENT_SUBSTRING, 2602 REPLACEMENT_SUBSTRING,
2607 REPLACEMENT_STRING, 2603 REPLACEMENT_STRING,
2608 2604
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
2649 // tag == REPLACEMENT_STRING: data is index into array of substrings 2645 // tag == REPLACEMENT_STRING: data is index into array of substrings
2650 // of the replacement string. 2646 // of the replacement string.
2651 // tag <= 0: Temporary representation of the substring of the replacement 2647 // tag <= 0: Temporary representation of the substring of the replacement
2652 // string ranging over -tag .. data. 2648 // string ranging over -tag .. data.
2653 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 2649 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
2654 // substring objects. 2650 // substring objects.
2655 int data; 2651 int data;
2656 }; 2652 };
2657 2653
2658 template<typename Char> 2654 template<typename Char>
2659 static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 2655 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
2660 Vector<Char> characters, 2656 Vector<Char> characters,
2661 int capture_count, 2657 int capture_count,
2662 int subject_length, 2658 int subject_length,
2663 Zone* zone) { 2659 Zone* zone) {
2664 int length = characters.length(); 2660 int length = characters.length();
2665 int last = 0; 2661 int last = 0;
2666 for (int i = 0; i < length; i++) { 2662 for (int i = 0; i < length; i++) {
2667 Char c = characters[i]; 2663 Char c = characters[i];
2668 if (c == '$') { 2664 if (c == '$') {
2669 int next_index = i + 1; 2665 int next_index = i + 1;
2670 if (next_index == length) { // No next character! 2666 if (next_index == length) { // No next character!
2671 break; 2667 break;
2672 } 2668 }
2673 Char c2 = characters[next_index]; 2669 Char c2 = characters[next_index];
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
2747 break; 2743 break;
2748 } 2744 }
2749 default: 2745 default:
2750 i = next_index; 2746 i = next_index;
2751 break; 2747 break;
2752 } 2748 }
2753 } 2749 }
2754 } 2750 }
2755 if (length > last) { 2751 if (length > last) {
2756 if (last == 0) { 2752 if (last == 0) {
2757 parts->Add(ReplacementPart::ReplacementString(), zone); 2753 // Replacement is simple. Do not use Apply to do the replacement.
2758 return true; 2754 return true;
2759 } else { 2755 } else {
2760 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 2756 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
2761 } 2757 }
2762 } 2758 }
2763 return false; 2759 return false;
2764 } 2760 }
2765 2761
2766 ZoneList<ReplacementPart> parts_; 2762 ZoneList<ReplacementPart> parts_;
2767 ZoneList<Handle<String> > replacement_substrings_; 2763 ZoneList<Handle<String> > replacement_substrings_;
2768 bool simple_hint_;
2769 Zone* zone_; 2764 Zone* zone_;
2770 }; 2765 };
2771 2766
2772 2767
2773 void CompiledReplacement::Compile(Handle<String> replacement, 2768 bool CompiledReplacement::Compile(Handle<String> replacement,
2774 int capture_count, 2769 int capture_count,
2775 int subject_length) { 2770 int subject_length) {
2776 { 2771 {
2777 AssertNoAllocation no_alloc; 2772 AssertNoAllocation no_alloc;
2778 String::FlatContent content = replacement->GetFlatContent(); 2773 String::FlatContent content = replacement->GetFlatContent();
2779 ASSERT(content.IsFlat()); 2774 ASSERT(content.IsFlat());
2775 bool simple = false;
2780 if (content.IsAscii()) { 2776 if (content.IsAscii()) {
2781 simple_hint_ = ParseReplacementPattern(&parts_, 2777 simple = ParseReplacementPattern(&parts_,
2782 content.ToAsciiVector(), 2778 content.ToAsciiVector(),
2783 capture_count, 2779 capture_count,
2784 subject_length, 2780 subject_length,
2785 zone()); 2781 zone());
2786 } else { 2782 } else {
2787 ASSERT(content.IsTwoByte()); 2783 ASSERT(content.IsTwoByte());
2788 simple_hint_ = ParseReplacementPattern(&parts_, 2784 simple = ParseReplacementPattern(&parts_,
2789 content.ToUC16Vector(), 2785 content.ToUC16Vector(),
2790 capture_count, 2786 capture_count,
2791 subject_length, 2787 subject_length,
2792 zone()); 2788 zone());
2793 } 2789 }
2790 if (simple) return true;
2794 } 2791 }
2792
2795 Isolate* isolate = replacement->GetIsolate(); 2793 Isolate* isolate = replacement->GetIsolate();
2796 // Find substrings of replacement string and create them as String objects. 2794 // Find substrings of replacement string and create them as String objects.
2797 int substring_index = 0; 2795 int substring_index = 0;
2798 for (int i = 0, n = parts_.length(); i < n; i++) { 2796 for (int i = 0, n = parts_.length(); i < n; i++) {
2799 int tag = parts_[i].tag; 2797 int tag = parts_[i].tag;
2800 if (tag <= 0) { // A replacement string slice. 2798 if (tag <= 0) { // A replacement string slice.
2801 int from = -tag; 2799 int from = -tag;
2802 int to = parts_[i].data; 2800 int to = parts_[i].data;
2803 replacement_substrings_.Add( 2801 replacement_substrings_.Add(
2804 isolate->factory()->NewSubString(replacement, from, to), zone()); 2802 isolate->factory()->NewSubString(replacement, from, to), zone());
2805 parts_[i].tag = REPLACEMENT_SUBSTRING; 2803 parts_[i].tag = REPLACEMENT_SUBSTRING;
2806 parts_[i].data = substring_index; 2804 parts_[i].data = substring_index;
2807 substring_index++; 2805 substring_index++;
2808 } else if (tag == REPLACEMENT_STRING) { 2806 } else if (tag == REPLACEMENT_STRING) {
2809 replacement_substrings_.Add(replacement, zone()); 2807 replacement_substrings_.Add(replacement, zone());
2810 parts_[i].data = substring_index; 2808 parts_[i].data = substring_index;
2811 substring_index++; 2809 substring_index++;
2812 } 2810 }
2813 } 2811 }
2812 return false;
2814 } 2813 }
2815 2814
2816 2815
2817 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 2816 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
2818 int match_from, 2817 int match_from,
2819 int match_to, 2818 int match_to,
2820 Handle<JSArray> last_match_info) { 2819 int32_t* match) {
2820 ASSERT_LT(0, parts_.length());
2821 for (int i = 0, n = parts_.length(); i < n; i++) { 2821 for (int i = 0, n = parts_.length(); i < n; i++) {
2822 ReplacementPart part = parts_[i]; 2822 ReplacementPart part = parts_[i];
2823 switch (part.tag) { 2823 switch (part.tag) {
2824 case SUBJECT_PREFIX: 2824 case SUBJECT_PREFIX:
2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
2826 break; 2826 break;
2827 case SUBJECT_SUFFIX: { 2827 case SUBJECT_SUFFIX: {
2828 int subject_length = part.data; 2828 int subject_length = part.data;
2829 if (match_to < subject_length) { 2829 if (match_to < subject_length) {
2830 builder->AddSubjectSlice(match_to, subject_length); 2830 builder->AddSubjectSlice(match_to, subject_length);
2831 } 2831 }
2832 break; 2832 break;
2833 } 2833 }
2834 case SUBJECT_CAPTURE: { 2834 case SUBJECT_CAPTURE: {
2835 int capture = part.data; 2835 int capture = part.data;
2836 FixedArray* match_info = FixedArray::cast(last_match_info->elements()); 2836 int from = match[capture * 2];
2837 int from = RegExpImpl::GetCapture(match_info, capture * 2); 2837 int to = match[capture * 2 + 1];
2838 int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
2839 if (from >= 0 && to > from) { 2838 if (from >= 0 && to > from) {
2840 builder->AddSubjectSlice(from, to); 2839 builder->AddSubjectSlice(from, to);
2841 } 2840 }
2842 break; 2841 break;
2843 } 2842 }
2844 case REPLACEMENT_SUBSTRING: 2843 case REPLACEMENT_SUBSTRING:
2845 case REPLACEMENT_STRING: 2844 case REPLACEMENT_STRING:
2846 builder->AddString(replacement_substrings_[part.data]); 2845 builder->AddString(replacement_substrings_[part.data]);
2847 break; 2846 break;
2848 default: 2847 default:
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
2950 pattern_content.ToUC16Vector(), 2949 pattern_content.ToUC16Vector(),
2951 indices, 2950 indices,
2952 limit, 2951 limit,
2953 zone); 2952 zone);
2954 } 2953 }
2955 } 2954 }
2956 } 2955 }
2957 } 2956 }
2958 2957
2959 2958
2960 // Two smis before and after the match, for very long strings.
2961 const int kMaxBuilderEntriesPerRegExpMatch = 5;
2962
2963
2964 static void SetLastMatchInfoNoCaptures(Handle<String> subject,
2965 Handle<JSArray> last_match_info,
2966 int match_start,
2967 int match_end) {
2968 // Fill last_match_info with a single capture.
2969 last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
2970 AssertNoAllocation no_gc;
2971 FixedArray* elements = FixedArray::cast(last_match_info->elements());
2972 RegExpImpl::SetLastCaptureCount(elements, 2);
2973 RegExpImpl::SetLastInput(elements, *subject);
2974 RegExpImpl::SetLastSubject(elements, *subject);
2975 RegExpImpl::SetCapture(elements, 0, match_start);
2976 RegExpImpl::SetCapture(elements, 1, match_end);
2977 }
2978
2979
2980 template <typename SubjectChar, typename PatternChar>
2981 static bool SearchStringMultiple(Isolate* isolate,
2982 Vector<const SubjectChar> subject,
2983 Vector<const PatternChar> pattern,
2984 String* pattern_string,
2985 FixedArrayBuilder* builder,
2986 int* match_pos) {
2987 int pos = *match_pos;
2988 int subject_length = subject.length();
2989 int pattern_length = pattern.length();
2990 int max_search_start = subject_length - pattern_length;
2991 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
2992 while (pos <= max_search_start) {
2993 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2994 *match_pos = pos;
2995 return false;
2996 }
2997 // Position of end of previous match.
2998 int match_end = pos + pattern_length;
2999 int new_pos = search.Search(subject, match_end);
3000 if (new_pos >= 0) {
3001 // A match.
3002 if (new_pos > match_end) {
3003 ReplacementStringBuilder::AddSubjectSlice(builder,
3004 match_end,
3005 new_pos);
3006 }
3007 pos = new_pos;
3008 builder->Add(pattern_string);
3009 } else {
3010 break;
3011 }
3012 }
3013
3014 if (pos < max_search_start) {
3015 ReplacementStringBuilder::AddSubjectSlice(builder,
3016 pos + pattern_length,
3017 subject_length);
3018 }
3019 *match_pos = pos;
3020 return true;
3021 }
3022
3023
3024
3025
3026 template<typename ResultSeqString> 2959 template<typename ResultSeqString>
3027 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( 2960 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
3028 Isolate* isolate, 2961 Isolate* isolate,
3029 Handle<String> subject, 2962 Handle<String> subject,
3030 Handle<JSRegExp> pattern_regexp, 2963 Handle<JSRegExp> pattern_regexp,
3031 Handle<String> replacement, 2964 Handle<String> replacement,
3032 Handle<JSArray> last_match_info, 2965 Handle<JSArray> last_match_info) {
3033 Zone* zone) {
3034 ASSERT(subject->IsFlat()); 2966 ASSERT(subject->IsFlat());
3035 ASSERT(replacement->IsFlat()); 2967 ASSERT(replacement->IsFlat());
3036 2968
3037 ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT); 2969 Zone* zone = isolate->runtime_zone();
3038 ZoneList<int> indices(8, isolate->runtime_zone()); 2970 ZoneScope zone_space(zone, DELETE_ON_EXIT);
2971 ZoneList<int> indices(8, zone);
3039 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 2972 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
3040 String* pattern = 2973 String* pattern =
3041 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 2974 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
3042 int subject_len = subject->length(); 2975 int subject_len = subject->length();
3043 int pattern_len = pattern->length(); 2976 int pattern_len = pattern->length();
3044 int replacement_len = replacement->length(); 2977 int replacement_len = replacement->length();
3045 2978
3046 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, 2979 FindStringIndicesDispatch(
3047 zone); 2980 isolate, *subject, pattern, &indices, 0xffffffff, zone);
3048 2981
3049 int matches = indices.length(); 2982 int matches = indices.length();
3050 if (matches == 0) return *subject; 2983 if (matches == 0) return *subject;
3051 2984
3052 // Detect integer overflow. 2985 // Detect integer overflow.
3053 int64_t result_len_64 = 2986 int64_t result_len_64 =
3054 (static_cast<int64_t>(replacement_len) - 2987 (static_cast<int64_t>(replacement_len) -
3055 static_cast<int64_t>(pattern_len)) * 2988 static_cast<int64_t>(pattern_len)) *
3056 static_cast<int64_t>(matches) + 2989 static_cast<int64_t>(matches) +
3057 static_cast<int64_t>(subject_len); 2990 static_cast<int64_t>(subject_len);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3092 subject_pos = indices.at(i) + pattern_len; 3025 subject_pos = indices.at(i) + pattern_len;
3093 } 3026 }
3094 // Add remaining subject content at the end. 3027 // Add remaining subject content at the end.
3095 if (subject_pos < subject_len) { 3028 if (subject_pos < subject_len) {
3096 String::WriteToFlat(*subject, 3029 String::WriteToFlat(*subject,
3097 result->GetChars() + result_pos, 3030 result->GetChars() + result_pos,
3098 subject_pos, 3031 subject_pos,
3099 subject_len); 3032 subject_len);
3100 } 3033 }
3101 3034
3102 SetLastMatchInfoNoCaptures(subject, 3035 int32_t match_indices[] = { indices.at(matches - 1),
3103 last_match_info, 3036 indices.at(matches - 1) + pattern_len };
3104 indices.at(matches - 1), 3037 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
3105 indices.at(matches - 1) + pattern_len);
3106 3038
3107 return *result; 3039 return *result;
3108 } 3040 }
3109 3041
3110 3042
3111 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( 3043 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
3112 Isolate* isolate, 3044 Isolate* isolate,
3113 String* subject, 3045 Handle<String> subject,
3114 JSRegExp* regexp, 3046 Handle<JSRegExp> regexp,
3115 String* replacement, 3047 Handle<String> replacement,
3116 JSArray* last_match_info, 3048 Handle<JSArray> last_match_info) {
3117 Zone* zone) {
3118 ASSERT(subject->IsFlat()); 3049 ASSERT(subject->IsFlat());
3119 ASSERT(replacement->IsFlat()); 3050 ASSERT(replacement->IsFlat());
3120 3051
3121 HandleScope handles(isolate); 3052 bool is_global = regexp->GetFlags().is_global();
3122 3053 int capture_count = regexp->CaptureCount();
3123 int length = subject->length(); 3054 int subject_length = subject->length();
3124 Handle<String> subject_handle(subject);
3125 Handle<JSRegExp> regexp_handle(regexp);
3126 Handle<String> replacement_handle(replacement);
3127 Handle<JSArray> last_match_info_handle(last_match_info);
3128 Handle<Object> match = RegExpImpl::Exec(regexp_handle,
3129 subject_handle,
3130 0,
3131 last_match_info_handle);
3132 if (match.is_null()) {
3133 return Failure::Exception();
3134 }
3135 if (match->IsNull()) {
3136 return *subject_handle;
3137 }
3138
3139 int capture_count = regexp_handle->CaptureCount();
3140 3055
3141 // CompiledReplacement uses zone allocation. 3056 // CompiledReplacement uses zone allocation.
3057 Zone* zone = isolate->runtime_zone();
3142 ZoneScope zonescope(zone, DELETE_ON_EXIT); 3058 ZoneScope zonescope(zone, DELETE_ON_EXIT);
3143 CompiledReplacement compiled_replacement(zone); 3059 CompiledReplacement compiled_replacement(zone);
3144 3060 bool simple_replace = compiled_replacement.Compile(replacement,
3145 compiled_replacement.Compile(replacement_handle, 3061 capture_count,
3146 capture_count, 3062 subject_length);
3147 length);
3148
3149 bool is_global = regexp_handle->GetFlags().is_global();
3150 3063
3151 // Shortcut for simple non-regexp global replacements 3064 // Shortcut for simple non-regexp global replacements
3152 if (is_global && 3065 if (is_global &&
3153 regexp_handle->TypeTag() == JSRegExp::ATOM && 3066 regexp->TypeTag() == JSRegExp::ATOM &&
3154 compiled_replacement.simple_hint()) { 3067 simple_replace) {
3155 if (subject_handle->HasOnlyAsciiChars() && 3068 if (subject->HasOnlyAsciiChars() && replacement->HasOnlyAsciiChars()) {
3156 replacement_handle->HasOnlyAsciiChars()) {
3157 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3069 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3158 isolate, 3070 isolate, subject, regexp, replacement, last_match_info);
3159 subject_handle, 3071 } else {
3160 regexp_handle,
3161 replacement_handle,
3162 last_match_info_handle,
3163 zone);
3164 } else {
3165 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3072 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3166 isolate, 3073 isolate, subject, regexp, replacement, last_match_info);
3167 subject_handle,
3168 regexp_handle,
3169 replacement_handle,
3170 last_match_info_handle,
3171 zone);
3172 } 3074 }
3173 } 3075 }
3174 3076
3077 RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
3078 if (global_cache.HasException()) return Failure::Exception();
3079
3080 int32_t* current_match = global_cache.FetchNext();
3081 if (current_match == NULL) {
3082 if (global_cache.HasException()) return Failure::Exception();
3083 return *subject;
3084 }
3085
3175 // Guessing the number of parts that the final result string is built 3086 // Guessing the number of parts that the final result string is built
3176 // from. Global regexps can match any number of times, so we guess 3087 // from. Global regexps can match any number of times, so we guess
3177 // conservatively. 3088 // conservatively.
3178 int expected_parts = 3089 int expected_parts =
3179 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; 3090 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
3180 ReplacementStringBuilder builder(isolate->heap(), 3091 ReplacementStringBuilder builder(isolate->heap(),
3181 subject_handle, 3092 subject,
3182 expected_parts); 3093 expected_parts);
3183 3094
3184 // Index of end of last match.
3185 int prev = 0;
3186
3187
3188 // Number of parts added by compiled replacement plus preceeding 3095 // Number of parts added by compiled replacement plus preceeding
3189 // string and possibly suffix after last match. It is possible for 3096 // string and possibly suffix after last match. It is possible for
3190 // all components to use two elements when encoded as two smis. 3097 // all components to use two elements when encoded as two smis.
3191 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); 3098 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
3192 bool matched = true; 3099
3100 int prev = 0;
3101
3193 do { 3102 do {
3194 ASSERT(last_match_info_handle->HasFastObjectElements());
3195 // Increase the capacity of the builder before entering local handle-scope,
3196 // so its internal buffer can safely allocate a new handle if it grows.
3197 builder.EnsureCapacity(parts_added_per_loop); 3103 builder.EnsureCapacity(parts_added_per_loop);
3198 3104
3199 HandleScope loop_scope(isolate); 3105 int start = current_match[0];
3200 int start, end; 3106 int end = current_match[1];
3201 {
3202 AssertNoAllocation match_info_array_is_not_in_a_handle;
3203 FixedArray* match_info_array =
3204 FixedArray::cast(last_match_info_handle->elements());
3205
3206 ASSERT_EQ(capture_count * 2 + 2,
3207 RegExpImpl::GetLastCaptureCount(match_info_array));
3208 start = RegExpImpl::GetCapture(match_info_array, 0);
3209 end = RegExpImpl::GetCapture(match_info_array, 1);
3210 }
3211 3107
3212 if (prev < start) { 3108 if (prev < start) {
3213 builder.AddSubjectSlice(prev, start); 3109 builder.AddSubjectSlice(prev, start);
3214 } 3110 }
3215 compiled_replacement.Apply(&builder,
3216 start,
3217 end,
3218 last_match_info_handle);
3219 3111
3112 if (simple_replace) {
3113 builder.AddString(replacement);
3114 } else {
3115 compiled_replacement.Apply(&builder,
3116 start,
3117 end,
3118 current_match);
3119 }
3220 prev = end; 3120 prev = end;
3221 3121
3222 // Only continue checking for global regexps. 3122 // Only continue checking for global regexps.
3223 if (!is_global) break; 3123 if (!is_global) break;
3224 3124
3225 // Continue from where the match ended, unless it was an empty match. 3125 current_match = global_cache.FetchNext();
3226 int next = end; 3126 } while (current_match != NULL);
3227 if (start == end) {
3228 next = end + 1;
3229 if (next > length) break;
3230 }
3231 3127
3232 match = RegExpImpl::Exec(regexp_handle, 3128 if (global_cache.HasException()) return Failure::Exception();
3233 subject_handle,
3234 next,
3235 last_match_info_handle);
3236 if (match.is_null()) {
3237 return Failure::Exception();
3238 }
3239 matched = !match->IsNull();
3240 } while (matched);
3241 3129
3242 if (prev < length) { 3130 if (prev < subject_length) {
3243 builder.AddSubjectSlice(prev, length); 3131 builder.EnsureCapacity(2);
3132 builder.AddSubjectSlice(prev, subject_length);
3244 } 3133 }
3245 3134
3135 RegExpImpl::SetLastMatchInfo(last_match_info,
3136 subject,
3137 capture_count,
3138 global_cache.LastSuccessfulMatch());
3139
3246 return *(builder.ToString()); 3140 return *(builder.ToString());
3247 } 3141 }
3248 3142
3249 3143
3250 template <typename ResultSeqString> 3144 template <typename ResultSeqString>
3251 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( 3145 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
3252 Isolate* isolate, 3146 Isolate* isolate,
3253 String* subject, 3147 Handle<String> subject,
3254 JSRegExp* regexp, 3148 Handle<JSRegExp> regexp,
3255 JSArray* last_match_info, 3149 Handle<JSArray> last_match_info) {
3256 Zone* zone) {
3257 ASSERT(subject->IsFlat()); 3150 ASSERT(subject->IsFlat());
3258 3151
3259 HandleScope handles(isolate); 3152 bool is_global = regexp->GetFlags().is_global();
3260
3261 Handle<String> subject_handle(subject);
3262 Handle<JSRegExp> regexp_handle(regexp);
3263 Handle<JSArray> last_match_info_handle(last_match_info);
3264 3153
3265 // Shortcut for simple non-regexp global replacements 3154 // Shortcut for simple non-regexp global replacements
3266 if (regexp_handle->GetFlags().is_global() && 3155 if (is_global &&
3267 regexp_handle->TypeTag() == JSRegExp::ATOM) { 3156 regexp->TypeTag() == JSRegExp::ATOM) {
3268 Handle<String> empty_string_handle(HEAP->empty_string()); 3157 Handle<String> empty_string(HEAP->empty_string());
3269 if (subject_handle->HasOnlyAsciiChars()) { 3158 if (subject->HasOnlyAsciiChars()) {
3270 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3159 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3271 isolate, 3160 isolate,
3272 subject_handle, 3161 subject,
3273 regexp_handle, 3162 regexp,
3274 empty_string_handle, 3163 empty_string,
3275 last_match_info_handle, 3164 last_match_info);
3276 zone);
3277 } else { 3165 } else {
3278 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3166 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3279 isolate, 3167 isolate,
3280 subject_handle, 3168 subject,
3281 regexp_handle, 3169 regexp,
3282 empty_string_handle, 3170 empty_string,
3283 last_match_info_handle, 3171 last_match_info);
3284 zone);
3285 } 3172 }
3286 } 3173 }
3287 3174
3288 Handle<Object> match = RegExpImpl::Exec(regexp_handle, 3175 RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
3289 subject_handle, 3176 if (global_cache.HasException()) return Failure::Exception();
3290 0,
3291 last_match_info_handle);
3292 if (match.is_null()) return Failure::Exception();
3293 if (match->IsNull()) return *subject_handle;
3294 3177
3295 ASSERT(last_match_info_handle->HasFastObjectElements()); 3178 int32_t* current_match = global_cache.FetchNext();
3296 3179 if (current_match == NULL) {
3297 int start, end; 3180 if (global_cache.HasException()) return Failure::Exception();
3298 { 3181 return *subject;
3299 AssertNoAllocation match_info_array_is_not_in_a_handle;
3300 FixedArray* match_info_array =
3301 FixedArray::cast(last_match_info_handle->elements());
3302
3303 start = RegExpImpl::GetCapture(match_info_array, 0);
3304 end = RegExpImpl::GetCapture(match_info_array, 1);
3305 } 3182 }
3306 3183
3307 bool global = regexp_handle->GetFlags().is_global(); 3184 int start = current_match[0];
3185 int end = current_match[1];
3186 int capture_count = regexp->CaptureCount();
3187 int subject_length = subject->length();
3308 3188
3309 if (start == end && !global) return *subject_handle; 3189 int new_length = subject_length - (end - start);
3190 if (new_length == 0) return isolate->heap()->empty_string();
3310 3191
3311 int length = subject_handle->length();
3312 int new_length = length - (end - start);
3313 if (new_length == 0) {
3314 return isolate->heap()->empty_string();
3315 }
3316 Handle<ResultSeqString> answer; 3192 Handle<ResultSeqString> answer;
3317 if (ResultSeqString::kHasAsciiEncoding) { 3193 if (ResultSeqString::kHasAsciiEncoding) {
3318 answer = Handle<ResultSeqString>::cast( 3194 answer = Handle<ResultSeqString>::cast(
3319 isolate->factory()->NewRawAsciiString(new_length)); 3195 isolate->factory()->NewRawAsciiString(new_length));
3320 } else { 3196 } else {
3321 answer = Handle<ResultSeqString>::cast( 3197 answer = Handle<ResultSeqString>::cast(
3322 isolate->factory()->NewRawTwoByteString(new_length)); 3198 isolate->factory()->NewRawTwoByteString(new_length));
3323 } 3199 }
3324 3200
3325 // If the regexp isn't global, only match once. 3201 if (!is_global) {
3326 if (!global) { 3202 RegExpImpl::SetLastMatchInfo(
3327 if (start > 0) { 3203 last_match_info, subject, capture_count, current_match);
3328 String::WriteToFlat(*subject_handle, 3204 if (start == end) {
3329 answer->GetChars(), 3205 return *subject;
3330 0, 3206 } else {
3331 start); 3207 if (start > 0) {
3208 String::WriteToFlat(*subject, answer->GetChars(), 0, start);
3209 }
3210 if (end < subject_length) {
3211 String::WriteToFlat(
3212 *subject, answer->GetChars() + start, end, subject_length);
3213 }
3214 return *answer;
3332 } 3215 }
3333 if (end < length) {
3334 String::WriteToFlat(*subject_handle,
3335 answer->GetChars() + start,
3336 end,
3337 length);
3338 }
3339 return *answer;
3340 } 3216 }
3341 3217
3342 int prev = 0; // Index of end of last match. 3218 int prev = 0;
3343 int next = 0; // Start of next search (prev unless last match was empty).
3344 int position = 0; 3219 int position = 0;
3345 3220
3346 do { 3221 do {
3222 start = current_match[0];
3223 end = current_match[1];
3347 if (prev < start) { 3224 if (prev < start) {
3348 // Add substring subject[prev;start] to answer string. 3225 // Add substring subject[prev;start] to answer string.
3349 String::WriteToFlat(*subject_handle, 3226 String::WriteToFlat(
3350 answer->GetChars() + position, 3227 *subject, answer->GetChars() + position, prev, start);
3351 prev,
3352 start);
3353 position += start - prev; 3228 position += start - prev;
3354 } 3229 }
3355 prev = end; 3230 prev = end;
3356 next = end;
3357 // Continue from where the match ended, unless it was an empty match.
3358 if (start == end) {
3359 next++;
3360 if (next > length) break;
3361 }
3362 match = RegExpImpl::Exec(regexp_handle,
3363 subject_handle,
3364 next,
3365 last_match_info_handle);
3366 if (match.is_null()) return Failure::Exception();
3367 if (match->IsNull()) break;
3368 3231
3369 ASSERT(last_match_info_handle->HasFastObjectElements()); 3232 current_match = global_cache.FetchNext();
3370 HandleScope loop_scope(isolate); 3233 } while (current_match != NULL);
3371 {
3372 AssertNoAllocation match_info_array_is_not_in_a_handle;
3373 FixedArray* match_info_array =
3374 FixedArray::cast(last_match_info_handle->elements());
3375 start = RegExpImpl::GetCapture(match_info_array, 0);
3376 end = RegExpImpl::GetCapture(match_info_array, 1);
3377 }
3378 } while (true);
3379 3234
3380 if (prev < length) { 3235 if (global_cache.HasException()) return Failure::Exception();
3236
3237 RegExpImpl::SetLastMatchInfo(last_match_info,
3238 subject,
3239 capture_count,
3240 global_cache.LastSuccessfulMatch());
3241
3242 if (prev < subject_length) {
3381 // Add substring subject[prev;length] to answer string. 3243 // Add substring subject[prev;length] to answer string.
3382 String::WriteToFlat(*subject_handle, 3244 String::WriteToFlat(
3383 answer->GetChars() + position, 3245 *subject, answer->GetChars() + position, prev, subject_length);
3384 prev, 3246 position += subject_length - prev;
3385 length);
3386 position += length - prev;
3387 } 3247 }
3388 3248
3389 if (position == 0) { 3249 if (position == 0) return isolate->heap()->empty_string();
3390 return isolate->heap()->empty_string();
3391 }
3392 3250
3393 // Shorten string and fill 3251 // Shorten string and fill
3394 int string_size = ResultSeqString::SizeFor(position); 3252 int string_size = ResultSeqString::SizeFor(position);
3395 int allocated_string_size = ResultSeqString::SizeFor(new_length); 3253 int allocated_string_size = ResultSeqString::SizeFor(new_length);
3396 int delta = allocated_string_size - string_size; 3254 int delta = allocated_string_size - string_size;
3397 3255
3398 answer->set_length(position); 3256 answer->set_length(position);
3399 if (delta == 0) return *answer; 3257 if (delta == 0) return *answer;
3400 3258
3401 Address end_of_string = answer->address() + string_size; 3259 Address end_of_string = answer->address() + string_size;
3402 isolate->heap()->CreateFillerObjectAt(end_of_string, delta); 3260 isolate->heap()->CreateFillerObjectAt(end_of_string, delta);
3403 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) { 3261 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) {
3404 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta); 3262 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta);
3405 } 3263 }
3406 3264
3407 return *answer; 3265 return *answer;
3408 } 3266 }
3409 3267
3410 3268
3411 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { 3269 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
3412 ASSERT(args.length() == 4); 3270 ASSERT(args.length() == 4);
3413 3271
3414 CONVERT_ARG_CHECKED(String, subject, 0); 3272 HandleScope scope(isolate);
3415 if (!subject->IsFlat()) {
3416 Object* flat_subject;
3417 { MaybeObject* maybe_flat_subject = subject->TryFlatten();
3418 if (!maybe_flat_subject->ToObject(&flat_subject)) {
3419 return maybe_flat_subject;
3420 }
3421 }
3422 subject = String::cast(flat_subject);
3423 }
3424 3273
3425 CONVERT_ARG_CHECKED(String, replacement, 2); 3274 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3426 if (!replacement->IsFlat()) { 3275 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
3427 Object* flat_replacement; 3276 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3428 { MaybeObject* maybe_flat_replacement = replacement->TryFlatten(); 3277 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
3429 if (!maybe_flat_replacement->ToObject(&flat_replacement)) {
3430 return maybe_flat_replacement;
3431 }
3432 }
3433 replacement = String::cast(flat_replacement);
3434 }
3435 3278
3436 CONVERT_ARG_CHECKED(JSRegExp, regexp, 1); 3279 if (!subject->IsFlat()) subject = FlattenGetString(subject);
3437 CONVERT_ARG_CHECKED(JSArray, last_match_info, 3); 3280
3281 if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
3438 3282
3439 ASSERT(last_match_info->HasFastObjectElements()); 3283 ASSERT(last_match_info->HasFastObjectElements());
3440 3284
3441 Zone* zone = isolate->runtime_zone();
3442 if (replacement->length() == 0) { 3285 if (replacement->length() == 0) {
3443 if (subject->HasOnlyAsciiChars()) { 3286 if (subject->HasOnlyAsciiChars()) {
3444 return StringReplaceRegExpWithEmptyString<SeqAsciiString>( 3287 return StringReplaceRegExpWithEmptyString<SeqAsciiString>(
3445 isolate, subject, regexp, last_match_info, zone); 3288 isolate, subject, regexp, last_match_info);
3446 } else { 3289 } else {
3447 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>( 3290 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
3448 isolate, subject, regexp, last_match_info, zone); 3291 isolate, subject, regexp, last_match_info);
3449 } 3292 }
3450 } 3293 }
3451 3294
3452 return StringReplaceRegExpWithString(isolate, 3295 return StringReplaceRegExpWithString(
3453 subject, 3296 isolate, subject, regexp, replacement, last_match_info);
3454 regexp,
3455 replacement,
3456 last_match_info,
3457 zone);
3458 } 3297 }
3459 3298
3460 3299
3461 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate, 3300 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate,
3462 Handle<String> subject, 3301 Handle<String> subject,
3463 Handle<String> search, 3302 Handle<String> search,
3464 Handle<String> replace, 3303 Handle<String> replace,
3465 bool* found, 3304 bool* found,
3466 int recursion_limit) { 3305 int recursion_limit) {
3467 if (recursion_limit == 0) return Handle<String>::null(); 3306 if (recursion_limit == 0) return Handle<String>::null();
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
3770 3609
3771 3610
3772 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { 3611 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
3773 ASSERT_EQ(3, args.length()); 3612 ASSERT_EQ(3, args.length());
3774 3613
3775 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 3614 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3776 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 3615 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3777 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2); 3616 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
3778 HandleScope handles; 3617 HandleScope handles;
3779 3618
3780 Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); 3619 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
3620 if (global_cache.HasException()) return Failure::Exception();
3781 3621
3782 if (match.is_null()) { 3622 int capture_count = regexp->CaptureCount();
3783 return Failure::Exception();
3784 }
3785 if (match->IsNull()) {
3786 return isolate->heap()->null_value();
3787 }
3788 int length = subject->length();
3789 3623
3790 Zone* zone = isolate->runtime_zone(); 3624 Zone* zone = isolate->runtime_zone();
3791 ZoneScope zone_space(zone, DELETE_ON_EXIT); 3625 ZoneScope zone_space(zone, DELETE_ON_EXIT);
3792 ZoneList<int> offsets(8, zone); 3626 ZoneList<int> offsets(8, zone);
3793 int start; 3627
3794 int end; 3628 while (true) {
3795 do { 3629 int32_t* match = global_cache.FetchNext();
3796 { 3630 if (match == NULL) break;
3797 AssertNoAllocation no_alloc; 3631 offsets.Add(match[0], zone); // start
3798 FixedArray* elements = FixedArray::cast(regexp_info->elements()); 3632 offsets.Add(match[1], zone); // end
3799 start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); 3633 }
3800 end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); 3634
3801 } 3635 if (global_cache.HasException()) return Failure::Exception();
3802 offsets.Add(start, zone); 3636
3803 offsets.Add(end, zone); 3637 if (offsets.length() == 0) {
3804 if (start == end) if (++end > length) break; 3638 // Not a single match.
3805 match = RegExpImpl::Exec(regexp, subject, end, regexp_info); 3639 return isolate->heap()->null_value();
3806 if (match.is_null()) { 3640 }
3807 return Failure::Exception(); 3641
3808 } 3642 RegExpImpl::SetLastMatchInfo(regexp_info,
3809 } while (!match->IsNull()); 3643 subject,
3644 capture_count,
3645 global_cache.LastSuccessfulMatch());
3646
3810 int matches = offsets.length() / 2; 3647 int matches = offsets.length() / 2;
3811 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches); 3648 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
3812 Handle<String> substring = isolate->factory()-> 3649 Handle<String> substring =
3813 NewSubString(subject, offsets.at(0), offsets.at(1)); 3650 isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
3814 elements->set(0, *substring); 3651 elements->set(0, *substring);
3815 for (int i = 1; i < matches ; i++) { 3652 for (int i = 1; i < matches; i++) {
3653 HandleScope temp_scope(isolate);
3816 int from = offsets.at(i * 2); 3654 int from = offsets.at(i * 2);
3817 int to = offsets.at(i * 2 + 1); 3655 int to = offsets.at(i * 2 + 1);
3818 Handle<String> substring = isolate->factory()-> 3656 Handle<String> substring =
3819 NewProperSubString(subject, from, to); 3657 isolate->factory()->NewProperSubString(subject, from, to);
3820 elements->set(i, *substring); 3658 elements->set(i, *substring);
3821 } 3659 }
3822 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements); 3660 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
3823 result->set_length(Smi::FromInt(matches)); 3661 result->set_length(Smi::FromInt(matches));
3824 return *result; 3662 return *result;
3825 } 3663 }
3826 3664
3827 3665
3828 static bool SearchStringMultiple(Isolate* isolate, 3666 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3829 Handle<String> subject, 3667 // separate last match info. See comment on that function.
3830 Handle<String> pattern, 3668 template<bool has_capture>
3831 Handle<JSArray> last_match_info, 3669 static int SearchRegExpMultiple(
3832 FixedArrayBuilder* builder) {
3833 ASSERT(subject->IsFlat());
3834 ASSERT(pattern->IsFlat());
3835
3836 // Treating as if a previous match was before first character.
3837 int match_pos = -pattern->length();
3838
3839 for (;;) { // Break when search complete.
3840 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3841 AssertNoAllocation no_gc;
3842 String::FlatContent subject_content = subject->GetFlatContent();
3843 String::FlatContent pattern_content = pattern->GetFlatContent();
3844 if (subject_content.IsAscii()) {
3845 Vector<const char> subject_vector = subject_content.ToAsciiVector();
3846 if (pattern_content.IsAscii()) {
3847 if (SearchStringMultiple(isolate,
3848 subject_vector,
3849 pattern_content.ToAsciiVector(),
3850 *pattern,
3851 builder,
3852 &match_pos)) break;
3853 } else {
3854 if (SearchStringMultiple(isolate,
3855 subject_vector,
3856 pattern_content.ToUC16Vector(),
3857 *pattern,
3858 builder,
3859 &match_pos)) break;
3860 }
3861 } else {
3862 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
3863 if (pattern_content.IsAscii()) {
3864 if (SearchStringMultiple(isolate,
3865 subject_vector,
3866 pattern_content.ToAsciiVector(),
3867 *pattern,
3868 builder,
3869 &match_pos)) break;
3870 } else {
3871 if (SearchStringMultiple(isolate,
3872 subject_vector,
3873 pattern_content.ToUC16Vector(),
3874 *pattern,
3875 builder,
3876 &match_pos)) break;
3877 }
3878 }
3879 }
3880
3881 if (match_pos >= 0) {
3882 SetLastMatchInfoNoCaptures(subject,
3883 last_match_info,
3884 match_pos,
3885 match_pos + pattern->length());
3886 return true;
3887 }
3888 return false; // No matches at all.
3889 }
3890
3891
3892 static int SearchRegExpNoCaptureMultiple(
3893 Isolate* isolate, 3670 Isolate* isolate,
3894 Handle<String> subject, 3671 Handle<String> subject,
3895 Handle<JSRegExp> regexp, 3672 Handle<JSRegExp> regexp,
3896 Handle<JSArray> last_match_array, 3673 Handle<JSArray> last_match_array,
3897 FixedArrayBuilder* builder) { 3674 FixedArrayBuilder* builder) {
3898 ASSERT(subject->IsFlat()); 3675 ASSERT(subject->IsFlat());
3899 ASSERT(regexp->CaptureCount() == 0); 3676 ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
3677
3678 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
3679 if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
3680
3681 int capture_count = regexp->CaptureCount();
3682 int subject_length = subject->length();
3683
3684 // Position to search from.
3900 int match_start = -1; 3685 int match_start = -1;
3901 int match_end = 0; 3686 int match_end = 0;
3902 int pos = 0; 3687 bool first = true;
3903 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3904 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3905 3688
3906 int max_matches; 3689 // Two smis before and after the match, for very long strings.
3907 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, 3690 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
3908 registers_per_match, 3691
3909 &max_matches); 3692 while (true) {
3910 OffsetsVector registers(num_registers, isolate); 3693 int32_t* current_match = global_cache.FetchNext();
3911 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3694 if (current_match == NULL) break;
3912 int subject_length = subject->length(); 3695 match_start = current_match[0];
3913 bool first = true; 3696 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3914 for (;;) { // Break on failure, return on exception. 3697 if (match_end < match_start) {
3915 int num_matches = RegExpImpl::IrregexpExecRaw(regexp, 3698 ReplacementStringBuilder::AddSubjectSlice(builder,
3916 subject, 3699 match_end,
3917 pos, 3700 match_start);
3918 register_vector); 3701 }
3919 if (num_matches > 0) { 3702 match_end = current_match[1];
3920 for (int match_index = 0; match_index < num_matches; match_index++) { 3703 {
3921 int32_t* current_match = &register_vector[match_index * 2]; 3704 // Avoid accumulating new handles inside loop.
3922 match_start = current_match[0]; 3705 HandleScope temp_scope(isolate);
3923 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3706 Handle<String> match;
3924 if (match_end < match_start) { 3707 if (!first) {
3925 ReplacementStringBuilder::AddSubjectSlice(builder, 3708 match = isolate->factory()->NewProperSubString(subject,
3926 match_end, 3709 match_start,
3927 match_start); 3710 match_end);
3928 } 3711 } else {
3929 match_end = current_match[1]; 3712 match = isolate->factory()->NewSubString(subject,
3930 HandleScope loop_scope(isolate); 3713 match_start,
3931 if (!first) { 3714 match_end);
3932 builder->Add(*isolate->factory()->NewProperSubString(subject, 3715 first = false;
3933 match_start,
3934 match_end));
3935 } else {
3936 builder->Add(*isolate->factory()->NewSubString(subject,
3937 match_start,
3938 match_end));
3939 first = false;
3940 }
3941 } 3716 }
3942 3717
3943 // If we did not get the maximum number of matches, we can stop here 3718 if (has_capture) {
3944 // since there are no matches left. 3719 // Arguments array to replace function is match, captures, index and
3945 if (num_matches < max_matches) break; 3720 // subject, i.e., 3 + capture count in total.
3721 Handle<FixedArray> elements =
3722 isolate->factory()->NewFixedArray(3 + capture_count);
3946 3723
3947 if (match_start != match_end) { 3724 elements->set(0, *match);
3948 pos = match_end; 3725 for (int i = 1; i <= capture_count; i++) {
3726 int start = current_match[i * 2];
3727 if (start >= 0) {
3728 int end = current_match[i * 2 + 1];
3729 ASSERT(start <= end);
3730 Handle<String> substring =
3731 isolate->factory()->NewSubString(subject, start, end);
3732 elements->set(i, *substring);
3733 } else {
3734 ASSERT(current_match[i * 2 + 1] < 0);
3735 elements->set(i, isolate->heap()->undefined_value());
3736 }
3737 }
3738 elements->set(capture_count + 1, Smi::FromInt(match_start));
3739 elements->set(capture_count + 2, *subject);
3740 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3949 } else { 3741 } else {
3950 pos = match_end + 1; 3742 builder->Add(*match);
3951 if (pos > subject_length) break;
3952 } 3743 }
3953 } else if (num_matches == 0) {
3954 break;
3955 } else {
3956 ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
3957 return RegExpImpl::RE_EXCEPTION;
3958 } 3744 }
3959 } 3745 }
3960 3746
3747 if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
3748
3961 if (match_start >= 0) { 3749 if (match_start >= 0) {
3750 // Finished matching, with at least one match.
3962 if (match_end < subject_length) { 3751 if (match_end < subject_length) {
3963 ReplacementStringBuilder::AddSubjectSlice(builder, 3752 ReplacementStringBuilder::AddSubjectSlice(builder,
3964 match_end, 3753 match_end,
3965 subject_length); 3754 subject_length);
3966 } 3755 }
3967 SetLastMatchInfoNoCaptures(subject, 3756
3968 last_match_array, 3757 RegExpImpl::SetLastMatchInfo(
3969 match_start, 3758 last_match_array, subject, capture_count, NULL);
3970 match_end); 3759
3971 return RegExpImpl::RE_SUCCESS; 3760 return RegExpImpl::RE_SUCCESS;
3972 } else { 3761 } else {
3973 return RegExpImpl::RE_FAILURE; // No matches at all. 3762 return RegExpImpl::RE_FAILURE; // No matches at all.
3974 } 3763 }
3975 } 3764 }
3976 3765
3977 3766
3978 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3979 // separate last match info. See comment on that function.
3980 static int SearchRegExpMultiple(
3981 Isolate* isolate,
3982 Handle<String> subject,
3983 Handle<JSRegExp> regexp,
3984 Handle<JSArray> last_match_array,
3985 FixedArrayBuilder* builder,
3986 Zone* zone) {
3987
3988 ASSERT(subject->IsFlat());
3989 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3990 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3991
3992 int max_matches;
3993 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
3994 registers_per_match,
3995 &max_matches);
3996 OffsetsVector registers(num_registers, isolate);
3997 Vector<int32_t> register_vector(registers.vector(), registers.length());
3998
3999 int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
4000 subject,
4001 0,
4002 register_vector);
4003
4004 int capture_count = regexp->CaptureCount();
4005 int subject_length = subject->length();
4006
4007 // Position to search from.
4008 int pos = 0;
4009 // End of previous match. Differs from pos if match was empty.
4010 int match_end = 0;
4011 bool first = true;
4012
4013 if (num_matches > 0) {
4014 do {
4015 int match_start = 0;
4016 for (int match_index = 0; match_index < num_matches; match_index++) {
4017 int32_t* current_match =
4018 &register_vector[match_index * registers_per_match];
4019 match_start = current_match[0];
4020 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
4021 if (match_end < match_start) {
4022 ReplacementStringBuilder::AddSubjectSlice(builder,
4023 match_end,
4024 match_start);
4025 }
4026 match_end = current_match[1];
4027
4028 {
4029 // Avoid accumulating new handles inside loop.
4030 HandleScope temp_scope(isolate);
4031 // Arguments array to replace function is match, captures, index and
4032 // subject, i.e., 3 + capture count in total.
4033 Handle<FixedArray> elements =
4034 isolate->factory()->NewFixedArray(3 + capture_count);
4035 Handle<String> match;
4036 if (!first) {
4037 match = isolate->factory()->NewProperSubString(subject,
4038 match_start,
4039 match_end);
4040 } else {
4041 match = isolate->factory()->NewSubString(subject,
4042 match_start,
4043 match_end);
4044 }
4045 elements->set(0, *match);
4046 for (int i = 1; i <= capture_count; i++) {
4047 int start = current_match[i * 2];
4048 if (start >= 0) {
4049 int end = current_match[i * 2 + 1];
4050 ASSERT(start <= end);
4051 Handle<String> substring;
4052 if (!first) {
4053 substring =
4054 isolate->factory()->NewProperSubString(subject, start, end);
4055 } else {
4056 substring =
4057 isolate->factory()->NewSubString(subject, start, end);
4058 }
4059 elements->set(i, *substring);
4060 } else {
4061 ASSERT(current_match[i * 2 + 1] < 0);
4062 elements->set(i, isolate->heap()->undefined_value());
4063 }
4064 }
4065 elements->set(capture_count + 1, Smi::FromInt(match_start));
4066 elements->set(capture_count + 2, *subject);
4067 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
4068 }
4069 first = false;
4070 }
4071
4072 // If we did not get the maximum number of matches, we can stop here
4073 // since there are no matches left.
4074 if (num_matches < max_matches) break;
4075
4076 if (match_end > match_start) {
4077 pos = match_end;
4078 } else {
4079 pos = match_end + 1;
4080 if (pos > subject_length) {
4081 break;
4082 }
4083 }
4084
4085 num_matches = RegExpImpl::IrregexpExecRaw(regexp,
4086 subject,
4087 pos,
4088 register_vector);
4089 } while (num_matches > 0);
4090
4091 if (num_matches != RegExpImpl::RE_EXCEPTION) {
4092 // Finished matching, with at least one match.
4093 if (match_end < subject_length) {
4094 ReplacementStringBuilder::AddSubjectSlice(builder,
4095 match_end,
4096 subject_length);
4097 }
4098
4099 int last_match_capture_count = (capture_count + 1) * 2;
4100 int last_match_array_size =
4101 last_match_capture_count + RegExpImpl::kLastMatchOverhead;
4102 last_match_array->EnsureSize(last_match_array_size);
4103 AssertNoAllocation no_gc;
4104 FixedArray* elements = FixedArray::cast(last_match_array->elements());
4105 // We have to set this even though the rest of the last match array is
4106 // ignored.
4107 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
4108 // These are also read without consulting the override.
4109 RegExpImpl::SetLastSubject(elements, *subject);
4110 RegExpImpl::SetLastInput(elements, *subject);
4111 return RegExpImpl::RE_SUCCESS;
4112 }
4113 }
4114 // No matches at all, return failure or exception result directly.
4115 return num_matches;
4116 }
4117
4118
4119 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets 3767 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
4120 // lastMatchInfoOverride to maintain the last match info, so we don't need to 3768 // lastMatchInfoOverride to maintain the last match info, so we don't need to
4121 // set any other last match array info. 3769 // set any other last match array info.
4122 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { 3770 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
4123 ASSERT(args.length() == 4); 3771 ASSERT(args.length() == 4);
4124 HandleScope handles(isolate); 3772 HandleScope handles(isolate);
4125 3773
4126 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 3774 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
4127 if (!subject->IsFlat()) FlattenString(subject); 3775 if (!subject->IsFlat()) FlattenString(subject);
4128 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 3776 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
4129 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); 3777 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
4130 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 3778 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
4131 3779
4132 ASSERT(last_match_info->HasFastObjectElements()); 3780 ASSERT(last_match_info->HasFastObjectElements());
4133 ASSERT(regexp->GetFlags().is_global()); 3781 ASSERT(regexp->GetFlags().is_global());
4134 Handle<FixedArray> result_elements; 3782 Handle<FixedArray> result_elements;
4135 if (result_array->HasFastObjectElements()) { 3783 if (result_array->HasFastObjectElements()) {
4136 result_elements = 3784 result_elements =
4137 Handle<FixedArray>(FixedArray::cast(result_array->elements())); 3785 Handle<FixedArray>(FixedArray::cast(result_array->elements()));
4138 } 3786 }
4139 if (result_elements.is_null() || result_elements->length() < 16) { 3787 if (result_elements.is_null() || result_elements->length() < 16) {
4140 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 3788 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
4141 } 3789 }
4142 FixedArrayBuilder builder(result_elements); 3790 FixedArrayBuilder builder(result_elements);
4143 3791
4144 if (regexp->TypeTag() == JSRegExp::ATOM) { 3792 int result;
4145 Handle<String> pattern( 3793 if (regexp->CaptureCount() == 0) {
4146 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); 3794 result = SearchRegExpMultiple<false>(
4147 ASSERT(pattern->IsFlat()); 3795 isolate, subject, regexp, last_match_info, &builder);
4148 if (SearchStringMultiple(isolate, subject, pattern, 3796 } else {
4149 last_match_info, &builder)) { 3797 result = SearchRegExpMultiple<true>(
4150 return *builder.ToJSArray(result_array); 3798 isolate, subject, regexp, last_match_info, &builder);
4151 }
4152 return isolate->heap()->null_value();
4153 } 3799 }
4154 3800
4155 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
4156
4157 int result;
4158 if (regexp->CaptureCount() == 0) {
4159 result = SearchRegExpNoCaptureMultiple(isolate,
4160 subject,
4161 regexp,
4162 last_match_info,
4163 &builder);
4164 } else {
4165 result = SearchRegExpMultiple(isolate,
4166 subject,
4167 regexp,
4168 last_match_info,
4169 &builder,
4170 isolate->runtime_zone());
4171 }
4172 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); 3801 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
4173 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); 3802 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
4174 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); 3803 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
4175 return Failure::Exception(); 3804 return Failure::Exception();
4176 } 3805 }
4177 3806
4178 3807
4179 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) { 3808 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) {
4180 NoHandleAllocation ha; 3809 NoHandleAllocation ha;
4181 ASSERT(args.length() == 2); 3810 ASSERT(args.length() == 2);
(...skipping 9536 matching lines...) Expand 10 before | Expand all | Expand 10 after
13718 // Handle last resort GC and make sure to allow future allocations 13347 // Handle last resort GC and make sure to allow future allocations
13719 // to grow the heap without causing GCs (if possible). 13348 // to grow the heap without causing GCs (if possible).
13720 isolate->counters()->gc_last_resort_from_js()->Increment(); 13349 isolate->counters()->gc_last_resort_from_js()->Increment();
13721 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13350 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13722 "Runtime::PerformGC"); 13351 "Runtime::PerformGC");
13723 } 13352 }
13724 } 13353 }
13725 13354
13726 13355
13727 } } // namespace v8::internal 13356 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/mips/code-stubs-mips.cc ('k') | src/unicode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698