Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/runtime.cc

Issue 10831126: Take advantage of batched results when matching global regexp. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comments and formatting changes. Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/mips/code-stubs-mips.cc ('k') | src/x64/code-stubs-x64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2556 matching lines...) Expand 10 before | Expand all | Expand 10 after
2567 FixedArrayBuilder array_builder_; 2567 FixedArrayBuilder array_builder_;
2568 Handle<String> subject_; 2568 Handle<String> subject_;
2569 int character_count_; 2569 int character_count_;
2570 bool is_ascii_; 2570 bool is_ascii_;
2571 }; 2571 };
2572 2572
2573 2573
2574 class CompiledReplacement { 2574 class CompiledReplacement {
2575 public: 2575 public:
2576 explicit CompiledReplacement(Zone* zone) 2576 explicit CompiledReplacement(Zone* zone)
2577 : parts_(1, zone), replacement_substrings_(0, zone), 2577 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
2578 simple_hint_(false),
2579 zone_(zone) {}
2580 2578
2581 void Compile(Handle<String> replacement, 2579 // Return whether the replacement is simple.
2580 bool Compile(Handle<String> replacement,
2582 int capture_count, 2581 int capture_count,
2583 int subject_length); 2582 int subject_length);
2584 2583
2584 // Use Apply only if Compile returned false.
2585 void Apply(ReplacementStringBuilder* builder, 2585 void Apply(ReplacementStringBuilder* builder,
2586 int match_from, 2586 int match_from,
2587 int match_to, 2587 int match_to,
2588 Handle<JSArray> last_match_info); 2588 int32_t* match);
2589 2589
2590 // Number of distinct parts of the replacement pattern. 2590 // Number of distinct parts of the replacement pattern.
2591 int parts() { 2591 int parts() {
2592 return parts_.length(); 2592 return parts_.length();
2593 } 2593 }
2594 2594
2595 bool simple_hint() {
2596 return simple_hint_;
2597 }
2598
2599 Zone* zone() const { return zone_; } 2595 Zone* zone() const { return zone_; }
2600 2596
2601 private: 2597 private:
2602 enum PartType { 2598 enum PartType {
2603 SUBJECT_PREFIX = 1, 2599 SUBJECT_PREFIX = 1,
2604 SUBJECT_SUFFIX, 2600 SUBJECT_SUFFIX,
2605 SUBJECT_CAPTURE, 2601 SUBJECT_CAPTURE,
2606 REPLACEMENT_SUBSTRING, 2602 REPLACEMENT_SUBSTRING,
2607 REPLACEMENT_STRING, 2603 REPLACEMENT_STRING,
2608 2604
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
2649 // tag == REPLACEMENT_STRING: data is index into array of substrings 2645 // tag == REPLACEMENT_STRING: data is index into array of substrings
2650 // of the replacement string. 2646 // of the replacement string.
2651 // tag <= 0: Temporary representation of the substring of the replacement 2647 // tag <= 0: Temporary representation of the substring of the replacement
2652 // string ranging over -tag .. data. 2648 // string ranging over -tag .. data.
2653 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 2649 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
2654 // substring objects. 2650 // substring objects.
2655 int data; 2651 int data;
2656 }; 2652 };
2657 2653
2658 template<typename Char> 2654 template<typename Char>
2659 static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 2655 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
2660 Vector<Char> characters, 2656 Vector<Char> characters,
2661 int capture_count, 2657 int capture_count,
2662 int subject_length, 2658 int subject_length,
2663 Zone* zone) { 2659 Zone* zone) {
2664 int length = characters.length(); 2660 int length = characters.length();
2665 int last = 0; 2661 int last = 0;
2666 for (int i = 0; i < length; i++) { 2662 for (int i = 0; i < length; i++) {
2667 Char c = characters[i]; 2663 Char c = characters[i];
2668 if (c == '$') { 2664 if (c == '$') {
2669 int next_index = i + 1; 2665 int next_index = i + 1;
2670 if (next_index == length) { // No next character! 2666 if (next_index == length) { // No next character!
2671 break; 2667 break;
2672 } 2668 }
2673 Char c2 = characters[next_index]; 2669 Char c2 = characters[next_index];
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
2747 break; 2743 break;
2748 } 2744 }
2749 default: 2745 default:
2750 i = next_index; 2746 i = next_index;
2751 break; 2747 break;
2752 } 2748 }
2753 } 2749 }
2754 } 2750 }
2755 if (length > last) { 2751 if (length > last) {
2756 if (last == 0) { 2752 if (last == 0) {
2757 parts->Add(ReplacementPart::ReplacementString(), zone); 2753 // Replacement is simple. Do not use Apply to do the replacement.
2758 return true; 2754 return true;
2759 } else { 2755 } else {
2760 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 2756 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
2761 } 2757 }
2762 } 2758 }
2763 return false; 2759 return false;
2764 } 2760 }
2765 2761
2766 ZoneList<ReplacementPart> parts_; 2762 ZoneList<ReplacementPart> parts_;
2767 ZoneList<Handle<String> > replacement_substrings_; 2763 ZoneList<Handle<String> > replacement_substrings_;
2768 bool simple_hint_;
2769 Zone* zone_; 2764 Zone* zone_;
2770 }; 2765 };
2771 2766
2772 2767
2773 void CompiledReplacement::Compile(Handle<String> replacement, 2768 bool CompiledReplacement::Compile(Handle<String> replacement,
2774 int capture_count, 2769 int capture_count,
2775 int subject_length) { 2770 int subject_length) {
2776 { 2771 {
2777 AssertNoAllocation no_alloc; 2772 AssertNoAllocation no_alloc;
2778 String::FlatContent content = replacement->GetFlatContent(); 2773 String::FlatContent content = replacement->GetFlatContent();
2779 ASSERT(content.IsFlat()); 2774 ASSERT(content.IsFlat());
2775 bool simple = false;
2780 if (content.IsAscii()) { 2776 if (content.IsAscii()) {
2781 simple_hint_ = ParseReplacementPattern(&parts_, 2777 simple = ParseReplacementPattern(&parts_,
2782 content.ToAsciiVector(), 2778 content.ToAsciiVector(),
2783 capture_count, 2779 capture_count,
2784 subject_length, 2780 subject_length,
2785 zone()); 2781 zone());
2786 } else { 2782 } else {
2787 ASSERT(content.IsTwoByte()); 2783 ASSERT(content.IsTwoByte());
2788 simple_hint_ = ParseReplacementPattern(&parts_, 2784 simple = ParseReplacementPattern(&parts_,
2789 content.ToUC16Vector(), 2785 content.ToUC16Vector(),
2790 capture_count, 2786 capture_count,
2791 subject_length, 2787 subject_length,
2792 zone()); 2788 zone());
2793 } 2789 }
2790 if (simple) return true;
2794 } 2791 }
2792
2795 Isolate* isolate = replacement->GetIsolate(); 2793 Isolate* isolate = replacement->GetIsolate();
2796 // Find substrings of replacement string and create them as String objects. 2794 // Find substrings of replacement string and create them as String objects.
2797 int substring_index = 0; 2795 int substring_index = 0;
2798 for (int i = 0, n = parts_.length(); i < n; i++) { 2796 for (int i = 0, n = parts_.length(); i < n; i++) {
2799 int tag = parts_[i].tag; 2797 int tag = parts_[i].tag;
2800 if (tag <= 0) { // A replacement string slice. 2798 if (tag <= 0) { // A replacement string slice.
2801 int from = -tag; 2799 int from = -tag;
2802 int to = parts_[i].data; 2800 int to = parts_[i].data;
2803 replacement_substrings_.Add( 2801 replacement_substrings_.Add(
2804 isolate->factory()->NewSubString(replacement, from, to), zone()); 2802 isolate->factory()->NewSubString(replacement, from, to), zone());
2805 parts_[i].tag = REPLACEMENT_SUBSTRING; 2803 parts_[i].tag = REPLACEMENT_SUBSTRING;
2806 parts_[i].data = substring_index; 2804 parts_[i].data = substring_index;
2807 substring_index++; 2805 substring_index++;
2808 } else if (tag == REPLACEMENT_STRING) { 2806 } else if (tag == REPLACEMENT_STRING) {
2809 replacement_substrings_.Add(replacement, zone()); 2807 replacement_substrings_.Add(replacement, zone());
2810 parts_[i].data = substring_index; 2808 parts_[i].data = substring_index;
2811 substring_index++; 2809 substring_index++;
2812 } 2810 }
2813 } 2811 }
2812 return false;
2814 } 2813 }
2815 2814
2816 2815
2817 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 2816 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
2818 int match_from, 2817 int match_from,
2819 int match_to, 2818 int match_to,
2820 Handle<JSArray> last_match_info) { 2819 int32_t* match) {
2820 ASSERT_LT(0, parts_.length());
2821 for (int i = 0, n = parts_.length(); i < n; i++) { 2821 for (int i = 0, n = parts_.length(); i < n; i++) {
2822 ReplacementPart part = parts_[i]; 2822 ReplacementPart part = parts_[i];
2823 switch (part.tag) { 2823 switch (part.tag) {
2824 case SUBJECT_PREFIX: 2824 case SUBJECT_PREFIX:
2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
2826 break; 2826 break;
2827 case SUBJECT_SUFFIX: { 2827 case SUBJECT_SUFFIX: {
2828 int subject_length = part.data; 2828 int subject_length = part.data;
2829 if (match_to < subject_length) { 2829 if (match_to < subject_length) {
2830 builder->AddSubjectSlice(match_to, subject_length); 2830 builder->AddSubjectSlice(match_to, subject_length);
2831 } 2831 }
2832 break; 2832 break;
2833 } 2833 }
2834 case SUBJECT_CAPTURE: { 2834 case SUBJECT_CAPTURE: {
2835 int capture = part.data; 2835 int capture = part.data;
2836 FixedArray* match_info = FixedArray::cast(last_match_info->elements()); 2836 int from = match[capture * 2];
2837 int from = RegExpImpl::GetCapture(match_info, capture * 2); 2837 int to = match[capture * 2 + 1];
2838 int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
2839 if (from >= 0 && to > from) { 2838 if (from >= 0 && to > from) {
2840 builder->AddSubjectSlice(from, to); 2839 builder->AddSubjectSlice(from, to);
2841 } 2840 }
2842 break; 2841 break;
2843 } 2842 }
2844 case REPLACEMENT_SUBSTRING: 2843 case REPLACEMENT_SUBSTRING:
2845 case REPLACEMENT_STRING: 2844 case REPLACEMENT_STRING:
2846 builder->AddString(replacement_substrings_[part.data]); 2845 builder->AddString(replacement_substrings_[part.data]);
2847 break; 2846 break;
2848 default: 2847 default:
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
2950 pattern_content.ToUC16Vector(), 2949 pattern_content.ToUC16Vector(),
2951 indices, 2950 indices,
2952 limit, 2951 limit,
2953 zone); 2952 zone);
2954 } 2953 }
2955 } 2954 }
2956 } 2955 }
2957 } 2956 }
2958 2957
2959 2958
2960 // Two smis before and after the match, for very long strings.
2961 const int kMaxBuilderEntriesPerRegExpMatch = 5;
2962
2963
2964 static void SetLastMatchInfoNoCaptures(Handle<String> subject,
2965 Handle<JSArray> last_match_info,
2966 int match_start,
2967 int match_end) {
2968 // Fill last_match_info with a single capture.
2969 last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
2970 AssertNoAllocation no_gc;
2971 FixedArray* elements = FixedArray::cast(last_match_info->elements());
2972 RegExpImpl::SetLastCaptureCount(elements, 2);
2973 RegExpImpl::SetLastInput(elements, *subject);
2974 RegExpImpl::SetLastSubject(elements, *subject);
2975 RegExpImpl::SetCapture(elements, 0, match_start);
2976 RegExpImpl::SetCapture(elements, 1, match_end);
2977 }
2978
2979
2980 template <typename SubjectChar, typename PatternChar>
2981 static bool SearchStringMultiple(Isolate* isolate,
2982 Vector<const SubjectChar> subject,
2983 Vector<const PatternChar> pattern,
2984 String* pattern_string,
2985 FixedArrayBuilder* builder,
2986 int* match_pos) {
2987 int pos = *match_pos;
2988 int subject_length = subject.length();
2989 int pattern_length = pattern.length();
2990 int max_search_start = subject_length - pattern_length;
2991 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
2992 while (pos <= max_search_start) {
2993 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2994 *match_pos = pos;
2995 return false;
2996 }
2997 // Position of end of previous match.
2998 int match_end = pos + pattern_length;
2999 int new_pos = search.Search(subject, match_end);
3000 if (new_pos >= 0) {
3001 // A match.
3002 if (new_pos > match_end) {
3003 ReplacementStringBuilder::AddSubjectSlice(builder,
3004 match_end,
3005 new_pos);
3006 }
3007 pos = new_pos;
3008 builder->Add(pattern_string);
3009 } else {
3010 break;
3011 }
3012 }
3013
3014 if (pos < max_search_start) {
3015 ReplacementStringBuilder::AddSubjectSlice(builder,
3016 pos + pattern_length,
3017 subject_length);
3018 }
3019 *match_pos = pos;
3020 return true;
3021 }
3022
3023
3024
3025
3026 template<typename ResultSeqString> 2959 template<typename ResultSeqString>
3027 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( 2960 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
3028 Isolate* isolate, 2961 Isolate* isolate,
3029 Handle<String> subject, 2962 Handle<String> subject,
3030 Handle<JSRegExp> pattern_regexp, 2963 Handle<JSRegExp> pattern_regexp,
3031 Handle<String> replacement, 2964 Handle<String> replacement,
3032 Handle<JSArray> last_match_info, 2965 Handle<JSArray> last_match_info) {
3033 Zone* zone) {
3034 ASSERT(subject->IsFlat()); 2966 ASSERT(subject->IsFlat());
3035 ASSERT(replacement->IsFlat()); 2967 ASSERT(replacement->IsFlat());
3036 2968
3037 ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT); 2969 Zone* zone = isolate->runtime_zone();
3038 ZoneList<int> indices(8, isolate->runtime_zone()); 2970 ZoneScope zone_space(zone, DELETE_ON_EXIT);
2971 ZoneList<int> indices(8, zone);
3039 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 2972 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
3040 String* pattern = 2973 String* pattern =
3041 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 2974 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
3042 int subject_len = subject->length(); 2975 int subject_len = subject->length();
3043 int pattern_len = pattern->length(); 2976 int pattern_len = pattern->length();
3044 int replacement_len = replacement->length(); 2977 int replacement_len = replacement->length();
3045 2978
3046 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, 2979 FindStringIndicesDispatch(
3047 zone); 2980 isolate, *subject, pattern, &indices, 0xffffffff, zone);
3048 2981
3049 int matches = indices.length(); 2982 int matches = indices.length();
3050 if (matches == 0) return *subject; 2983 if (matches == 0) return *subject;
3051 2984
3052 // Detect integer overflow. 2985 // Detect integer overflow.
3053 int64_t result_len_64 = 2986 int64_t result_len_64 =
3054 (static_cast<int64_t>(replacement_len) - 2987 (static_cast<int64_t>(replacement_len) -
3055 static_cast<int64_t>(pattern_len)) * 2988 static_cast<int64_t>(pattern_len)) *
3056 static_cast<int64_t>(matches) + 2989 static_cast<int64_t>(matches) +
3057 static_cast<int64_t>(subject_len); 2990 static_cast<int64_t>(subject_len);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3092 subject_pos = indices.at(i) + pattern_len; 3025 subject_pos = indices.at(i) + pattern_len;
3093 } 3026 }
3094 // Add remaining subject content at the end. 3027 // Add remaining subject content at the end.
3095 if (subject_pos < subject_len) { 3028 if (subject_pos < subject_len) {
3096 String::WriteToFlat(*subject, 3029 String::WriteToFlat(*subject,
3097 result->GetChars() + result_pos, 3030 result->GetChars() + result_pos,
3098 subject_pos, 3031 subject_pos,
3099 subject_len); 3032 subject_len);
3100 } 3033 }
3101 3034
3102 SetLastMatchInfoNoCaptures(subject, 3035 int32_t match_indices[] = { indices.at(matches - 1),
3103 last_match_info, 3036 indices.at(matches - 1) + pattern_len };
3104 indices.at(matches - 1), 3037 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
3105 indices.at(matches - 1) + pattern_len);
3106 3038
3107 return *result; 3039 return *result;
3108 } 3040 }
3109 3041
3110 3042
3111 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( 3043 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
3112 Isolate* isolate, 3044 Isolate* isolate,
3113 String* subject, 3045 Handle<String> subject,
3114 JSRegExp* regexp, 3046 Handle<JSRegExp> regexp,
3115 String* replacement, 3047 Handle<String> replacement,
3116 JSArray* last_match_info, 3048 Handle<JSArray> last_match_info) {
3117 Zone* zone) {
3118 ASSERT(subject->IsFlat()); 3049 ASSERT(subject->IsFlat());
3119 ASSERT(replacement->IsFlat()); 3050 ASSERT(replacement->IsFlat());
3120 3051
3121 HandleScope handles(isolate); 3052 bool is_global = regexp->GetFlags().is_global();
3122 3053 int capture_count = regexp->CaptureCount();
3123 int length = subject->length(); 3054 int subject_length = subject->length();
3124 Handle<String> subject_handle(subject);
3125 Handle<JSRegExp> regexp_handle(regexp);
3126 Handle<String> replacement_handle(replacement);
3127 Handle<JSArray> last_match_info_handle(last_match_info);
3128 Handle<Object> match = RegExpImpl::Exec(regexp_handle,
3129 subject_handle,
3130 0,
3131 last_match_info_handle);
3132 if (match.is_null()) {
3133 return Failure::Exception();
3134 }
3135 if (match->IsNull()) {
3136 return *subject_handle;
3137 }
3138
3139 int capture_count = regexp_handle->CaptureCount();
3140 3055
3141 // CompiledReplacement uses zone allocation. 3056 // CompiledReplacement uses zone allocation.
3057 Zone* zone = isolate->runtime_zone();
3142 ZoneScope zonescope(zone, DELETE_ON_EXIT); 3058 ZoneScope zonescope(zone, DELETE_ON_EXIT);
3143 CompiledReplacement compiled_replacement(zone); 3059 CompiledReplacement compiled_replacement(zone);
3144 compiled_replacement.Compile(replacement_handle, 3060 bool simple_replace = compiled_replacement.Compile(replacement,
3145 capture_count, 3061 capture_count,
3146 length); 3062 subject_length);
3147
3148 bool is_global = regexp_handle->GetFlags().is_global();
3149 3063
3150 // Shortcut for simple non-regexp global replacements 3064 // Shortcut for simple non-regexp global replacements
3151 if (is_global && 3065 if (is_global &&
3152 regexp_handle->TypeTag() == JSRegExp::ATOM && 3066 regexp->TypeTag() == JSRegExp::ATOM &&
3153 compiled_replacement.simple_hint()) { 3067 simple_replace) {
3154 if (subject_handle->HasOnlyAsciiChars() && 3068 if (subject->HasOnlyAsciiChars()) {
3155 replacement_handle->HasOnlyAsciiChars()) {
3156 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3069 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3157 isolate, 3070 isolate, subject, regexp, replacement, last_match_info);
3158 subject_handle,
3159 regexp_handle,
3160 replacement_handle,
3161 last_match_info_handle,
3162 zone);
3163 } else { 3071 } else {
3164 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3072 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3165 isolate, 3073 isolate, subject, regexp, replacement, last_match_info);
3166 subject_handle,
3167 regexp_handle,
3168 replacement_handle,
3169 last_match_info_handle,
3170 zone);
3171 } 3074 }
3172 } 3075 }
3173 3076
3077 RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
3078 if (global_cache.HasException()) return Failure::Exception();
3079
3080 int32_t* current_match = global_cache.FetchNext();
3081 if (current_match == NULL) {
3082 if (global_cache.HasException()) return Failure::Exception();
3083 return *subject;
3084 }
3085
3174 // Guessing the number of parts that the final result string is built 3086 // Guessing the number of parts that the final result string is built
3175 // from. Global regexps can match any number of times, so we guess 3087 // from. Global regexps can match any number of times, so we guess
3176 // conservatively. 3088 // conservatively.
3177 int expected_parts = 3089 int expected_parts =
3178 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; 3090 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
3179 ReplacementStringBuilder builder(isolate->heap(), 3091 ReplacementStringBuilder builder(isolate->heap(),
3180 subject_handle, 3092 subject,
3181 expected_parts); 3093 expected_parts);
3182 3094
3183 // Index of end of last match.
3184 int prev = 0;
3185
3186 // Number of parts added by compiled replacement plus preceding 3095 // Number of parts added by compiled replacement plus preceding
3187 // string and possibly suffix after last match. It is possible for 3096 // string and possibly suffix after last match. It is possible for
3188 // all components to use two elements when encoded as two smis. 3097 // all components to use two elements when encoded as two smis.
3189 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); 3098 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
3190 bool matched = true; 3099
3100 int prev = 0;
3101
3191 do { 3102 do {
3192 ASSERT(last_match_info_handle->HasFastObjectElements());
3193 // Increase the capacity of the builder before entering local handle-scope,
3194 // so its internal buffer can safely allocate a new handle if it grows.
3195 builder.EnsureCapacity(parts_added_per_loop); 3103 builder.EnsureCapacity(parts_added_per_loop);
3196 3104
3197 HandleScope loop_scope(isolate); 3105 int start = current_match[0];
3198 int start, end; 3106 int end = current_match[1];
3199 {
3200 AssertNoAllocation match_info_array_is_not_in_a_handle;
3201 FixedArray* match_info_array =
3202 FixedArray::cast(last_match_info_handle->elements());
3203
3204 ASSERT_EQ(capture_count * 2 + 2,
3205 RegExpImpl::GetLastCaptureCount(match_info_array));
3206 start = RegExpImpl::GetCapture(match_info_array, 0);
3207 end = RegExpImpl::GetCapture(match_info_array, 1);
3208 }
3209 3107
3210 if (prev < start) { 3108 if (prev < start) {
3211 builder.AddSubjectSlice(prev, start); 3109 builder.AddSubjectSlice(prev, start);
3212 } 3110 }
3213 compiled_replacement.Apply(&builder, 3111
3214 start, 3112 if (simple_replace) {
3215 end, 3113 builder.AddString(replacement);
3216 last_match_info_handle); 3114 } else {
3115 compiled_replacement.Apply(&builder,
3116 start,
3117 end,
3118 current_match);
3119 }
3217 prev = end; 3120 prev = end;
3218 3121
3219 // Only continue checking for global regexps. 3122 // Only continue checking for global regexps.
3220 if (!is_global) break; 3123 if (!is_global) break;
3221 3124
3222 // Continue from where the match ended, unless it was an empty match. 3125 current_match = global_cache.FetchNext();
3223 int next = end; 3126 } while (current_match != NULL);
3224 if (start == end) {
3225 next = end + 1;
3226 if (next > length) break;
3227 }
3228 3127
3229 match = RegExpImpl::Exec(regexp_handle, 3128 if (global_cache.HasException()) return Failure::Exception();
3230 subject_handle,
3231 next,
3232 last_match_info_handle);
3233 if (match.is_null()) {
3234 return Failure::Exception();
3235 }
3236 matched = !match->IsNull();
3237 } while (matched);
3238 3129
3239 if (prev < length) { 3130 if (prev < subject_length) {
3240 builder.AddSubjectSlice(prev, length); 3131 builder.AddSubjectSlice(prev, subject_length);
3241 } 3132 }
3242 3133
3134 RegExpImpl::SetLastMatchInfo(last_match_info,
3135 subject,
3136 capture_count,
3137 global_cache.LastSuccessfulMatch());
3138
3243 return *(builder.ToString()); 3139 return *(builder.ToString());
3244 } 3140 }
3245 3141
3246 3142
3247 template <typename ResultSeqString> 3143 template <typename ResultSeqString>
3248 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( 3144 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
3249 Isolate* isolate, 3145 Isolate* isolate,
3250 String* subject, 3146 Handle<String> subject,
3251 JSRegExp* regexp, 3147 Handle<JSRegExp> regexp,
3252 JSArray* last_match_info, 3148 Handle<JSArray> last_match_info) {
3253 Zone* zone) {
3254 ASSERT(subject->IsFlat()); 3149 ASSERT(subject->IsFlat());
3255 3150
3256 HandleScope handles(isolate); 3151 bool is_global = regexp->GetFlags().is_global();
3257
3258 Handle<String> subject_handle(subject);
3259 Handle<JSRegExp> regexp_handle(regexp);
3260 Handle<JSArray> last_match_info_handle(last_match_info);
3261 3152
3262 // Shortcut for simple non-regexp global replacements 3153 // Shortcut for simple non-regexp global replacements
3263 if (regexp_handle->GetFlags().is_global() && 3154 if (is_global &&
3264 regexp_handle->TypeTag() == JSRegExp::ATOM) { 3155 regexp->TypeTag() == JSRegExp::ATOM) {
3265 Handle<String> empty_string_handle(HEAP->empty_string()); 3156 Handle<String> empty_string(HEAP->empty_string());
3266 if (subject_handle->HasOnlyAsciiChars()) { 3157 if (subject->HasOnlyAsciiChars()) {
3267 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3158 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3268 isolate, 3159 isolate,
3269 subject_handle, 3160 subject,
3270 regexp_handle, 3161 regexp,
3271 empty_string_handle, 3162 empty_string,
3272 last_match_info_handle, 3163 last_match_info);
3273 zone);
3274 } else { 3164 } else {
3275 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3165 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3276 isolate, 3166 isolate,
3277 subject_handle, 3167 subject,
3278 regexp_handle, 3168 regexp,
3279 empty_string_handle, 3169 empty_string,
3280 last_match_info_handle, 3170 last_match_info);
3281 zone);
3282 } 3171 }
3283 } 3172 }
3284 3173
3285 Handle<Object> match = RegExpImpl::Exec(regexp_handle, 3174 RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
3286 subject_handle, 3175 if (global_cache.HasException()) return Failure::Exception();
3287 0,
3288 last_match_info_handle);
3289 if (match.is_null()) return Failure::Exception();
3290 if (match->IsNull()) return *subject_handle;
3291 3176
3292 ASSERT(last_match_info_handle->HasFastObjectElements()); 3177 int32_t* current_match = global_cache.FetchNext();
3293 3178 if (current_match == NULL) {
3294 int start, end; 3179 if (global_cache.HasException()) return Failure::Exception();
3295 { 3180 return *subject;
3296 AssertNoAllocation match_info_array_is_not_in_a_handle;
3297 FixedArray* match_info_array =
3298 FixedArray::cast(last_match_info_handle->elements());
3299
3300 start = RegExpImpl::GetCapture(match_info_array, 0);
3301 end = RegExpImpl::GetCapture(match_info_array, 1);
3302 } 3181 }
3303 3182
3304 bool global = regexp_handle->GetFlags().is_global(); 3183 int start = current_match[0];
3184 int end = current_match[1];
3185 int capture_count = regexp->CaptureCount();
3186 int subject_length = subject->length();
3305 3187
3306 if (start == end && !global) return *subject_handle; 3188 int new_length = subject_length - (end - start);
3189 if (new_length == 0) return isolate->heap()->empty_string();
3307 3190
3308 int length = subject_handle->length();
3309 int new_length = length - (end - start);
3310 if (new_length == 0) {
3311 return isolate->heap()->empty_string();
3312 }
3313 Handle<ResultSeqString> answer; 3191 Handle<ResultSeqString> answer;
3314 if (ResultSeqString::kHasAsciiEncoding) { 3192 if (ResultSeqString::kHasAsciiEncoding) {
3315 answer = Handle<ResultSeqString>::cast( 3193 answer = Handle<ResultSeqString>::cast(
3316 isolate->factory()->NewRawAsciiString(new_length)); 3194 isolate->factory()->NewRawAsciiString(new_length));
3317 } else { 3195 } else {
3318 answer = Handle<ResultSeqString>::cast( 3196 answer = Handle<ResultSeqString>::cast(
3319 isolate->factory()->NewRawTwoByteString(new_length)); 3197 isolate->factory()->NewRawTwoByteString(new_length));
3320 } 3198 }
3321 3199
3322 // If the regexp isn't global, only match once. 3200 if (!is_global) {
3323 if (!global) { 3201 RegExpImpl::SetLastMatchInfo(
3324 if (start > 0) { 3202 last_match_info, subject, capture_count, current_match);
3325 String::WriteToFlat(*subject_handle, 3203 if (start == end) {
3326 answer->GetChars(), 3204 return *subject;
3327 0, 3205 } else {
3328 start); 3206 if (start > 0) {
3207 String::WriteToFlat(*subject, answer->GetChars(), 0, start);
3208 }
3209 if (end < subject_length) {
3210 String::WriteToFlat(
3211 *subject, answer->GetChars() + start, end, subject_length);
3212 }
3213 return *answer;
3329 } 3214 }
3330 if (end < length) {
3331 String::WriteToFlat(*subject_handle,
3332 answer->GetChars() + start,
3333 end,
3334 length);
3335 }
3336 return *answer;
3337 } 3215 }
3338 3216
3339 int prev = 0; // Index of end of last match. 3217 int prev = 0;
3340 int next = 0; // Start of next search (prev unless last match was empty).
3341 int position = 0; 3218 int position = 0;
3342 3219
3343 do { 3220 do {
3221 start = current_match[0];
3222 end = current_match[1];
3344 if (prev < start) { 3223 if (prev < start) {
3345 // Add substring subject[prev;start] to answer string. 3224 // Add substring subject[prev;start] to answer string.
3346 String::WriteToFlat(*subject_handle, 3225 String::WriteToFlat(
3347 answer->GetChars() + position, 3226 *subject, answer->GetChars() + position, prev, start);
3348 prev,
3349 start);
3350 position += start - prev; 3227 position += start - prev;
3351 } 3228 }
3352 prev = end; 3229 prev = end;
3353 next = end;
3354 // Continue from where the match ended, unless it was an empty match.
3355 if (start == end) {
3356 next++;
3357 if (next > length) break;
3358 }
3359 match = RegExpImpl::Exec(regexp_handle,
3360 subject_handle,
3361 next,
3362 last_match_info_handle);
3363 if (match.is_null()) return Failure::Exception();
3364 if (match->IsNull()) break;
3365 3230
3366 ASSERT(last_match_info_handle->HasFastObjectElements()); 3231 current_match = global_cache.FetchNext();
3367 HandleScope loop_scope(isolate); 3232 } while (current_match != NULL);
3368 {
3369 AssertNoAllocation match_info_array_is_not_in_a_handle;
3370 FixedArray* match_info_array =
3371 FixedArray::cast(last_match_info_handle->elements());
3372 start = RegExpImpl::GetCapture(match_info_array, 0);
3373 end = RegExpImpl::GetCapture(match_info_array, 1);
3374 }
3375 } while (true);
3376 3233
3377 if (prev < length) { 3234 if (global_cache.HasException()) return Failure::Exception();
3235
3236 RegExpImpl::SetLastMatchInfo(last_match_info,
3237 subject,
3238 capture_count,
3239 global_cache.LastSuccessfulMatch());
3240
3241 if (prev < subject_length) {
3378 // Add substring subject[prev;length] to answer string. 3242 // Add substring subject[prev;length] to answer string.
3379 String::WriteToFlat(*subject_handle, 3243 String::WriteToFlat(
3380 answer->GetChars() + position, 3244 *subject, answer->GetChars() + position, prev, subject_length);
3381 prev, 3245 position += subject_length - prev;
3382 length);
3383 position += length - prev;
3384 } 3246 }
3385 3247
3386 if (position == 0) { 3248 if (position == 0) return isolate->heap()->empty_string();
3387 return isolate->heap()->empty_string();
3388 }
3389 3249
3390 // Shorten string and fill 3250 // Shorten string and fill
3391 int string_size = ResultSeqString::SizeFor(position); 3251 int string_size = ResultSeqString::SizeFor(position);
3392 int allocated_string_size = ResultSeqString::SizeFor(new_length); 3252 int allocated_string_size = ResultSeqString::SizeFor(new_length);
3393 int delta = allocated_string_size - string_size; 3253 int delta = allocated_string_size - string_size;
3394 3254
3395 answer->set_length(position); 3255 answer->set_length(position);
3396 if (delta == 0) return *answer; 3256 if (delta == 0) return *answer;
3397 3257
3398 Address end_of_string = answer->address() + string_size; 3258 Address end_of_string = answer->address() + string_size;
3399 isolate->heap()->CreateFillerObjectAt(end_of_string, delta); 3259 isolate->heap()->CreateFillerObjectAt(end_of_string, delta);
3400 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) { 3260 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) {
3401 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta); 3261 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta);
3402 } 3262 }
3403 3263
3404 return *answer; 3264 return *answer;
3405 } 3265 }
3406 3266
3407 3267
3408 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { 3268 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
3409 ASSERT(args.length() == 4); 3269 ASSERT(args.length() == 4);
3410 3270
3411 CONVERT_ARG_CHECKED(String, subject, 0); 3271 HandleScope scope(isolate);
3412 if (!subject->IsFlat()) {
3413 Object* flat_subject;
3414 { MaybeObject* maybe_flat_subject = subject->TryFlatten();
3415 if (!maybe_flat_subject->ToObject(&flat_subject)) {
3416 return maybe_flat_subject;
3417 }
3418 }
3419 subject = String::cast(flat_subject);
3420 }
3421 3272
3422 CONVERT_ARG_CHECKED(String, replacement, 2); 3273 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3423 if (!replacement->IsFlat()) { 3274 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
3424 Object* flat_replacement; 3275 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3425 { MaybeObject* maybe_flat_replacement = replacement->TryFlatten(); 3276 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
3426 if (!maybe_flat_replacement->ToObject(&flat_replacement)) {
3427 return maybe_flat_replacement;
3428 }
3429 }
3430 replacement = String::cast(flat_replacement);
3431 }
3432 3277
3433 CONVERT_ARG_CHECKED(JSRegExp, regexp, 1); 3278 if (!subject->IsFlat()) subject = FlattenGetString(subject);
3434 CONVERT_ARG_CHECKED(JSArray, last_match_info, 3); 3279
3280 if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
3435 3281
3436 ASSERT(last_match_info->HasFastObjectElements()); 3282 ASSERT(last_match_info->HasFastObjectElements());
3437 3283
3438 Zone* zone = isolate->runtime_zone();
3439 if (replacement->length() == 0) { 3284 if (replacement->length() == 0) {
3440 if (subject->HasOnlyAsciiChars()) { 3285 if (subject->HasOnlyAsciiChars()) {
3441 return StringReplaceRegExpWithEmptyString<SeqAsciiString>( 3286 return StringReplaceRegExpWithEmptyString<SeqAsciiString>(
3442 isolate, subject, regexp, last_match_info, zone); 3287 isolate, subject, regexp, last_match_info);
3443 } else { 3288 } else {
3444 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>( 3289 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
3445 isolate, subject, regexp, last_match_info, zone); 3290 isolate, subject, regexp, last_match_info);
3446 } 3291 }
3447 } 3292 }
3448 3293
3449 return StringReplaceRegExpWithString(isolate, 3294 return StringReplaceRegExpWithString(
3450 subject, 3295 isolate, subject, regexp, replacement, last_match_info);
3451 regexp,
3452 replacement,
3453 last_match_info,
3454 zone);
3455 } 3296 }
3456 3297
3457 3298
3458 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate, 3299 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate,
3459 Handle<String> subject, 3300 Handle<String> subject,
3460 Handle<String> search, 3301 Handle<String> search,
3461 Handle<String> replace, 3302 Handle<String> replace,
3462 bool* found, 3303 bool* found,
3463 int recursion_limit) { 3304 int recursion_limit) {
3464 if (recursion_limit == 0) return Handle<String>::null(); 3305 if (recursion_limit == 0) return Handle<String>::null();
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
3767 3608
3768 3609
3769 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { 3610 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
3770 ASSERT_EQ(3, args.length()); 3611 ASSERT_EQ(3, args.length());
3771 3612
3772 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 3613 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3773 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 3614 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3774 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2); 3615 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
3775 HandleScope handles; 3616 HandleScope handles;
3776 3617
3777 Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); 3618 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
3619 if (global_cache.HasException()) return Failure::Exception();
3778 3620
3779 if (match.is_null()) { 3621 int capture_count = regexp->CaptureCount();
3780 return Failure::Exception();
3781 }
3782 if (match->IsNull()) {
3783 return isolate->heap()->null_value();
3784 }
3785 int length = subject->length();
3786 3622
3787 Zone* zone = isolate->runtime_zone(); 3623 Zone* zone = isolate->runtime_zone();
3788 ZoneScope zone_space(zone, DELETE_ON_EXIT); 3624 ZoneScope zone_space(zone, DELETE_ON_EXIT);
3789 ZoneList<int> offsets(8, zone); 3625 ZoneList<int> offsets(8, zone);
3790 int start; 3626
3791 int end; 3627 while (true) {
3792 do { 3628 int32_t* match = global_cache.FetchNext();
3793 { 3629 if (match == NULL) break;
3794 AssertNoAllocation no_alloc; 3630 offsets.Add(match[0], zone); // start
3795 FixedArray* elements = FixedArray::cast(regexp_info->elements()); 3631 offsets.Add(match[1], zone); // end
3796 start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); 3632 }
3797 end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); 3633
3798 } 3634 if (global_cache.HasException()) return Failure::Exception();
3799 offsets.Add(start, zone); 3635
3800 offsets.Add(end, zone); 3636 if (offsets.length() == 0) {
3801 if (start == end) if (++end > length) break; 3637 // Not a single match.
3802 match = RegExpImpl::Exec(regexp, subject, end, regexp_info); 3638 return isolate->heap()->null_value();
3803 if (match.is_null()) { 3639 }
3804 return Failure::Exception(); 3640
3805 } 3641 RegExpImpl::SetLastMatchInfo(regexp_info,
3806 } while (!match->IsNull()); 3642 subject,
3643 capture_count,
3644 global_cache.LastSuccessfulMatch());
3645
3807 int matches = offsets.length() / 2; 3646 int matches = offsets.length() / 2;
3808 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches); 3647 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
3809 Handle<String> substring = isolate->factory()-> 3648 Handle<String> substring =
3810 NewSubString(subject, offsets.at(0), offsets.at(1)); 3649 isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
3811 elements->set(0, *substring); 3650 elements->set(0, *substring);
3812 for (int i = 1; i < matches ; i++) { 3651 for (int i = 1; i < matches; i++) {
3652 HandleScope temp_scope(isolate);
3813 int from = offsets.at(i * 2); 3653 int from = offsets.at(i * 2);
3814 int to = offsets.at(i * 2 + 1); 3654 int to = offsets.at(i * 2 + 1);
3815 Handle<String> substring = isolate->factory()-> 3655 Handle<String> substring =
3816 NewProperSubString(subject, from, to); 3656 isolate->factory()->NewProperSubString(subject, from, to);
3817 elements->set(i, *substring); 3657 elements->set(i, *substring);
3818 } 3658 }
3819 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements); 3659 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
3820 result->set_length(Smi::FromInt(matches)); 3660 result->set_length(Smi::FromInt(matches));
3821 return *result; 3661 return *result;
3822 } 3662 }
3823 3663
3824 3664
3825 static bool SearchStringMultiple(Isolate* isolate, 3665 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3826 Handle<String> subject, 3666 // separate last match info. See comment on that function.
3827 Handle<String> pattern, 3667 template<bool has_capture>
3828 Handle<JSArray> last_match_info, 3668 static int SearchRegExpMultiple(
3829 FixedArrayBuilder* builder) {
3830 ASSERT(subject->IsFlat());
3831 ASSERT(pattern->IsFlat());
3832
3833 // Treating as if a previous match was before first character.
3834 int match_pos = -pattern->length();
3835
3836 for (;;) { // Break when search complete.
3837 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3838 AssertNoAllocation no_gc;
3839 String::FlatContent subject_content = subject->GetFlatContent();
3840 String::FlatContent pattern_content = pattern->GetFlatContent();
3841 if (subject_content.IsAscii()) {
3842 Vector<const char> subject_vector = subject_content.ToAsciiVector();
3843 if (pattern_content.IsAscii()) {
3844 if (SearchStringMultiple(isolate,
3845 subject_vector,
3846 pattern_content.ToAsciiVector(),
3847 *pattern,
3848 builder,
3849 &match_pos)) break;
3850 } else {
3851 if (SearchStringMultiple(isolate,
3852 subject_vector,
3853 pattern_content.ToUC16Vector(),
3854 *pattern,
3855 builder,
3856 &match_pos)) break;
3857 }
3858 } else {
3859 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
3860 if (pattern_content.IsAscii()) {
3861 if (SearchStringMultiple(isolate,
3862 subject_vector,
3863 pattern_content.ToAsciiVector(),
3864 *pattern,
3865 builder,
3866 &match_pos)) break;
3867 } else {
3868 if (SearchStringMultiple(isolate,
3869 subject_vector,
3870 pattern_content.ToUC16Vector(),
3871 *pattern,
3872 builder,
3873 &match_pos)) break;
3874 }
3875 }
3876 }
3877
3878 if (match_pos >= 0) {
3879 SetLastMatchInfoNoCaptures(subject,
3880 last_match_info,
3881 match_pos,
3882 match_pos + pattern->length());
3883 return true;
3884 }
3885 return false; // No matches at all.
3886 }
3887
3888
3889 static int SearchRegExpNoCaptureMultiple(
3890 Isolate* isolate, 3669 Isolate* isolate,
3891 Handle<String> subject, 3670 Handle<String> subject,
3892 Handle<JSRegExp> regexp, 3671 Handle<JSRegExp> regexp,
3893 Handle<JSArray> last_match_array, 3672 Handle<JSArray> last_match_array,
3894 FixedArrayBuilder* builder) { 3673 FixedArrayBuilder* builder) {
3895 ASSERT(subject->IsFlat()); 3674 ASSERT(subject->IsFlat());
3896 ASSERT(regexp->CaptureCount() == 0); 3675 ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
3676
3677 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
3678 if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
3679
3680 int capture_count = regexp->CaptureCount();
3681 int subject_length = subject->length();
3682
3683 // Position to search from.
3897 int match_start = -1; 3684 int match_start = -1;
3898 int match_end = 0; 3685 int match_end = 0;
3899 int pos = 0; 3686 bool first = true;
3900 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3901 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3902 3687
3903 int max_matches; 3688 // Two smis before and after the match, for very long strings.
3904 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, 3689 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
3905 registers_per_match, 3690
3906 &max_matches); 3691 while (true) {
3907 OffsetsVector registers(num_registers, isolate); 3692 int32_t* current_match = global_cache.FetchNext();
3908 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3693 if (current_match == NULL) break;
3909 int subject_length = subject->length(); 3694 match_start = current_match[0];
3910 bool first = true; 3695 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3911 for (;;) { // Break on failure, return on exception. 3696 if (match_end < match_start) {
3912 int num_matches = RegExpImpl::IrregexpExecRaw(regexp, 3697 ReplacementStringBuilder::AddSubjectSlice(builder,
3913 subject, 3698 match_end,
3914 pos, 3699 match_start);
3915 register_vector); 3700 }
3916 if (num_matches > 0) { 3701 match_end = current_match[1];
3917 for (int match_index = 0; match_index < num_matches; match_index++) { 3702 {
3918 int32_t* current_match = &register_vector[match_index * 2]; 3703 // Avoid accumulating new handles inside loop.
3919 match_start = current_match[0]; 3704 HandleScope temp_scope(isolate);
3920 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3705 Handle<String> match;
3921 if (match_end < match_start) { 3706 if (!first) {
3922 ReplacementStringBuilder::AddSubjectSlice(builder, 3707 match = isolate->factory()->NewProperSubString(subject,
3923 match_end, 3708 match_start,
3924 match_start); 3709 match_end);
3925 } 3710 } else {
3926 match_end = current_match[1]; 3711 match = isolate->factory()->NewSubString(subject,
3927 HandleScope loop_scope(isolate); 3712 match_start,
3928 if (!first) { 3713 match_end);
3929 builder->Add(*isolate->factory()->NewProperSubString(subject, 3714 first = false;
3930 match_start,
3931 match_end));
3932 } else {
3933 builder->Add(*isolate->factory()->NewSubString(subject,
3934 match_start,
3935 match_end));
3936 first = false;
3937 }
3938 } 3715 }
3939 3716
3940 // If we did not get the maximum number of matches, we can stop here 3717 if (has_capture) {
3941 // since there are no matches left. 3718 // Arguments array to replace function is match, captures, index and
3942 if (num_matches < max_matches) break; 3719 // subject, i.e., 3 + capture count in total.
3720 Handle<FixedArray> elements =
3721 isolate->factory()->NewFixedArray(3 + capture_count);
3943 3722
3944 if (match_start != match_end) { 3723 elements->set(0, *match);
3945 pos = match_end; 3724 for (int i = 1; i <= capture_count; i++) {
3725 int start = current_match[i * 2];
3726 if (start >= 0) {
3727 int end = current_match[i * 2 + 1];
3728 ASSERT(start <= end);
3729 Handle<String> substring =
3730 isolate->factory()->NewSubString(subject, start, end);
3731 elements->set(i, *substring);
3732 } else {
3733 ASSERT(current_match[i * 2 + 1] < 0);
3734 elements->set(i, isolate->heap()->undefined_value());
3735 }
3736 }
3737 elements->set(capture_count + 1, Smi::FromInt(match_start));
3738 elements->set(capture_count + 2, *subject);
3739 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3946 } else { 3740 } else {
3947 pos = match_end + 1; 3741 builder->Add(*match);
3948 if (pos > subject_length) break;
3949 } 3742 }
3950 } else if (num_matches == 0) {
3951 break;
3952 } else {
3953 ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
3954 return RegExpImpl::RE_EXCEPTION;
3955 } 3743 }
3956 } 3744 }
3957 3745
3746 if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
3747
3958 if (match_start >= 0) { 3748 if (match_start >= 0) {
3749 // Finished matching, with at least one match.
3959 if (match_end < subject_length) { 3750 if (match_end < subject_length) {
3960 ReplacementStringBuilder::AddSubjectSlice(builder, 3751 ReplacementStringBuilder::AddSubjectSlice(builder,
3961 match_end, 3752 match_end,
3962 subject_length); 3753 subject_length);
3963 } 3754 }
3964 SetLastMatchInfoNoCaptures(subject, 3755
3965 last_match_array, 3756 RegExpImpl::SetLastMatchInfo(
3966 match_start, 3757 last_match_array, subject, capture_count, NULL);
3967 match_end); 3758
3968 return RegExpImpl::RE_SUCCESS; 3759 return RegExpImpl::RE_SUCCESS;
3969 } else { 3760 } else {
3970 return RegExpImpl::RE_FAILURE; // No matches at all. 3761 return RegExpImpl::RE_FAILURE; // No matches at all.
3971 } 3762 }
3972 } 3763 }
3973 3764
3974 3765
3975 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3976 // separate last match info. See comment on that function.
3977 static int SearchRegExpMultiple(
3978 Isolate* isolate,
3979 Handle<String> subject,
3980 Handle<JSRegExp> regexp,
3981 Handle<JSArray> last_match_array,
3982 FixedArrayBuilder* builder,
3983 Zone* zone) {
3984
3985 ASSERT(subject->IsFlat());
3986 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3987 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3988
3989 int max_matches;
3990 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
3991 registers_per_match,
3992 &max_matches);
3993 OffsetsVector registers(num_registers, isolate);
3994 Vector<int32_t> register_vector(registers.vector(), registers.length());
3995
3996 int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
3997 subject,
3998 0,
3999 register_vector);
4000
4001 int capture_count = regexp->CaptureCount();
4002 int subject_length = subject->length();
4003
4004 // Position to search from.
4005 int pos = 0;
4006 // End of previous match. Differs from pos if match was empty.
4007 int match_end = 0;
4008 bool first = true;
4009
4010 if (num_matches > 0) {
4011 do {
4012 int match_start = 0;
4013 for (int match_index = 0; match_index < num_matches; match_index++) {
4014 int32_t* current_match =
4015 &register_vector[match_index * registers_per_match];
4016 match_start = current_match[0];
4017 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
4018 if (match_end < match_start) {
4019 ReplacementStringBuilder::AddSubjectSlice(builder,
4020 match_end,
4021 match_start);
4022 }
4023 match_end = current_match[1];
4024
4025 {
4026 // Avoid accumulating new handles inside loop.
4027 HandleScope temp_scope(isolate);
4028 // Arguments array to replace function is match, captures, index and
4029 // subject, i.e., 3 + capture count in total.
4030 Handle<FixedArray> elements =
4031 isolate->factory()->NewFixedArray(3 + capture_count);
4032 Handle<String> match;
4033 if (!first) {
4034 match = isolate->factory()->NewProperSubString(subject,
4035 match_start,
4036 match_end);
4037 } else {
4038 match = isolate->factory()->NewSubString(subject,
4039 match_start,
4040 match_end);
4041 }
4042 elements->set(0, *match);
4043 for (int i = 1; i <= capture_count; i++) {
4044 int start = current_match[i * 2];
4045 if (start >= 0) {
4046 int end = current_match[i * 2 + 1];
4047 ASSERT(start <= end);
4048 Handle<String> substring;
4049 if (!first) {
4050 substring =
4051 isolate->factory()->NewProperSubString(subject, start, end);
4052 } else {
4053 substring =
4054 isolate->factory()->NewSubString(subject, start, end);
4055 }
4056 elements->set(i, *substring);
4057 } else {
4058 ASSERT(current_match[i * 2 + 1] < 0);
4059 elements->set(i, isolate->heap()->undefined_value());
4060 }
4061 }
4062 elements->set(capture_count + 1, Smi::FromInt(match_start));
4063 elements->set(capture_count + 2, *subject);
4064 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
4065 }
4066 first = false;
4067 }
4068
4069 // If we did not get the maximum number of matches, we can stop here
4070 // since there are no matches left.
4071 if (num_matches < max_matches) break;
4072
4073 if (match_end > match_start) {
4074 pos = match_end;
4075 } else {
4076 pos = match_end + 1;
4077 if (pos > subject_length) {
4078 break;
4079 }
4080 }
4081
4082 num_matches = RegExpImpl::IrregexpExecRaw(regexp,
4083 subject,
4084 pos,
4085 register_vector);
4086 } while (num_matches > 0);
4087
4088 if (num_matches != RegExpImpl::RE_EXCEPTION) {
4089 // Finished matching, with at least one match.
4090 if (match_end < subject_length) {
4091 ReplacementStringBuilder::AddSubjectSlice(builder,
4092 match_end,
4093 subject_length);
4094 }
4095
4096 int last_match_capture_count = (capture_count + 1) * 2;
4097 int last_match_array_size =
4098 last_match_capture_count + RegExpImpl::kLastMatchOverhead;
4099 last_match_array->EnsureSize(last_match_array_size);
4100 AssertNoAllocation no_gc;
4101 FixedArray* elements = FixedArray::cast(last_match_array->elements());
4102 // We have to set this even though the rest of the last match array is
4103 // ignored.
4104 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
4105 // These are also read without consulting the override.
4106 RegExpImpl::SetLastSubject(elements, *subject);
4107 RegExpImpl::SetLastInput(elements, *subject);
4108 return RegExpImpl::RE_SUCCESS;
4109 }
4110 }
4111 // No matches at all, return failure or exception result directly.
4112 return num_matches;
4113 }
4114
4115
4116 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets 3766 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
4117 // lastMatchInfoOverride to maintain the last match info, so we don't need to 3767 // lastMatchInfoOverride to maintain the last match info, so we don't need to
4118 // set any other last match array info. 3768 // set any other last match array info.
4119 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { 3769 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
4120 ASSERT(args.length() == 4); 3770 ASSERT(args.length() == 4);
4121 HandleScope handles(isolate); 3771 HandleScope handles(isolate);
4122 3772
4123 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 3773 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
4124 if (!subject->IsFlat()) FlattenString(subject); 3774 if (!subject->IsFlat()) FlattenString(subject);
4125 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 3775 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
4126 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); 3776 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
4127 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 3777 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
4128 3778
4129 ASSERT(last_match_info->HasFastObjectElements()); 3779 ASSERT(last_match_info->HasFastObjectElements());
4130 ASSERT(regexp->GetFlags().is_global()); 3780 ASSERT(regexp->GetFlags().is_global());
4131 Handle<FixedArray> result_elements; 3781 Handle<FixedArray> result_elements;
4132 if (result_array->HasFastObjectElements()) { 3782 if (result_array->HasFastObjectElements()) {
4133 result_elements = 3783 result_elements =
4134 Handle<FixedArray>(FixedArray::cast(result_array->elements())); 3784 Handle<FixedArray>(FixedArray::cast(result_array->elements()));
4135 } 3785 }
4136 if (result_elements.is_null() || result_elements->length() < 16) { 3786 if (result_elements.is_null() || result_elements->length() < 16) {
4137 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 3787 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
4138 } 3788 }
4139 FixedArrayBuilder builder(result_elements); 3789 FixedArrayBuilder builder(result_elements);
4140 3790
4141 if (regexp->TypeTag() == JSRegExp::ATOM) { 3791 int result;
4142 Handle<String> pattern( 3792 if (regexp->CaptureCount() == 0) {
4143 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); 3793 result = SearchRegExpMultiple<false>(
4144 ASSERT(pattern->IsFlat()); 3794 isolate, subject, regexp, last_match_info, &builder);
4145 if (SearchStringMultiple(isolate, subject, pattern, 3795 } else {
4146 last_match_info, &builder)) { 3796 result = SearchRegExpMultiple<true>(
4147 return *builder.ToJSArray(result_array); 3797 isolate, subject, regexp, last_match_info, &builder);
4148 }
4149 return isolate->heap()->null_value();
4150 } 3798 }
4151 3799
4152 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
4153
4154 int result;
4155 if (regexp->CaptureCount() == 0) {
4156 result = SearchRegExpNoCaptureMultiple(isolate,
4157 subject,
4158 regexp,
4159 last_match_info,
4160 &builder);
4161 } else {
4162 result = SearchRegExpMultiple(isolate,
4163 subject,
4164 regexp,
4165 last_match_info,
4166 &builder,
4167 isolate->runtime_zone());
4168 }
4169 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); 3800 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
4170 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); 3801 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
4171 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); 3802 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
4172 return Failure::Exception(); 3803 return Failure::Exception();
4173 } 3804 }
4174 3805
4175 3806
4176 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) { 3807 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) {
4177 NoHandleAllocation ha; 3808 NoHandleAllocation ha;
4178 ASSERT(args.length() == 2); 3809 ASSERT(args.length() == 2);
(...skipping 9536 matching lines...) Expand 10 before | Expand all | Expand 10 after
13715 // Handle last resort GC and make sure to allow future allocations 13346 // Handle last resort GC and make sure to allow future allocations
13716 // to grow the heap without causing GCs (if possible). 13347 // to grow the heap without causing GCs (if possible).
13717 isolate->counters()->gc_last_resort_from_js()->Increment(); 13348 isolate->counters()->gc_last_resort_from_js()->Increment();
13718 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13349 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13719 "Runtime::PerformGC"); 13350 "Runtime::PerformGC");
13720 } 13351 }
13721 } 13352 }
13722 13353
13723 13354
13724 } } // namespace v8::internal 13355 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/mips/code-stubs-mips.cc ('k') | src/x64/code-stubs-x64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698