OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2864 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2875 subject_vector, | 2875 subject_vector, |
2876 pattern_content.ToUC16Vector(), | 2876 pattern_content.ToUC16Vector(), |
2877 indices, | 2877 indices, |
2878 limit); | 2878 limit); |
2879 } | 2879 } |
2880 } | 2880 } |
2881 } | 2881 } |
2882 } | 2882 } |
2883 | 2883 |
2884 | 2884 |
2885 static void SetLastMatchInfoNoCaptures(Handle<String> subject, | |
ulan
2012/04/23 16:59:23
Why not just move the implementation here, instead
Erik Corry
2012/04/23 18:54:02
Because then it would be hard to review.
Done.
| |
2886 Handle<JSArray> last_match_info, | |
2887 int match_start, | |
2888 int match_end); | |
2889 | |
2890 | |
2885 template<typename ResultSeqString> | 2891 template<typename ResultSeqString> |
2886 MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString( | 2892 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( |
2887 Isolate* isolate, | 2893 Isolate* isolate, |
2888 Handle<String> subject, | 2894 Handle<String> subject, |
2889 Handle<JSRegExp> pattern_regexp, | 2895 Handle<JSRegExp> pattern_regexp, |
2890 Handle<String> replacement) { | 2896 Handle<String> replacement, |
2897 Handle<JSArray> last_match_info) { | |
2891 ASSERT(subject->IsFlat()); | 2898 ASSERT(subject->IsFlat()); |
2892 ASSERT(replacement->IsFlat()); | 2899 ASSERT(replacement->IsFlat()); |
2893 | 2900 |
2894 ZoneScope zone_space(isolate, DELETE_ON_EXIT); | 2901 ZoneScope zone_space(isolate, DELETE_ON_EXIT); |
2895 ZoneList<int> indices(8); | 2902 ZoneList<int> indices(8); |
2896 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); | 2903 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
2897 String* pattern = | 2904 String* pattern = |
2898 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); | 2905 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
2899 int subject_len = subject->length(); | 2906 int subject_len = subject->length(); |
2900 int pattern_len = pattern->length(); | 2907 int pattern_len = pattern->length(); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2939 | 2946 |
2940 subject_pos = indices.at(i) + pattern_len; | 2947 subject_pos = indices.at(i) + pattern_len; |
2941 } | 2948 } |
2942 // Add remaining subject content at the end. | 2949 // Add remaining subject content at the end. |
2943 if (subject_pos < subject_len) { | 2950 if (subject_pos < subject_len) { |
2944 String::WriteToFlat(*subject, | 2951 String::WriteToFlat(*subject, |
2945 result->GetChars() + result_pos, | 2952 result->GetChars() + result_pos, |
2946 subject_pos, | 2953 subject_pos, |
2947 subject_len); | 2954 subject_len); |
2948 } | 2955 } |
2956 | |
2957 SetLastMatchInfoNoCaptures(subject, | |
2958 last_match_info, | |
2959 indices.at(matches - 1), | |
2960 indices.at(matches - 1) + pattern_len); | |
2961 | |
2949 return *result; | 2962 return *result; |
2950 } | 2963 } |
2951 | 2964 |
2952 | 2965 |
2953 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( | 2966 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( |
2954 Isolate* isolate, | 2967 Isolate* isolate, |
2955 String* subject, | 2968 String* subject, |
2956 JSRegExp* regexp, | 2969 JSRegExp* regexp, |
2957 String* replacement, | 2970 String* replacement, |
2958 JSArray* last_match_info) { | 2971 JSArray* last_match_info) { |
(...skipping 28 matching lines...) Expand all Loading... | |
2987 length); | 3000 length); |
2988 | 3001 |
2989 bool is_global = regexp_handle->GetFlags().is_global(); | 3002 bool is_global = regexp_handle->GetFlags().is_global(); |
2990 | 3003 |
2991 // Shortcut for simple non-regexp global replacements | 3004 // Shortcut for simple non-regexp global replacements |
2992 if (is_global && | 3005 if (is_global && |
2993 regexp_handle->TypeTag() == JSRegExp::ATOM && | 3006 regexp_handle->TypeTag() == JSRegExp::ATOM && |
2994 compiled_replacement.simple_hint()) { | 3007 compiled_replacement.simple_hint()) { |
2995 if (subject_handle->HasOnlyAsciiChars() && | 3008 if (subject_handle->HasOnlyAsciiChars() && |
2996 replacement_handle->HasOnlyAsciiChars()) { | 3009 replacement_handle->HasOnlyAsciiChars()) { |
2997 return StringReplaceStringWithString<SeqAsciiString>( | 3010 return StringReplaceAtomRegExpWithString<SeqAsciiString>( |
2998 isolate, subject_handle, regexp_handle, replacement_handle); | 3011 isolate, |
3012 subject_handle, | |
3013 regexp_handle, | |
3014 replacement_handle, | |
3015 last_match_info_handle); | |
2999 } else { | 3016 } else { |
3000 return StringReplaceStringWithString<SeqTwoByteString>( | 3017 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( |
3001 isolate, subject_handle, regexp_handle, replacement_handle); | 3018 isolate, |
3019 subject_handle, | |
3020 regexp_handle, | |
3021 replacement_handle, | |
3022 last_match_info_handle); | |
3002 } | 3023 } |
3003 } | 3024 } |
3004 | 3025 |
3005 // Guessing the number of parts that the final result string is built | 3026 // Guessing the number of parts that the final result string is built |
3006 // from. Global regexps can match any number of times, so we guess | 3027 // from. Global regexps can match any number of times, so we guess |
3007 // conservatively. | 3028 // conservatively. |
3008 int expected_parts = | 3029 int expected_parts = |
3009 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; | 3030 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; |
3010 ReplacementStringBuilder builder(isolate->heap(), | 3031 ReplacementStringBuilder builder(isolate->heap(), |
3011 subject_handle, | 3032 subject_handle, |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3080 Isolate* isolate, | 3101 Isolate* isolate, |
3081 String* subject, | 3102 String* subject, |
3082 JSRegExp* regexp, | 3103 JSRegExp* regexp, |
3083 JSArray* last_match_info) { | 3104 JSArray* last_match_info) { |
3084 ASSERT(subject->IsFlat()); | 3105 ASSERT(subject->IsFlat()); |
3085 | 3106 |
3086 HandleScope handles(isolate); | 3107 HandleScope handles(isolate); |
3087 | 3108 |
3088 Handle<String> subject_handle(subject); | 3109 Handle<String> subject_handle(subject); |
3089 Handle<JSRegExp> regexp_handle(regexp); | 3110 Handle<JSRegExp> regexp_handle(regexp); |
3111 Handle<JSArray> last_match_info_handle(last_match_info); | |
3090 | 3112 |
3091 // Shortcut for simple non-regexp global replacements | 3113 // Shortcut for simple non-regexp global replacements |
3092 if (regexp_handle->GetFlags().is_global() && | 3114 if (regexp_handle->GetFlags().is_global() && |
3093 regexp_handle->TypeTag() == JSRegExp::ATOM) { | 3115 regexp_handle->TypeTag() == JSRegExp::ATOM) { |
3094 Handle<String> empty_string_handle(HEAP->empty_string()); | 3116 Handle<String> empty_string_handle(HEAP->empty_string()); |
3095 if (subject_handle->HasOnlyAsciiChars()) { | 3117 if (subject_handle->HasOnlyAsciiChars()) { |
3096 return StringReplaceStringWithString<SeqAsciiString>( | 3118 return StringReplaceAtomRegExpWithString<SeqAsciiString>( |
3097 isolate, subject_handle, regexp_handle, empty_string_handle); | 3119 isolate, |
3120 subject_handle, | |
3121 regexp_handle, | |
3122 empty_string_handle, | |
3123 last_match_info_handle); | |
3098 } else { | 3124 } else { |
3099 return StringReplaceStringWithString<SeqTwoByteString>( | 3125 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( |
3100 isolate, subject_handle, regexp_handle, empty_string_handle); | 3126 isolate, |
3127 subject_handle, | |
3128 regexp_handle, | |
3129 empty_string_handle, | |
3130 last_match_info_handle); | |
3101 } | 3131 } |
3102 } | 3132 } |
3103 | 3133 |
3104 Handle<JSArray> last_match_info_handle(last_match_info); | |
3105 Handle<Object> match = RegExpImpl::Exec(regexp_handle, | 3134 Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
3106 subject_handle, | 3135 subject_handle, |
3107 0, | 3136 0, |
3108 last_match_info_handle); | 3137 last_match_info_handle); |
3109 if (match.is_null()) return Failure::Exception(); | 3138 if (match.is_null()) return Failure::Exception(); |
3110 if (match->IsNull()) return *subject_handle; | 3139 if (match->IsNull()) return *subject_handle; |
3111 | 3140 |
3112 ASSERT(last_match_info_handle->HasFastElements()); | 3141 ASSERT(last_match_info_handle->HasFastElements()); |
3113 | 3142 |
3114 int start, end; | 3143 int start, end; |
3115 { | 3144 { |
3116 AssertNoAllocation match_info_array_is_not_in_a_handle; | 3145 AssertNoAllocation match_info_array_is_not_in_a_handle; |
3117 FixedArray* match_info_array = | 3146 FixedArray* match_info_array = |
3118 FixedArray::cast(last_match_info_handle->elements()); | 3147 FixedArray::cast(last_match_info_handle->elements()); |
3119 | 3148 |
3120 start = RegExpImpl::GetCapture(match_info_array, 0); | 3149 start = RegExpImpl::GetCapture(match_info_array, 0); |
3121 end = RegExpImpl::GetCapture(match_info_array, 1); | 3150 end = RegExpImpl::GetCapture(match_info_array, 1); |
3122 } | 3151 } |
3123 | 3152 |
3153 bool global = regexp_handle->GetFlags().is_global(); | |
3154 | |
3155 if (start == end && !global) return *subject_handle; | |
3156 | |
3124 int length = subject_handle->length(); | 3157 int length = subject_handle->length(); |
3125 int new_length = length - (end - start); | 3158 int new_length = length - (end - start); |
3126 if (new_length == 0) { | 3159 if (new_length == 0) { |
3127 return isolate->heap()->empty_string(); | 3160 return isolate->heap()->empty_string(); |
3128 } | 3161 } |
3129 Handle<ResultSeqString> answer; | 3162 Handle<ResultSeqString> answer; |
3130 if (ResultSeqString::kHasAsciiEncoding) { | 3163 if (ResultSeqString::kHasAsciiEncoding) { |
3131 answer = Handle<ResultSeqString>::cast( | 3164 answer = Handle<ResultSeqString>::cast( |
3132 isolate->factory()->NewRawAsciiString(new_length)); | 3165 isolate->factory()->NewRawAsciiString(new_length)); |
3133 } else { | 3166 } else { |
3134 answer = Handle<ResultSeqString>::cast( | 3167 answer = Handle<ResultSeqString>::cast( |
3135 isolate->factory()->NewRawTwoByteString(new_length)); | 3168 isolate->factory()->NewRawTwoByteString(new_length)); |
3136 } | 3169 } |
3137 | 3170 |
3138 // If the regexp isn't global, only match once. | 3171 // If the regexp isn't global, only match once. |
3139 if (!regexp_handle->GetFlags().is_global()) { | 3172 if (!global) { |
3140 if (start > 0) { | 3173 if (start > 0) { |
3141 String::WriteToFlat(*subject_handle, | 3174 String::WriteToFlat(*subject_handle, |
3142 answer->GetChars(), | 3175 answer->GetChars(), |
3143 0, | 3176 0, |
3144 start); | 3177 start); |
3145 } | 3178 } |
3146 if (end < length) { | 3179 if (end < length) { |
3147 String::WriteToFlat(*subject_handle, | 3180 String::WriteToFlat(*subject_handle, |
3148 answer->GetChars() + start, | 3181 answer->GetChars() + start, |
3149 end, | 3182 end, |
(...skipping 681 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3831 last_match_array, | 3864 last_match_array, |
3832 match_start, | 3865 match_start, |
3833 match_end); | 3866 match_end); |
3834 return RegExpImpl::RE_SUCCESS; | 3867 return RegExpImpl::RE_SUCCESS; |
3835 } else { | 3868 } else { |
3836 return RegExpImpl::RE_FAILURE; // No matches at all. | 3869 return RegExpImpl::RE_FAILURE; // No matches at all. |
3837 } | 3870 } |
3838 } | 3871 } |
3839 | 3872 |
3840 | 3873 |
3874 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain | |
3875 // separate last match info. See comment on that function. | |
3841 static RegExpImpl::IrregexpResult SearchRegExpMultiple( | 3876 static RegExpImpl::IrregexpResult SearchRegExpMultiple( |
3842 Isolate* isolate, | 3877 Isolate* isolate, |
3843 Handle<String> subject, | 3878 Handle<String> subject, |
3844 Handle<JSRegExp> regexp, | 3879 Handle<JSRegExp> regexp, |
3845 Handle<JSArray> last_match_array, | 3880 Handle<JSArray> last_match_array, |
3846 FixedArrayBuilder* builder) { | 3881 FixedArrayBuilder* builder) { |
3847 | 3882 |
3848 ASSERT(subject->IsFlat()); | 3883 ASSERT(subject->IsFlat()); |
3849 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); | 3884 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); |
3850 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; | 3885 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; |
3851 | 3886 |
3852 OffsetsVector registers(required_registers, isolate); | 3887 OffsetsVector registers(required_registers, isolate); |
3853 Vector<int32_t> register_vector(registers.vector(), registers.length()); | 3888 Vector<int32_t> register_vector(registers.vector(), registers.length()); |
3854 | 3889 |
3855 RegExpImpl::IrregexpResult result = | 3890 RegExpImpl::IrregexpResult result = |
3856 RegExpImpl::IrregexpExecOnce(regexp, | 3891 RegExpImpl::IrregexpExecOnce(regexp, |
3857 subject, | 3892 subject, |
3858 0, | 3893 0, |
3859 register_vector); | 3894 register_vector); |
3860 | 3895 |
3861 int capture_count = regexp->CaptureCount(); | 3896 int capture_count = regexp->CaptureCount(); |
3862 int subject_length = subject->length(); | 3897 int subject_length = subject->length(); |
3863 | 3898 |
3864 // Position to search from. | 3899 // Position to search from. |
3865 int pos = 0; | 3900 int pos = 0; |
3866 // End of previous match. Differs from pos if match was empty. | 3901 // End of previous match. Differs from pos if match was empty. |
3867 int match_end = 0; | 3902 int match_end = 0; |
3868 if (result == RegExpImpl::RE_SUCCESS) { | 3903 if (result == RegExpImpl::RE_SUCCESS) { |
3869 // Need to keep a copy of the previous match for creating last_match_info | |
3870 // at the end, so we have two vectors that we swap between. | |
3871 OffsetsVector registers2(required_registers, isolate); | |
3872 Vector<int> prev_register_vector(registers2.vector(), registers2.length()); | |
3873 bool first = true; | 3904 bool first = true; |
3874 do { | 3905 do { |
3875 int match_start = register_vector[0]; | 3906 int match_start = register_vector[0]; |
3876 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); | 3907 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
3877 if (match_end < match_start) { | 3908 if (match_end < match_start) { |
3878 ReplacementStringBuilder::AddSubjectSlice(builder, | 3909 ReplacementStringBuilder::AddSubjectSlice(builder, |
3879 match_end, | 3910 match_end, |
3880 match_start); | 3911 match_start); |
3881 } | 3912 } |
3882 match_end = register_vector[1]; | 3913 match_end = register_vector[1]; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3915 elements->set(i, *substring); | 3946 elements->set(i, *substring); |
3916 } else { | 3947 } else { |
3917 ASSERT(register_vector[i * 2 + 1] < 0); | 3948 ASSERT(register_vector[i * 2 + 1] < 0); |
3918 elements->set(i, isolate->heap()->undefined_value()); | 3949 elements->set(i, isolate->heap()->undefined_value()); |
3919 } | 3950 } |
3920 } | 3951 } |
3921 elements->set(capture_count + 1, Smi::FromInt(match_start)); | 3952 elements->set(capture_count + 1, Smi::FromInt(match_start)); |
3922 elements->set(capture_count + 2, *subject); | 3953 elements->set(capture_count + 2, *subject); |
3923 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); | 3954 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
3924 } | 3955 } |
3925 // Swap register vectors, so the last successful match is in | |
3926 // prev_register_vector. | |
3927 Vector<int32_t> tmp = prev_register_vector; | |
3928 prev_register_vector = register_vector; | |
3929 register_vector = tmp; | |
3930 | 3956 |
3931 if (match_end > match_start) { | 3957 if (match_end > match_start) { |
3932 pos = match_end; | 3958 pos = match_end; |
3933 } else { | 3959 } else { |
3934 pos = match_end + 1; | 3960 pos = match_end + 1; |
3935 if (pos > subject_length) { | 3961 if (pos > subject_length) { |
3936 break; | 3962 break; |
3937 } | 3963 } |
3938 } | 3964 } |
3939 | 3965 |
(...skipping 11 matching lines...) Expand all Loading... | |
3951 match_end, | 3977 match_end, |
3952 subject_length); | 3978 subject_length); |
3953 } | 3979 } |
3954 | 3980 |
3955 int last_match_capture_count = (capture_count + 1) * 2; | 3981 int last_match_capture_count = (capture_count + 1) * 2; |
3956 int last_match_array_size = | 3982 int last_match_array_size = |
3957 last_match_capture_count + RegExpImpl::kLastMatchOverhead; | 3983 last_match_capture_count + RegExpImpl::kLastMatchOverhead; |
3958 last_match_array->EnsureSize(last_match_array_size); | 3984 last_match_array->EnsureSize(last_match_array_size); |
3959 AssertNoAllocation no_gc; | 3985 AssertNoAllocation no_gc; |
3960 FixedArray* elements = FixedArray::cast(last_match_array->elements()); | 3986 FixedArray* elements = FixedArray::cast(last_match_array->elements()); |
3987 // We have to set this even though the rest of the last match array is | |
3988 // ignored. | |
3961 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); | 3989 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); |
3990 // These are also read without consulting the override. | |
3962 RegExpImpl::SetLastSubject(elements, *subject); | 3991 RegExpImpl::SetLastSubject(elements, *subject); |
3963 RegExpImpl::SetLastInput(elements, *subject); | 3992 RegExpImpl::SetLastInput(elements, *subject); |
3964 for (int i = 0; i < last_match_capture_count; i++) { | |
3965 RegExpImpl::SetCapture(elements, i, prev_register_vector[i]); | |
3966 } | |
3967 return RegExpImpl::RE_SUCCESS; | 3993 return RegExpImpl::RE_SUCCESS; |
3968 } | 3994 } |
3969 } | 3995 } |
3970 // No matches at all, return failure or exception result directly. | 3996 // No matches at all, return failure or exception result directly. |
3971 return result; | 3997 return result; |
3972 } | 3998 } |
3973 | 3999 |
3974 | 4000 |
4001 // This is only called from StringReplaceGlobalRegExpWithFunction. That caller | |
4002 // sets lastMatchInfoOverride to maintain the last match info, so we don't need | |
4003 // to set any other last match array info here. | |
3975 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { | 4004 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { |
3976 ASSERT(args.length() == 4); | 4005 ASSERT(args.length() == 4); |
3977 HandleScope handles(isolate); | 4006 HandleScope handles(isolate); |
3978 | 4007 |
3979 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); | 4008 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
3980 if (!subject->IsFlat()) FlattenString(subject); | 4009 if (!subject->IsFlat()) FlattenString(subject); |
3981 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); | 4010 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
3982 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); | 4011 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); |
3983 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); | 4012 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
3984 | 4013 |
(...skipping 9420 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
13405 // Handle last resort GC and make sure to allow future allocations | 13434 // Handle last resort GC and make sure to allow future allocations |
13406 // to grow the heap without causing GCs (if possible). | 13435 // to grow the heap without causing GCs (if possible). |
13407 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13436 isolate->counters()->gc_last_resort_from_js()->Increment(); |
13408 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 13437 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
13409 "Runtime::PerformGC"); | 13438 "Runtime::PerformGC"); |
13410 } | 13439 } |
13411 } | 13440 } |
13412 | 13441 |
13413 | 13442 |
13414 } } // namespace v8::internal | 13443 } } // namespace v8::internal |
OLD | NEW |