Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(290)

Side by Side Diff: src/runtime.cc

Issue 10184004: Fix some bugs in accessing details of the latest regexp (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2864 matching lines...) Expand 10 before | Expand all | Expand 10 after
2875 subject_vector, 2875 subject_vector,
2876 pattern_content.ToUC16Vector(), 2876 pattern_content.ToUC16Vector(),
2877 indices, 2877 indices,
2878 limit); 2878 limit);
2879 } 2879 }
2880 } 2880 }
2881 } 2881 }
2882 } 2882 }
2883 2883
2884 2884
2885 static void SetLastMatchInfoNoCaptures(Handle<String> subject,
ulan 2012/04/23 16:59:23 Why not just move the implementation here, instead…
Erik Corry 2012/04/23 18:54:02 Because then it would be hard to review. Done.
2886 Handle<JSArray> last_match_info,
2887 int match_start,
2888 int match_end);
2889
2890
2885 template<typename ResultSeqString> 2891 template<typename ResultSeqString>
2886 MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString( 2892 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
2887 Isolate* isolate, 2893 Isolate* isolate,
2888 Handle<String> subject, 2894 Handle<String> subject,
2889 Handle<JSRegExp> pattern_regexp, 2895 Handle<JSRegExp> pattern_regexp,
2890 Handle<String> replacement) { 2896 Handle<String> replacement,
2897 Handle<JSArray> last_match_info) {
2891 ASSERT(subject->IsFlat()); 2898 ASSERT(subject->IsFlat());
2892 ASSERT(replacement->IsFlat()); 2899 ASSERT(replacement->IsFlat());
2893 2900
2894 ZoneScope zone_space(isolate, DELETE_ON_EXIT); 2901 ZoneScope zone_space(isolate, DELETE_ON_EXIT);
2895 ZoneList<int> indices(8); 2902 ZoneList<int> indices(8);
2896 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 2903 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
2897 String* pattern = 2904 String* pattern =
2898 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 2905 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
2899 int subject_len = subject->length(); 2906 int subject_len = subject->length();
2900 int pattern_len = pattern->length(); 2907 int pattern_len = pattern->length();
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
2939 2946
2940 subject_pos = indices.at(i) + pattern_len; 2947 subject_pos = indices.at(i) + pattern_len;
2941 } 2948 }
2942 // Add remaining subject content at the end. 2949 // Add remaining subject content at the end.
2943 if (subject_pos < subject_len) { 2950 if (subject_pos < subject_len) {
2944 String::WriteToFlat(*subject, 2951 String::WriteToFlat(*subject,
2945 result->GetChars() + result_pos, 2952 result->GetChars() + result_pos,
2946 subject_pos, 2953 subject_pos,
2947 subject_len); 2954 subject_len);
2948 } 2955 }
2956
2957 SetLastMatchInfoNoCaptures(subject,
2958 last_match_info,
2959 indices.at(matches - 1),
2960 indices.at(matches - 1) + pattern_len);
2961
2949 return *result; 2962 return *result;
2950 } 2963 }
2951 2964
2952 2965
2953 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( 2966 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
2954 Isolate* isolate, 2967 Isolate* isolate,
2955 String* subject, 2968 String* subject,
2956 JSRegExp* regexp, 2969 JSRegExp* regexp,
2957 String* replacement, 2970 String* replacement,
2958 JSArray* last_match_info) { 2971 JSArray* last_match_info) {
(...skipping 28 matching lines...) Expand all
2987 length); 3000 length);
2988 3001
2989 bool is_global = regexp_handle->GetFlags().is_global(); 3002 bool is_global = regexp_handle->GetFlags().is_global();
2990 3003
2991 // Shortcut for simple non-regexp global replacements 3004 // Shortcut for simple non-regexp global replacements
2992 if (is_global && 3005 if (is_global &&
2993 regexp_handle->TypeTag() == JSRegExp::ATOM && 3006 regexp_handle->TypeTag() == JSRegExp::ATOM &&
2994 compiled_replacement.simple_hint()) { 3007 compiled_replacement.simple_hint()) {
2995 if (subject_handle->HasOnlyAsciiChars() && 3008 if (subject_handle->HasOnlyAsciiChars() &&
2996 replacement_handle->HasOnlyAsciiChars()) { 3009 replacement_handle->HasOnlyAsciiChars()) {
2997 return StringReplaceStringWithString<SeqAsciiString>( 3010 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
2998 isolate, subject_handle, regexp_handle, replacement_handle); 3011 isolate,
3012 subject_handle,
3013 regexp_handle,
3014 replacement_handle,
3015 last_match_info_handle);
2999 } else { 3016 } else {
3000 return StringReplaceStringWithString<SeqTwoByteString>( 3017 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3001 isolate, subject_handle, regexp_handle, replacement_handle); 3018 isolate,
3019 subject_handle,
3020 regexp_handle,
3021 replacement_handle,
3022 last_match_info_handle);
3002 } 3023 }
3003 } 3024 }
3004 3025
3005 // Guessing the number of parts that the final result string is built 3026 // Guessing the number of parts that the final result string is built
3006 // from. Global regexps can match any number of times, so we guess 3027 // from. Global regexps can match any number of times, so we guess
3007 // conservatively. 3028 // conservatively.
3008 int expected_parts = 3029 int expected_parts =
3009 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; 3030 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
3010 ReplacementStringBuilder builder(isolate->heap(), 3031 ReplacementStringBuilder builder(isolate->heap(),
3011 subject_handle, 3032 subject_handle,
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
3080 Isolate* isolate, 3101 Isolate* isolate,
3081 String* subject, 3102 String* subject,
3082 JSRegExp* regexp, 3103 JSRegExp* regexp,
3083 JSArray* last_match_info) { 3104 JSArray* last_match_info) {
3084 ASSERT(subject->IsFlat()); 3105 ASSERT(subject->IsFlat());
3085 3106
3086 HandleScope handles(isolate); 3107 HandleScope handles(isolate);
3087 3108
3088 Handle<String> subject_handle(subject); 3109 Handle<String> subject_handle(subject);
3089 Handle<JSRegExp> regexp_handle(regexp); 3110 Handle<JSRegExp> regexp_handle(regexp);
3111 Handle<JSArray> last_match_info_handle(last_match_info);
3090 3112
3091 // Shortcut for simple non-regexp global replacements 3113 // Shortcut for simple non-regexp global replacements
3092 if (regexp_handle->GetFlags().is_global() && 3114 if (regexp_handle->GetFlags().is_global() &&
3093 regexp_handle->TypeTag() == JSRegExp::ATOM) { 3115 regexp_handle->TypeTag() == JSRegExp::ATOM) {
3094 Handle<String> empty_string_handle(HEAP->empty_string()); 3116 Handle<String> empty_string_handle(HEAP->empty_string());
3095 if (subject_handle->HasOnlyAsciiChars()) { 3117 if (subject_handle->HasOnlyAsciiChars()) {
3096 return StringReplaceStringWithString<SeqAsciiString>( 3118 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3097 isolate, subject_handle, regexp_handle, empty_string_handle); 3119 isolate,
3120 subject_handle,
3121 regexp_handle,
3122 empty_string_handle,
3123 last_match_info_handle);
3098 } else { 3124 } else {
3099 return StringReplaceStringWithString<SeqTwoByteString>( 3125 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3100 isolate, subject_handle, regexp_handle, empty_string_handle); 3126 isolate,
3127 subject_handle,
3128 regexp_handle,
3129 empty_string_handle,
3130 last_match_info_handle);
3101 } 3131 }
3102 } 3132 }
3103 3133
3104 Handle<JSArray> last_match_info_handle(last_match_info);
3105 Handle<Object> match = RegExpImpl::Exec(regexp_handle, 3134 Handle<Object> match = RegExpImpl::Exec(regexp_handle,
3106 subject_handle, 3135 subject_handle,
3107 0, 3136 0,
3108 last_match_info_handle); 3137 last_match_info_handle);
3109 if (match.is_null()) return Failure::Exception(); 3138 if (match.is_null()) return Failure::Exception();
3110 if (match->IsNull()) return *subject_handle; 3139 if (match->IsNull()) return *subject_handle;
3111 3140
3112 ASSERT(last_match_info_handle->HasFastElements()); 3141 ASSERT(last_match_info_handle->HasFastElements());
3113 3142
3114 int start, end; 3143 int start, end;
3115 { 3144 {
3116 AssertNoAllocation match_info_array_is_not_in_a_handle; 3145 AssertNoAllocation match_info_array_is_not_in_a_handle;
3117 FixedArray* match_info_array = 3146 FixedArray* match_info_array =
3118 FixedArray::cast(last_match_info_handle->elements()); 3147 FixedArray::cast(last_match_info_handle->elements());
3119 3148
3120 start = RegExpImpl::GetCapture(match_info_array, 0); 3149 start = RegExpImpl::GetCapture(match_info_array, 0);
3121 end = RegExpImpl::GetCapture(match_info_array, 1); 3150 end = RegExpImpl::GetCapture(match_info_array, 1);
3122 } 3151 }
3123 3152
3153 bool global = regexp_handle->GetFlags().is_global();
3154
3155 if (start == end && !global) return *subject_handle;
3156
3124 int length = subject_handle->length(); 3157 int length = subject_handle->length();
3125 int new_length = length - (end - start); 3158 int new_length = length - (end - start);
3126 if (new_length == 0) { 3159 if (new_length == 0) {
3127 return isolate->heap()->empty_string(); 3160 return isolate->heap()->empty_string();
3128 } 3161 }
3129 Handle<ResultSeqString> answer; 3162 Handle<ResultSeqString> answer;
3130 if (ResultSeqString::kHasAsciiEncoding) { 3163 if (ResultSeqString::kHasAsciiEncoding) {
3131 answer = Handle<ResultSeqString>::cast( 3164 answer = Handle<ResultSeqString>::cast(
3132 isolate->factory()->NewRawAsciiString(new_length)); 3165 isolate->factory()->NewRawAsciiString(new_length));
3133 } else { 3166 } else {
3134 answer = Handle<ResultSeqString>::cast( 3167 answer = Handle<ResultSeqString>::cast(
3135 isolate->factory()->NewRawTwoByteString(new_length)); 3168 isolate->factory()->NewRawTwoByteString(new_length));
3136 } 3169 }
3137 3170
3138 // If the regexp isn't global, only match once. 3171 // If the regexp isn't global, only match once.
3139 if (!regexp_handle->GetFlags().is_global()) { 3172 if (!global) {
3140 if (start > 0) { 3173 if (start > 0) {
3141 String::WriteToFlat(*subject_handle, 3174 String::WriteToFlat(*subject_handle,
3142 answer->GetChars(), 3175 answer->GetChars(),
3143 0, 3176 0,
3144 start); 3177 start);
3145 } 3178 }
3146 if (end < length) { 3179 if (end < length) {
3147 String::WriteToFlat(*subject_handle, 3180 String::WriteToFlat(*subject_handle,
3148 answer->GetChars() + start, 3181 answer->GetChars() + start,
3149 end, 3182 end,
(...skipping 681 matching lines...) Expand 10 before | Expand all | Expand 10 after
3831 last_match_array, 3864 last_match_array,
3832 match_start, 3865 match_start,
3833 match_end); 3866 match_end);
3834 return RegExpImpl::RE_SUCCESS; 3867 return RegExpImpl::RE_SUCCESS;
3835 } else { 3868 } else {
3836 return RegExpImpl::RE_FAILURE; // No matches at all. 3869 return RegExpImpl::RE_FAILURE; // No matches at all.
3837 } 3870 }
3838 } 3871 }
3839 3872
3840 3873
3874 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3875 // separate last match info. See comment on that function.
3841 static RegExpImpl::IrregexpResult SearchRegExpMultiple( 3876 static RegExpImpl::IrregexpResult SearchRegExpMultiple(
3842 Isolate* isolate, 3877 Isolate* isolate,
3843 Handle<String> subject, 3878 Handle<String> subject,
3844 Handle<JSRegExp> regexp, 3879 Handle<JSRegExp> regexp,
3845 Handle<JSArray> last_match_array, 3880 Handle<JSArray> last_match_array,
3846 FixedArrayBuilder* builder) { 3881 FixedArrayBuilder* builder) {
3847 3882
3848 ASSERT(subject->IsFlat()); 3883 ASSERT(subject->IsFlat());
3849 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); 3884 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
3850 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; 3885 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
3851 3886
3852 OffsetsVector registers(required_registers, isolate); 3887 OffsetsVector registers(required_registers, isolate);
3853 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3888 Vector<int32_t> register_vector(registers.vector(), registers.length());
3854 3889
3855 RegExpImpl::IrregexpResult result = 3890 RegExpImpl::IrregexpResult result =
3856 RegExpImpl::IrregexpExecOnce(regexp, 3891 RegExpImpl::IrregexpExecOnce(regexp,
3857 subject, 3892 subject,
3858 0, 3893 0,
3859 register_vector); 3894 register_vector);
3860 3895
3861 int capture_count = regexp->CaptureCount(); 3896 int capture_count = regexp->CaptureCount();
3862 int subject_length = subject->length(); 3897 int subject_length = subject->length();
3863 3898
3864 // Position to search from. 3899 // Position to search from.
3865 int pos = 0; 3900 int pos = 0;
3866 // End of previous match. Differs from pos if match was empty. 3901 // End of previous match. Differs from pos if match was empty.
3867 int match_end = 0; 3902 int match_end = 0;
3868 if (result == RegExpImpl::RE_SUCCESS) { 3903 if (result == RegExpImpl::RE_SUCCESS) {
3869 // Need to keep a copy of the previous match for creating last_match_info
3870 // at the end, so we have two vectors that we swap between.
3871 OffsetsVector registers2(required_registers, isolate);
3872 Vector<int> prev_register_vector(registers2.vector(), registers2.length());
3873 bool first = true; 3904 bool first = true;
3874 do { 3905 do {
3875 int match_start = register_vector[0]; 3906 int match_start = register_vector[0];
3876 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3907 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3877 if (match_end < match_start) { 3908 if (match_end < match_start) {
3878 ReplacementStringBuilder::AddSubjectSlice(builder, 3909 ReplacementStringBuilder::AddSubjectSlice(builder,
3879 match_end, 3910 match_end,
3880 match_start); 3911 match_start);
3881 } 3912 }
3882 match_end = register_vector[1]; 3913 match_end = register_vector[1];
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
3915 elements->set(i, *substring); 3946 elements->set(i, *substring);
3916 } else { 3947 } else {
3917 ASSERT(register_vector[i * 2 + 1] < 0); 3948 ASSERT(register_vector[i * 2 + 1] < 0);
3918 elements->set(i, isolate->heap()->undefined_value()); 3949 elements->set(i, isolate->heap()->undefined_value());
3919 } 3950 }
3920 } 3951 }
3921 elements->set(capture_count + 1, Smi::FromInt(match_start)); 3952 elements->set(capture_count + 1, Smi::FromInt(match_start));
3922 elements->set(capture_count + 2, *subject); 3953 elements->set(capture_count + 2, *subject);
3923 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); 3954 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3924 } 3955 }
3925 // Swap register vectors, so the last successful match is in
3926 // prev_register_vector.
3927 Vector<int32_t> tmp = prev_register_vector;
3928 prev_register_vector = register_vector;
3929 register_vector = tmp;
3930 3956
3931 if (match_end > match_start) { 3957 if (match_end > match_start) {
3932 pos = match_end; 3958 pos = match_end;
3933 } else { 3959 } else {
3934 pos = match_end + 1; 3960 pos = match_end + 1;
3935 if (pos > subject_length) { 3961 if (pos > subject_length) {
3936 break; 3962 break;
3937 } 3963 }
3938 } 3964 }
3939 3965
(...skipping 11 matching lines...) Expand all
3951 match_end, 3977 match_end,
3952 subject_length); 3978 subject_length);
3953 } 3979 }
3954 3980
3955 int last_match_capture_count = (capture_count + 1) * 2; 3981 int last_match_capture_count = (capture_count + 1) * 2;
3956 int last_match_array_size = 3982 int last_match_array_size =
3957 last_match_capture_count + RegExpImpl::kLastMatchOverhead; 3983 last_match_capture_count + RegExpImpl::kLastMatchOverhead;
3958 last_match_array->EnsureSize(last_match_array_size); 3984 last_match_array->EnsureSize(last_match_array_size);
3959 AssertNoAllocation no_gc; 3985 AssertNoAllocation no_gc;
3960 FixedArray* elements = FixedArray::cast(last_match_array->elements()); 3986 FixedArray* elements = FixedArray::cast(last_match_array->elements());
3987 // We have to set this even though the rest of the last match array is
3988 // ignored.
3961 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); 3989 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
3990 // These are also read without consulting the override.
3962 RegExpImpl::SetLastSubject(elements, *subject); 3991 RegExpImpl::SetLastSubject(elements, *subject);
3963 RegExpImpl::SetLastInput(elements, *subject); 3992 RegExpImpl::SetLastInput(elements, *subject);
3964 for (int i = 0; i < last_match_capture_count; i++) {
3965 RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
3966 }
3967 return RegExpImpl::RE_SUCCESS; 3993 return RegExpImpl::RE_SUCCESS;
3968 } 3994 }
3969 } 3995 }
3970 // No matches at all, return failure or exception result directly. 3996 // No matches at all, return failure or exception result directly.
3971 return result; 3997 return result;
3972 } 3998 }
3973 3999
3974 4000
4001 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
4002 // lastMatchInfoOverride to maintain the last match info, so we don't need to
4003 // set any other last match array info.
3975 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { 4004 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
3976 ASSERT(args.length() == 4); 4005 ASSERT(args.length() == 4);
3977 HandleScope handles(isolate); 4006 HandleScope handles(isolate);
3978 4007
3979 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 4008 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
3980 if (!subject->IsFlat()) FlattenString(subject); 4009 if (!subject->IsFlat()) FlattenString(subject);
3981 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 4010 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
3982 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); 4011 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
3983 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 4012 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
3984 4013
(...skipping 9420 matching lines...) Expand 10 before | Expand all | Expand 10 after
13405 // Handle last resort GC and make sure to allow future allocations 13434 // Handle last resort GC and make sure to allow future allocations
13406 // to grow the heap without causing GCs (if possible). 13435 // to grow the heap without causing GCs (if possible).
13407 isolate->counters()->gc_last_resort_from_js()->Increment(); 13436 isolate->counters()->gc_last_resort_from_js()->Increment();
13408 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13437 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13409 "Runtime::PerformGC"); 13438 "Runtime::PerformGC");
13410 } 13439 }
13411 } 13440 }
13412 13441
13413 13442
13414 } } // namespace v8::internal 13443 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698