Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Side by Side Diff: src/jsregexp.cc

Issue 10443109: Backport r11686: Avoid overdeep recursion in regexp where a guarded expression with a minimum repet… (Closed) Base URL: http://v8.googlecode.com/svn/branches/3.10/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/version.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2156 matching lines...) Expand 10 before | Expand all | Expand 10 after
2167 bool not_at_start) { 2167 bool not_at_start) {
2168 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2168 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2169 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2169 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2170 return on_success()->EatsAtLeast(still_to_find, 2170 return on_success()->EatsAtLeast(still_to_find,
2171 recursion_depth + 1, 2171 recursion_depth + 1,
2172 not_at_start); 2172 not_at_start);
2173 } 2173 }
2174 2174
2175 2175
2176 void ActionNode::FillInBMInfo(int offset, 2176 void ActionNode::FillInBMInfo(int offset,
2177 int recursion_depth,
2177 BoyerMooreLookahead* bm, 2178 BoyerMooreLookahead* bm,
2178 bool not_at_start) { 2179 bool not_at_start) {
2179 if (type_ == BEGIN_SUBMATCH) { 2180 if (type_ == BEGIN_SUBMATCH) {
2180 bm->SetRest(offset); 2181 bm->SetRest(offset);
2181 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { 2182 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2182 on_success()->FillInBMInfo(offset, bm, not_at_start); 2183 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2183 } 2184 }
2184 SaveBMInfo(bm, not_at_start, offset); 2185 SaveBMInfo(bm, not_at_start, offset);
2185 } 2186 }
2186 2187
2187 2188
2188 int AssertionNode::EatsAtLeast(int still_to_find, 2189 int AssertionNode::EatsAtLeast(int still_to_find,
2189 int recursion_depth, 2190 int recursion_depth,
2190 bool not_at_start) { 2191 bool not_at_start) {
2191 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2192 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2192 // If we know we are not at the start and we are asked "how many characters 2193 // If we know we are not at the start and we are asked "how many characters
2193 // will you match if you succeed?" then we can answer anything since false 2194 // will you match if you succeed?" then we can answer anything since false
2194 // implies false. So lets just return the max answer (still_to_find) since 2195 // implies false. So lets just return the max answer (still_to_find) since
2195 // that won't prevent us from preloading a lot of characters for the other 2196 // that won't prevent us from preloading a lot of characters for the other
2196 // branches in the node graph. 2197 // branches in the node graph.
2197 if (type() == AT_START && not_at_start) return still_to_find; 2198 if (type() == AT_START && not_at_start) return still_to_find;
2198 return on_success()->EatsAtLeast(still_to_find, 2199 return on_success()->EatsAtLeast(still_to_find,
2199 recursion_depth + 1, 2200 recursion_depth + 1,
2200 not_at_start); 2201 not_at_start);
2201 } 2202 }
2202 2203
2203 2204
2204 void AssertionNode::FillInBMInfo( 2205 void AssertionNode::FillInBMInfo(int offset,
2205 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 2206 int recursion_depth,
2207 BoyerMooreLookahead* bm,
2208 bool not_at_start) {
2206 // Match the behaviour of EatsAtLeast on this node. 2209 // Match the behaviour of EatsAtLeast on this node.
2207 if (type() == AT_START && not_at_start) return; 2210 if (type() == AT_START && not_at_start) return;
2208 on_success()->FillInBMInfo(offset, bm, not_at_start); 2211 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2209 SaveBMInfo(bm, not_at_start, offset); 2212 SaveBMInfo(bm, not_at_start, offset);
2210 } 2213 }
2211 2214
2212 2215
2213 int BackReferenceNode::EatsAtLeast(int still_to_find, 2216 int BackReferenceNode::EatsAtLeast(int still_to_find,
2214 int recursion_depth, 2217 int recursion_depth,
2215 bool not_at_start) { 2218 bool not_at_start) {
2216 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2219 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2217 return on_success()->EatsAtLeast(still_to_find, 2220 return on_success()->EatsAtLeast(still_to_find,
2218 recursion_depth + 1, 2221 recursion_depth + 1,
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after
2779 bool not_at_start) { 2782 bool not_at_start) {
2780 if (body_can_be_zero_length_ || info()->visited) return; 2783 if (body_can_be_zero_length_ || info()->visited) return;
2781 VisitMarker marker(info()); 2784 VisitMarker marker(info());
2782 return ChoiceNode::GetQuickCheckDetails(details, 2785 return ChoiceNode::GetQuickCheckDetails(details,
2783 compiler, 2786 compiler,
2784 characters_filled_in, 2787 characters_filled_in,
2785 not_at_start); 2788 not_at_start);
2786 } 2789 }
2787 2790
2788 2791
2789 void LoopChoiceNode::FillInBMInfo( 2792 void LoopChoiceNode::FillInBMInfo(int offset,
2790 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 2793 int recursion_depth,
2791 if (body_can_be_zero_length_) { 2794 BoyerMooreLookahead* bm,
2795 bool not_at_start) {
2796 if (body_can_be_zero_length_ ||
2797 recursion_depth > RegExpCompiler::kMaxRecursion) {
2792 bm->SetRest(offset); 2798 bm->SetRest(offset);
2793 SaveBMInfo(bm, not_at_start, offset); 2799 SaveBMInfo(bm, not_at_start, offset);
2794 return; 2800 return;
2795 } 2801 }
2796 ChoiceNode::FillInBMInfo(offset, bm, not_at_start); 2802 ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2797 SaveBMInfo(bm, not_at_start, offset); 2803 SaveBMInfo(bm, not_at_start, offset);
2798 } 2804 }
2799 2805
2800 2806
2801 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2807 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2802 RegExpCompiler* compiler, 2808 RegExpCompiler* compiler,
2803 int characters_filled_in, 2809 int characters_filled_in,
2804 bool not_at_start) { 2810 bool not_at_start) {
2805 not_at_start = (not_at_start || not_at_start_); 2811 not_at_start = (not_at_start || not_at_start_);
2806 int choice_count = alternatives_->length(); 2812 int choice_count = alternatives_->length();
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2888 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2894 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2889 bool not_at_start = (trace->at_start() == Trace::FALSE); 2895 bool not_at_start = (trace->at_start() == Trace::FALSE);
2890 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2896 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2891 if (lookahead == NULL) { 2897 if (lookahead == NULL) {
2892 int eats_at_least = 2898 int eats_at_least =
2893 Min(kMaxLookaheadForBoyerMoore, 2899 Min(kMaxLookaheadForBoyerMoore,
2894 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 2900 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
2895 if (eats_at_least >= 1) { 2901 if (eats_at_least >= 1) {
2896 BoyerMooreLookahead* bm = 2902 BoyerMooreLookahead* bm =
2897 new BoyerMooreLookahead(eats_at_least, compiler); 2903 new BoyerMooreLookahead(eats_at_least, compiler);
2898 FillInBMInfo(0, bm, not_at_start); 2904 FillInBMInfo(0, 0, bm, not_at_start);
2899 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2905 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2900 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2906 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2901 } 2907 }
2902 } else { 2908 } else {
2903 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2909 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2904 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2910 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2905 } 2911 }
2906 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); 2912 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);
2907 if (next_is_word_character == Trace::UNKNOWN) { 2913 if (next_is_word_character == Trace::UNKNOWN) {
2908 Label before_non_word; 2914 Label before_non_word;
(...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after
3826 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. 3832 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
3827 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 3833 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3828 if (lookahead == NULL) { 3834 if (lookahead == NULL) {
3829 eats_at_least = 3835 eats_at_least =
3830 Min(kMaxLookaheadForBoyerMoore, 3836 Min(kMaxLookaheadForBoyerMoore,
3831 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 3837 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
3832 if (eats_at_least >= 1) { 3838 if (eats_at_least >= 1) {
3833 BoyerMooreLookahead* bm = 3839 BoyerMooreLookahead* bm =
3834 new BoyerMooreLookahead(eats_at_least, compiler); 3840 new BoyerMooreLookahead(eats_at_least, compiler);
3835 GuardedAlternative alt0 = alternatives_->at(0); 3841 GuardedAlternative alt0 = alternatives_->at(0);
3836 alt0.node()->FillInBMInfo(0, bm, not_at_start); 3842 alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
3837 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); 3843 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
3838 } 3844 }
3839 } else { 3845 } else {
3840 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); 3846 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
3841 } 3847 }
3842 } 3848 }
3843 } 3849 }
3844 } 3850 }
3845 3851
3846 if (eats_at_least == kEatsAtLeastNotYetInitialized) { 3852 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
(...skipping 1718 matching lines...) Expand 10 before | Expand all | Expand 10 after
5565 void Analysis::VisitBackReference(BackReferenceNode* that) { 5571 void Analysis::VisitBackReference(BackReferenceNode* that) {
5566 EnsureAnalyzed(that->on_success()); 5572 EnsureAnalyzed(that->on_success());
5567 } 5573 }
5568 5574
5569 5575
5570 void Analysis::VisitAssertion(AssertionNode* that) { 5576 void Analysis::VisitAssertion(AssertionNode* that) {
5571 EnsureAnalyzed(that->on_success()); 5577 EnsureAnalyzed(that->on_success());
5572 } 5578 }
5573 5579
5574 5580
5575 void BackReferenceNode::FillInBMInfo( 5581 void BackReferenceNode::FillInBMInfo(int offset,
5576 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 5582 int recursion_depth,
5583 BoyerMooreLookahead* bm,
5584 bool not_at_start) {
5577 // Working out the set of characters that a backreference can match is too 5585 // Working out the set of characters that a backreference can match is too
5578 // hard, so we just say that any character can match. 5586 // hard, so we just say that any character can match.
5579 bm->SetRest(offset); 5587 bm->SetRest(offset);
5580 SaveBMInfo(bm, not_at_start, offset); 5588 SaveBMInfo(bm, not_at_start, offset);
5581 } 5589 }
5582 5590
5583 5591
5584 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 5592 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
5585 RegExpMacroAssembler::kTableSize); 5593 RegExpMacroAssembler::kTableSize);
5586 5594
5587 5595
5588 void ChoiceNode::FillInBMInfo( 5596 void ChoiceNode::FillInBMInfo(int offset,
5589 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 5597 int recursion_depth,
5598 BoyerMooreLookahead* bm,
5599 bool not_at_start) {
5590 ZoneList<GuardedAlternative>* alts = alternatives(); 5600 ZoneList<GuardedAlternative>* alts = alternatives();
5591 for (int i = 0; i < alts->length(); i++) { 5601 for (int i = 0; i < alts->length(); i++) {
5592 GuardedAlternative& alt = alts->at(i); 5602 GuardedAlternative& alt = alts->at(i);
5593 if (alt.guards() != NULL && alt.guards()->length() != 0) { 5603 if (alt.guards() != NULL && alt.guards()->length() != 0) {
5594 bm->SetRest(offset); // Give up trying to fill in info. 5604 bm->SetRest(offset); // Give up trying to fill in info.
5595 SaveBMInfo(bm, not_at_start, offset); 5605 SaveBMInfo(bm, not_at_start, offset);
5596 return; 5606 return;
5597 } 5607 }
5598 alt.node()->FillInBMInfo(offset, bm, not_at_start); 5608 alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
5599 } 5609 }
5600 SaveBMInfo(bm, not_at_start, offset); 5610 SaveBMInfo(bm, not_at_start, offset);
5601 } 5611 }
5602 5612
5603 5613
5604 void TextNode::FillInBMInfo( 5614 void TextNode::FillInBMInfo(int initial_offset,
5605 int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) { 5615 int recursion_depth,
5616 BoyerMooreLookahead* bm,
5617 bool not_at_start) {
5606 if (initial_offset >= bm->length()) return; 5618 if (initial_offset >= bm->length()) return;
5607 int offset = initial_offset; 5619 int offset = initial_offset;
5608 int max_char = bm->max_char(); 5620 int max_char = bm->max_char();
5609 for (int i = 0; i < elements()->length(); i++) { 5621 for (int i = 0; i < elements()->length(); i++) {
5610 if (offset >= bm->length()) { 5622 if (offset >= bm->length()) {
5611 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5623 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5612 return; 5624 return;
5613 } 5625 }
5614 TextElement text = elements()->at(i); 5626 TextElement text = elements()->at(i);
5615 if (text.type == TextElement::ATOM) { 5627 if (text.type == TextElement::ATOM) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
5649 } 5661 }
5650 } 5662 }
5651 offset++; 5663 offset++;
5652 } 5664 }
5653 } 5665 }
5654 if (offset >= bm->length()) { 5666 if (offset >= bm->length()) {
5655 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5667 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5656 return; 5668 return;
5657 } 5669 }
5658 on_success()->FillInBMInfo(offset, 5670 on_success()->FillInBMInfo(offset,
5671 recursion_depth + 1,
5659 bm, 5672 bm,
5660 true); // Not at start after a text node. 5673 true); // Not at start after a text node.
5661 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5674 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5662 } 5675 }
5663 5676
5664 5677
5665 // ------------------------------------------------------------------- 5678 // -------------------------------------------------------------------
5666 // Dispatch table construction 5679 // Dispatch table construction
5667 5680
5668 5681
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
5884 } 5897 }
5885 5898
5886 return compiler.Assemble(&macro_assembler, 5899 return compiler.Assemble(&macro_assembler,
5887 node, 5900 node,
5888 data->capture_count, 5901 data->capture_count,
5889 pattern); 5902 pattern);
5890 } 5903 }
5891 5904
5892 5905
5893 }} // namespace v8::internal 5906 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/version.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698