Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: src/jsregexp.cc

Issue 10451092: Avoid overdeep recursion in regexp where a guarded expression with a (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | test/mjsunit/regexp-capture-3.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2173 matching lines...) Expand 10 before | Expand all | Expand 10 after
2184 bool not_at_start) { 2184 bool not_at_start) {
2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2187 return on_success()->EatsAtLeast(still_to_find, 2187 return on_success()->EatsAtLeast(still_to_find,
2188 recursion_depth + 1, 2188 recursion_depth + 1,
2189 not_at_start); 2189 not_at_start);
2190 } 2190 }
2191 2191
2192 2192
2193 void ActionNode::FillInBMInfo(int offset, 2193 void ActionNode::FillInBMInfo(int offset,
2194 int recursion_depth,
2194 BoyerMooreLookahead* bm, 2195 BoyerMooreLookahead* bm,
2195 bool not_at_start) { 2196 bool not_at_start) {
2196 if (type_ == BEGIN_SUBMATCH) { 2197 if (type_ == BEGIN_SUBMATCH) {
2197 bm->SetRest(offset); 2198 bm->SetRest(offset);
2198 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { 2199 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2199 on_success()->FillInBMInfo(offset, bm, not_at_start); 2200 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2200 } 2201 }
2201 SaveBMInfo(bm, not_at_start, offset); 2202 SaveBMInfo(bm, not_at_start, offset);
2202 } 2203 }
2203 2204
2204 2205
2205 int AssertionNode::EatsAtLeast(int still_to_find, 2206 int AssertionNode::EatsAtLeast(int still_to_find,
2206 int recursion_depth, 2207 int recursion_depth,
2207 bool not_at_start) { 2208 bool not_at_start) {
2208 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2209 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2209 // If we know we are not at the start and we are asked "how many characters 2210 // If we know we are not at the start and we are asked "how many characters
2210 // will you match if you succeed?" then we can answer anything since false 2211 // will you match if you succeed?" then we can answer anything since false
2211 // implies false. So lets just return the max answer (still_to_find) since 2212 // implies false. So lets just return the max answer (still_to_find) since
2212 // that won't prevent us from preloading a lot of characters for the other 2213 // that won't prevent us from preloading a lot of characters for the other
2213 // branches in the node graph. 2214 // branches in the node graph.
2214 if (type() == AT_START && not_at_start) return still_to_find; 2215 if (type() == AT_START && not_at_start) return still_to_find;
2215 return on_success()->EatsAtLeast(still_to_find, 2216 return on_success()->EatsAtLeast(still_to_find,
2216 recursion_depth + 1, 2217 recursion_depth + 1,
2217 not_at_start); 2218 not_at_start);
2218 } 2219 }
2219 2220
2220 2221
2221 void AssertionNode::FillInBMInfo( 2222 void AssertionNode::FillInBMInfo(int offset,
2222 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 2223 int recursion_depth,
2224 BoyerMooreLookahead* bm,
2225 bool not_at_start) {
2223 // Match the behaviour of EatsAtLeast on this node. 2226 // Match the behaviour of EatsAtLeast on this node.
2224 if (type() == AT_START && not_at_start) return; 2227 if (type() == AT_START && not_at_start) return;
2225 on_success()->FillInBMInfo(offset, bm, not_at_start); 2228 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2226 SaveBMInfo(bm, not_at_start, offset); 2229 SaveBMInfo(bm, not_at_start, offset);
2227 } 2230 }
2228 2231
2229 2232
2230 int BackReferenceNode::EatsAtLeast(int still_to_find, 2233 int BackReferenceNode::EatsAtLeast(int still_to_find,
2231 int recursion_depth, 2234 int recursion_depth,
2232 bool not_at_start) { 2235 bool not_at_start) {
2233 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2236 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2234 return on_success()->EatsAtLeast(still_to_find, 2237 return on_success()->EatsAtLeast(still_to_find,
2235 recursion_depth + 1, 2238 recursion_depth + 1,
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after
2796 bool not_at_start) { 2799 bool not_at_start) {
2797 if (body_can_be_zero_length_ || info()->visited) return; 2800 if (body_can_be_zero_length_ || info()->visited) return;
2798 VisitMarker marker(info()); 2801 VisitMarker marker(info());
2799 return ChoiceNode::GetQuickCheckDetails(details, 2802 return ChoiceNode::GetQuickCheckDetails(details,
2800 compiler, 2803 compiler,
2801 characters_filled_in, 2804 characters_filled_in,
2802 not_at_start); 2805 not_at_start);
2803 } 2806 }
2804 2807
2805 2808
2806 void LoopChoiceNode::FillInBMInfo( 2809 void LoopChoiceNode::FillInBMInfo(int offset,
2807 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 2810 int recursion_depth,
2808 if (body_can_be_zero_length_) { 2811 BoyerMooreLookahead* bm,
2812 bool not_at_start) {
2813 if (body_can_be_zero_length_ ||
2814 recursion_depth > RegExpCompiler::kMaxRecursion) {
2809 bm->SetRest(offset); 2815 bm->SetRest(offset);
2810 SaveBMInfo(bm, not_at_start, offset); 2816 SaveBMInfo(bm, not_at_start, offset);
2811 return; 2817 return;
2812 } 2818 }
2813 ChoiceNode::FillInBMInfo(offset, bm, not_at_start); 2819 ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
2814 SaveBMInfo(bm, not_at_start, offset); 2820 SaveBMInfo(bm, not_at_start, offset);
2815 } 2821 }
2816 2822
2817 2823
2818 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2824 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2819 RegExpCompiler* compiler, 2825 RegExpCompiler* compiler,
2820 int characters_filled_in, 2826 int characters_filled_in,
2821 bool not_at_start) { 2827 bool not_at_start) {
2822 not_at_start = (not_at_start || not_at_start_); 2828 not_at_start = (not_at_start || not_at_start_);
2823 int choice_count = alternatives_->length(); 2829 int choice_count = alternatives_->length();
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2905 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2911 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2906 bool not_at_start = (trace->at_start() == Trace::FALSE); 2912 bool not_at_start = (trace->at_start() == Trace::FALSE);
2907 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2913 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2908 if (lookahead == NULL) { 2914 if (lookahead == NULL) {
2909 int eats_at_least = 2915 int eats_at_least =
2910 Min(kMaxLookaheadForBoyerMoore, 2916 Min(kMaxLookaheadForBoyerMoore,
2911 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 2917 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
2912 if (eats_at_least >= 1) { 2918 if (eats_at_least >= 1) {
2913 BoyerMooreLookahead* bm = 2919 BoyerMooreLookahead* bm =
2914 new BoyerMooreLookahead(eats_at_least, compiler); 2920 new BoyerMooreLookahead(eats_at_least, compiler);
2915 FillInBMInfo(0, bm, not_at_start); 2921 FillInBMInfo(0, 0, bm, not_at_start);
2916 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2922 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2917 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2923 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2918 } 2924 }
2919 } else { 2925 } else {
2920 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2926 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2921 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2927 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2922 } 2928 }
2923 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); 2929 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);
2924 if (next_is_word_character == Trace::UNKNOWN) { 2930 if (next_is_word_character == Trace::UNKNOWN) {
2925 Label before_non_word; 2931 Label before_non_word;
(...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after
3843 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. 3849 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
3844 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 3850 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3845 if (lookahead == NULL) { 3851 if (lookahead == NULL) {
3846 eats_at_least = 3852 eats_at_least =
3847 Min(kMaxLookaheadForBoyerMoore, 3853 Min(kMaxLookaheadForBoyerMoore,
3848 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 3854 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
3849 if (eats_at_least >= 1) { 3855 if (eats_at_least >= 1) {
3850 BoyerMooreLookahead* bm = 3856 BoyerMooreLookahead* bm =
3851 new BoyerMooreLookahead(eats_at_least, compiler); 3857 new BoyerMooreLookahead(eats_at_least, compiler);
3852 GuardedAlternative alt0 = alternatives_->at(0); 3858 GuardedAlternative alt0 = alternatives_->at(0);
3853 alt0.node()->FillInBMInfo(0, bm, not_at_start); 3859 alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
3854 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); 3860 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
3855 } 3861 }
3856 } else { 3862 } else {
3857 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); 3863 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
3858 } 3864 }
3859 } 3865 }
3860 } 3866 }
3861 } 3867 }
3862 3868
3863 if (eats_at_least == kEatsAtLeastNotYetInitialized) { 3869 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
(...skipping 1718 matching lines...) Expand 10 before | Expand all | Expand 10 after
5582 void Analysis::VisitBackReference(BackReferenceNode* that) { 5588 void Analysis::VisitBackReference(BackReferenceNode* that) {
5583 EnsureAnalyzed(that->on_success()); 5589 EnsureAnalyzed(that->on_success());
5584 } 5590 }
5585 5591
5586 5592
5587 void Analysis::VisitAssertion(AssertionNode* that) { 5593 void Analysis::VisitAssertion(AssertionNode* that) {
5588 EnsureAnalyzed(that->on_success()); 5594 EnsureAnalyzed(that->on_success());
5589 } 5595 }
5590 5596
5591 5597
5592 void BackReferenceNode::FillInBMInfo( 5598 void BackReferenceNode::FillInBMInfo(int offset,
5593 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 5599 int recursion_depth,
5600 BoyerMooreLookahead* bm,
5601 bool not_at_start) {
5594 // Working out the set of characters that a backreference can match is too 5602 // Working out the set of characters that a backreference can match is too
5595 // hard, so we just say that any character can match. 5603 // hard, so we just say that any character can match.
5596 bm->SetRest(offset); 5604 bm->SetRest(offset);
5597 SaveBMInfo(bm, not_at_start, offset); 5605 SaveBMInfo(bm, not_at_start, offset);
5598 } 5606 }
5599 5607
5600 5608
5601 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 5609 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
5602 RegExpMacroAssembler::kTableSize); 5610 RegExpMacroAssembler::kTableSize);
5603 5611
5604 5612
5605 void ChoiceNode::FillInBMInfo( 5613 void ChoiceNode::FillInBMInfo(int offset,
5606 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 5614 int recursion_depth,
5615 BoyerMooreLookahead* bm,
5616 bool not_at_start) {
5607 ZoneList<GuardedAlternative>* alts = alternatives(); 5617 ZoneList<GuardedAlternative>* alts = alternatives();
ulan 2012/05/31 11:56:23 We don't check recursion_depth here, because nesti
Erik Corry 2012/05/31 13:51:43 Yes, it's only LoopChoiceNodes that can lead to un
5608 for (int i = 0; i < alts->length(); i++) { 5618 for (int i = 0; i < alts->length(); i++) {
5609 GuardedAlternative& alt = alts->at(i); 5619 GuardedAlternative& alt = alts->at(i);
5610 if (alt.guards() != NULL && alt.guards()->length() != 0) { 5620 if (alt.guards() != NULL && alt.guards()->length() != 0) {
5611 bm->SetRest(offset); // Give up trying to fill in info. 5621 bm->SetRest(offset); // Give up trying to fill in info.
5612 SaveBMInfo(bm, not_at_start, offset); 5622 SaveBMInfo(bm, not_at_start, offset);
5613 return; 5623 return;
5614 } 5624 }
5615 alt.node()->FillInBMInfo(offset, bm, not_at_start); 5625 alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
5616 } 5626 }
5617 SaveBMInfo(bm, not_at_start, offset); 5627 SaveBMInfo(bm, not_at_start, offset);
5618 } 5628 }
5619 5629
5620 5630
5621 void TextNode::FillInBMInfo( 5631 void TextNode::FillInBMInfo(int initial_offset,
5622 int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) { 5632 int recursion_depth,
5633 BoyerMooreLookahead* bm,
5634 bool not_at_start) {
5623 if (initial_offset >= bm->length()) return; 5635 if (initial_offset >= bm->length()) return;
5624 int offset = initial_offset; 5636 int offset = initial_offset;
5625 int max_char = bm->max_char(); 5637 int max_char = bm->max_char();
5626 for (int i = 0; i < elements()->length(); i++) { 5638 for (int i = 0; i < elements()->length(); i++) {
5627 if (offset >= bm->length()) { 5639 if (offset >= bm->length()) {
5628 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5640 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5629 return; 5641 return;
5630 } 5642 }
5631 TextElement text = elements()->at(i); 5643 TextElement text = elements()->at(i);
5632 if (text.type == TextElement::ATOM) { 5644 if (text.type == TextElement::ATOM) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
5666 } 5678 }
5667 } 5679 }
5668 offset++; 5680 offset++;
5669 } 5681 }
5670 } 5682 }
5671 if (offset >= bm->length()) { 5683 if (offset >= bm->length()) {
5672 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5684 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5673 return; 5685 return;
5674 } 5686 }
5675 on_success()->FillInBMInfo(offset, 5687 on_success()->FillInBMInfo(offset,
5688 recursion_depth + 1,
5676 bm, 5689 bm,
5677 true); // Not at start after a text node. 5690 true); // Not at start after a text node.
5678 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5691 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5679 } 5692 }
5680 5693
5681 5694
5682 // ------------------------------------------------------------------- 5695 // -------------------------------------------------------------------
5683 // Dispatch table construction 5696 // Dispatch table construction
5684 5697
5685 5698
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
5904 macro_assembler.set_global(is_global); 5917 macro_assembler.set_global(is_global);
5905 5918
5906 return compiler.Assemble(&macro_assembler, 5919 return compiler.Assemble(&macro_assembler,
5907 node, 5920 node,
5908 data->capture_count, 5921 data->capture_count,
5909 pattern); 5922 pattern);
5910 } 5923 }
5911 5924
5912 5925
5913 }} // namespace v8::internal 5926 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | test/mjsunit/regexp-capture-3.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698