Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Side by Side Diff: src/jsregexp.cc

Issue 10536002: Backport to 3.10 of r11696: Limit work done analyzing regexps with very large fanout. (Closed) Base URL: http://v8.googlecode.com/svn/branches/3.10/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/version.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2157 matching lines...) Expand 10 before | Expand all | Expand 10 after
2168 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2168 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2169 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2169 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2170 return on_success()->EatsAtLeast(still_to_find, 2170 return on_success()->EatsAtLeast(still_to_find,
2171 recursion_depth + 1, 2171 recursion_depth + 1,
2172 not_at_start); 2172 not_at_start);
2173 } 2173 }
2174 2174
2175 2175
2176 void ActionNode::FillInBMInfo(int offset, 2176 void ActionNode::FillInBMInfo(int offset,
2177 int recursion_depth, 2177 int recursion_depth,
2178 int budget,
2178 BoyerMooreLookahead* bm, 2179 BoyerMooreLookahead* bm,
2179 bool not_at_start) { 2180 bool not_at_start) {
2180 if (type_ == BEGIN_SUBMATCH) { 2181 if (type_ == BEGIN_SUBMATCH) {
2181 bm->SetRest(offset); 2182 bm->SetRest(offset);
2182 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { 2183 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2183 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2184 on_success()->FillInBMInfo(
2185 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2184 } 2186 }
2185 SaveBMInfo(bm, not_at_start, offset); 2187 SaveBMInfo(bm, not_at_start, offset);
2186 } 2188 }
2187 2189
2188 2190
2189 int AssertionNode::EatsAtLeast(int still_to_find, 2191 int AssertionNode::EatsAtLeast(int still_to_find,
2190 int recursion_depth, 2192 int recursion_depth,
2191 bool not_at_start) { 2193 bool not_at_start) {
2192 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2194 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2193 // If we know we are not at the start and we are asked "how many characters 2195 // If we know we are not at the start and we are asked "how many characters
2194 // will you match if you succeed?" then we can answer anything since false 2196 // will you match if you succeed?" then we can answer anything since false
2195 // implies false. So lets just return the max answer (still_to_find) since 2197 // implies false. So lets just return the max answer (still_to_find) since
2196 // that won't prevent us from preloading a lot of characters for the other 2198 // that won't prevent us from preloading a lot of characters for the other
2197 // branches in the node graph. 2199 // branches in the node graph.
2198 if (type() == AT_START && not_at_start) return still_to_find; 2200 if (type() == AT_START && not_at_start) return still_to_find;
2199 return on_success()->EatsAtLeast(still_to_find, 2201 return on_success()->EatsAtLeast(still_to_find,
2200 recursion_depth + 1, 2202 recursion_depth + 1,
2201 not_at_start); 2203 not_at_start);
2202 } 2204 }
2203 2205
2204 2206
2205 void AssertionNode::FillInBMInfo(int offset, 2207 void AssertionNode::FillInBMInfo(int offset,
2206 int recursion_depth, 2208 int recursion_depth,
2209 int budget,
2207 BoyerMooreLookahead* bm, 2210 BoyerMooreLookahead* bm,
2208 bool not_at_start) { 2211 bool not_at_start) {
2209 // Match the behaviour of EatsAtLeast on this node. 2212 // Match the behaviour of EatsAtLeast on this node.
2210 if (type() == AT_START && not_at_start) return; 2213 if (type() == AT_START && not_at_start) return;
2211 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2214 on_success()->FillInBMInfo(
2215 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2212 SaveBMInfo(bm, not_at_start, offset); 2216 SaveBMInfo(bm, not_at_start, offset);
2213 } 2217 }
2214 2218
2215 2219
2216 int BackReferenceNode::EatsAtLeast(int still_to_find, 2220 int BackReferenceNode::EatsAtLeast(int still_to_find,
2217 int recursion_depth, 2221 int recursion_depth,
2218 bool not_at_start) { 2222 bool not_at_start) {
2219 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2223 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2220 return on_success()->EatsAtLeast(still_to_find, 2224 return on_success()->EatsAtLeast(still_to_find,
2221 recursion_depth + 1, 2225 recursion_depth + 1,
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after
2784 VisitMarker marker(info()); 2788 VisitMarker marker(info());
2785 return ChoiceNode::GetQuickCheckDetails(details, 2789 return ChoiceNode::GetQuickCheckDetails(details,
2786 compiler, 2790 compiler,
2787 characters_filled_in, 2791 characters_filled_in,
2788 not_at_start); 2792 not_at_start);
2789 } 2793 }
2790 2794
2791 2795
2792 void LoopChoiceNode::FillInBMInfo(int offset, 2796 void LoopChoiceNode::FillInBMInfo(int offset,
2793 int recursion_depth, 2797 int recursion_depth,
2798 int budget,
2794 BoyerMooreLookahead* bm, 2799 BoyerMooreLookahead* bm,
2795 bool not_at_start) { 2800 bool not_at_start) {
2796 if (body_can_be_zero_length_ || 2801 if (body_can_be_zero_length_ ||
2797 recursion_depth > RegExpCompiler::kMaxRecursion) { 2802 recursion_depth > RegExpCompiler::kMaxRecursion ||
2803 budget <= 0) {
2798 bm->SetRest(offset); 2804 bm->SetRest(offset);
2799 SaveBMInfo(bm, not_at_start, offset); 2805 SaveBMInfo(bm, not_at_start, offset);
2800 return; 2806 return;
2801 } 2807 }
2802 ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2808 ChoiceNode::FillInBMInfo(
2809 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2803 SaveBMInfo(bm, not_at_start, offset); 2810 SaveBMInfo(bm, not_at_start, offset);
2804 } 2811 }
2805 2812
2806 2813
2807 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2814 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2808 RegExpCompiler* compiler, 2815 RegExpCompiler* compiler,
2809 int characters_filled_in, 2816 int characters_filled_in,
2810 bool not_at_start) { 2817 bool not_at_start) {
2811 not_at_start = (not_at_start || not_at_start_); 2818 not_at_start = (not_at_start || not_at_start_);
2812 int choice_count = alternatives_->length(); 2819 int choice_count = alternatives_->length();
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2894 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2901 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2895 bool not_at_start = (trace->at_start() == Trace::FALSE); 2902 bool not_at_start = (trace->at_start() == Trace::FALSE);
2896 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2903 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2897 if (lookahead == NULL) { 2904 if (lookahead == NULL) {
2898 int eats_at_least = 2905 int eats_at_least =
2899 Min(kMaxLookaheadForBoyerMoore, 2906 Min(kMaxLookaheadForBoyerMoore,
2900 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 2907 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
2901 if (eats_at_least >= 1) { 2908 if (eats_at_least >= 1) {
2902 BoyerMooreLookahead* bm = 2909 BoyerMooreLookahead* bm =
2903 new BoyerMooreLookahead(eats_at_least, compiler); 2910 new BoyerMooreLookahead(eats_at_least, compiler);
2904 FillInBMInfo(0, 0, bm, not_at_start); 2911 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
2905 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2912 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2906 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2913 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2907 } 2914 }
2908 } else { 2915 } else {
2909 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2916 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2910 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2917 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2911 } 2918 }
2912 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); 2919 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);
2913 if (next_is_word_character == Trace::UNKNOWN) { 2920 if (next_is_word_character == Trace::UNKNOWN) {
2914 Label before_non_word; 2921 Label before_non_word;
(...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after
3832 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. 3839 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
3833 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 3840 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3834 if (lookahead == NULL) { 3841 if (lookahead == NULL) {
3835 eats_at_least = 3842 eats_at_least =
3836 Min(kMaxLookaheadForBoyerMoore, 3843 Min(kMaxLookaheadForBoyerMoore,
3837 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 3844 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
3838 if (eats_at_least >= 1) { 3845 if (eats_at_least >= 1) {
3839 BoyerMooreLookahead* bm = 3846 BoyerMooreLookahead* bm =
3840 new BoyerMooreLookahead(eats_at_least, compiler); 3847 new BoyerMooreLookahead(eats_at_least, compiler);
3841 GuardedAlternative alt0 = alternatives_->at(0); 3848 GuardedAlternative alt0 = alternatives_->at(0);
3842 alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); 3849 alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
3843 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); 3850 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
3844 } 3851 }
3845 } else { 3852 } else {
3846 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); 3853 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
3847 } 3854 }
3848 } 3855 }
3849 } 3856 }
3850 } 3857 }
3851 3858
3852 if (eats_at_least == kEatsAtLeastNotYetInitialized) { 3859 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
(...skipping 1720 matching lines...) Expand 10 before | Expand all | Expand 10 after
5573 } 5580 }
5574 5581
5575 5582
5576 void Analysis::VisitAssertion(AssertionNode* that) { 5583 void Analysis::VisitAssertion(AssertionNode* that) {
5577 EnsureAnalyzed(that->on_success()); 5584 EnsureAnalyzed(that->on_success());
5578 } 5585 }
5579 5586
5580 5587
5581 void BackReferenceNode::FillInBMInfo(int offset, 5588 void BackReferenceNode::FillInBMInfo(int offset,
5582 int recursion_depth, 5589 int recursion_depth,
5590 int budget,
5583 BoyerMooreLookahead* bm, 5591 BoyerMooreLookahead* bm,
5584 bool not_at_start) { 5592 bool not_at_start) {
5585 // Working out the set of characters that a backreference can match is too 5593 // Working out the set of characters that a backreference can match is too
5586 // hard, so we just say that any character can match. 5594 // hard, so we just say that any character can match.
5587 bm->SetRest(offset); 5595 bm->SetRest(offset);
5588 SaveBMInfo(bm, not_at_start, offset); 5596 SaveBMInfo(bm, not_at_start, offset);
5589 } 5597 }
5590 5598
5591 5599
5592 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 5600 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
5593 RegExpMacroAssembler::kTableSize); 5601 RegExpMacroAssembler::kTableSize);
5594 5602
5595 5603
5596 void ChoiceNode::FillInBMInfo(int offset, 5604 void ChoiceNode::FillInBMInfo(int offset,
5597 int recursion_depth, 5605 int recursion_depth,
5606 int budget,
5598 BoyerMooreLookahead* bm, 5607 BoyerMooreLookahead* bm,
5599 bool not_at_start) { 5608 bool not_at_start) {
5600 ZoneList<GuardedAlternative>* alts = alternatives(); 5609 ZoneList<GuardedAlternative>* alts = alternatives();
5610 budget = (budget - 1) / alts->length();
5601 for (int i = 0; i < alts->length(); i++) { 5611 for (int i = 0; i < alts->length(); i++) {
5602 GuardedAlternative& alt = alts->at(i); 5612 GuardedAlternative& alt = alts->at(i);
5603 if (alt.guards() != NULL && alt.guards()->length() != 0) { 5613 if (alt.guards() != NULL && alt.guards()->length() != 0) {
5604 bm->SetRest(offset); // Give up trying to fill in info. 5614 bm->SetRest(offset); // Give up trying to fill in info.
5605 SaveBMInfo(bm, not_at_start, offset); 5615 SaveBMInfo(bm, not_at_start, offset);
5606 return; 5616 return;
5607 } 5617 }
5608 alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 5618 alt.node()->FillInBMInfo(
5619 offset, recursion_depth + 1, budget, bm, not_at_start);
5609 } 5620 }
5610 SaveBMInfo(bm, not_at_start, offset); 5621 SaveBMInfo(bm, not_at_start, offset);
5611 } 5622 }
5612 5623
5613 5624
5614 void TextNode::FillInBMInfo(int initial_offset, 5625 void TextNode::FillInBMInfo(int initial_offset,
5615 int recursion_depth, 5626 int recursion_depth,
5627 int budget,
5616 BoyerMooreLookahead* bm, 5628 BoyerMooreLookahead* bm,
5617 bool not_at_start) { 5629 bool not_at_start) {
5618 if (initial_offset >= bm->length()) return; 5630 if (initial_offset >= bm->length()) return;
5619 int offset = initial_offset; 5631 int offset = initial_offset;
5620 int max_char = bm->max_char(); 5632 int max_char = bm->max_char();
5621 for (int i = 0; i < elements()->length(); i++) { 5633 for (int i = 0; i < elements()->length(); i++) {
5622 if (offset >= bm->length()) { 5634 if (offset >= bm->length()) {
5623 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5635 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5624 return; 5636 return;
5625 } 5637 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5662 } 5674 }
5663 offset++; 5675 offset++;
5664 } 5676 }
5665 } 5677 }
5666 if (offset >= bm->length()) { 5678 if (offset >= bm->length()) {
5667 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5679 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5668 return; 5680 return;
5669 } 5681 }
5670 on_success()->FillInBMInfo(offset, 5682 on_success()->FillInBMInfo(offset,
5671 recursion_depth + 1, 5683 recursion_depth + 1,
5684 budget - 1,
5672 bm, 5685 bm,
5673 true); // Not at start after a text node. 5686 true); // Not at start after a text node.
5674 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5687 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5675 } 5688 }
5676 5689
5677 5690
5678 // ------------------------------------------------------------------- 5691 // -------------------------------------------------------------------
5679 // Dispatch table construction 5692 // Dispatch table construction
5680 5693
5681 5694
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
5897 } 5910 }
5898 5911
5899 return compiler.Assemble(&macro_assembler, 5912 return compiler.Assemble(&macro_assembler,
5900 node, 5913 node,
5901 data->capture_count, 5914 data->capture_count,
5902 pattern); 5915 pattern);
5903 } 5916 }
5904 5917
5905 5918
5906 }} // namespace v8::internal 5919 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/version.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698