 Chromium Code Reviews
 Chromium Code Reviews Issue 10448117:
  Limit work done analyzing regexps with very large fanout.  (Closed) 
  Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
    
  
    Issue 10448117:
  Limit work done analyzing regexps with very large fanout.  (Closed) 
  Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/| OLD | NEW | 
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright | 
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. | 
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above | 
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following | 
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided | 
| (...skipping 2174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
| 2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 
| 2187 return on_success()->EatsAtLeast(still_to_find, | 2187 return on_success()->EatsAtLeast(still_to_find, | 
| 2188 recursion_depth + 1, | 2188 recursion_depth + 1, | 
| 2189 not_at_start); | 2189 not_at_start); | 
| 2190 } | 2190 } | 
| 2191 | 2191 | 
| 2192 | 2192 | 
| 2193 void ActionNode::FillInBMInfo(int offset, | 2193 void ActionNode::FillInBMInfo(int offset, | 
| 2194 int recursion_depth, | 2194 int recursion_depth, | 
| 2195 int budget, | |
| 2195 BoyerMooreLookahead* bm, | 2196 BoyerMooreLookahead* bm, | 
| 2196 bool not_at_start) { | 2197 bool not_at_start) { | 
| 2197 if (type_ == BEGIN_SUBMATCH) { | 2198 if (type_ == BEGIN_SUBMATCH) { | 
| 2198 bm->SetRest(offset); | 2199 bm->SetRest(offset); | 
| 2199 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { | 2200 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { | 
| 2200 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 2201 on_success()->FillInBMInfo( | 
| 2202 offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
| 2201 } | 2203 } | 
| 2202 SaveBMInfo(bm, not_at_start, offset); | 2204 SaveBMInfo(bm, not_at_start, offset); | 
| 2203 } | 2205 } | 
| 2204 | 2206 | 
| 2205 | 2207 | 
| 2206 int AssertionNode::EatsAtLeast(int still_to_find, | 2208 int AssertionNode::EatsAtLeast(int still_to_find, | 
| 2207 int recursion_depth, | 2209 int recursion_depth, | 
| 2208 bool not_at_start) { | 2210 bool not_at_start) { | 
| 2209 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 2211 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
| 2210 // If we know we are not at the start and we are asked "how many characters | 2212 // If we know we are not at the start and we are asked "how many characters | 
| 2211 // will you match if you succeed?" then we can answer anything since false | 2213 // will you match if you succeed?" then we can answer anything since false | 
| 2212 // implies false. So lets just return the max answer (still_to_find) since | 2214 // implies false. So lets just return the max answer (still_to_find) since | 
| 2213 // that won't prevent us from preloading a lot of characters for the other | 2215 // that won't prevent us from preloading a lot of characters for the other | 
| 2214 // branches in the node graph. | 2216 // branches in the node graph. | 
| 2215 if (type() == AT_START && not_at_start) return still_to_find; | 2217 if (type() == AT_START && not_at_start) return still_to_find; | 
| 2216 return on_success()->EatsAtLeast(still_to_find, | 2218 return on_success()->EatsAtLeast(still_to_find, | 
| 2217 recursion_depth + 1, | 2219 recursion_depth + 1, | 
| 2218 not_at_start); | 2220 not_at_start); | 
| 2219 } | 2221 } | 
| 2220 | 2222 | 
| 2221 | 2223 | 
| 2222 void AssertionNode::FillInBMInfo(int offset, | 2224 void AssertionNode::FillInBMInfo(int offset, | 
| 2223 int recursion_depth, | 2225 int recursion_depth, | 
| 2226 int budget, | |
| 2224 BoyerMooreLookahead* bm, | 2227 BoyerMooreLookahead* bm, | 
| 2225 bool not_at_start) { | 2228 bool not_at_start) { | 
| 2226 // Match the behaviour of EatsAtLeast on this node. | 2229 // Match the behaviour of EatsAtLeast on this node. | 
| 2227 if (type() == AT_START && not_at_start) return; | 2230 if (type() == AT_START && not_at_start) return; | 
| 2228 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 2231 on_success()->FillInBMInfo( | 
| 2232 offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
| 2229 SaveBMInfo(bm, not_at_start, offset); | 2233 SaveBMInfo(bm, not_at_start, offset); | 
| 2230 } | 2234 } | 
| 2231 | 2235 | 
| 2232 | 2236 | 
| 2233 int BackReferenceNode::EatsAtLeast(int still_to_find, | 2237 int BackReferenceNode::EatsAtLeast(int still_to_find, | 
| 2234 int recursion_depth, | 2238 int recursion_depth, | 
| 2235 bool not_at_start) { | 2239 bool not_at_start) { | 
| 2236 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 2240 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
| 2237 return on_success()->EatsAtLeast(still_to_find, | 2241 return on_success()->EatsAtLeast(still_to_find, | 
| 2238 recursion_depth + 1, | 2242 recursion_depth + 1, | 
| (...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2801 VisitMarker marker(info()); | 2805 VisitMarker marker(info()); | 
| 2802 return ChoiceNode::GetQuickCheckDetails(details, | 2806 return ChoiceNode::GetQuickCheckDetails(details, | 
| 2803 compiler, | 2807 compiler, | 
| 2804 characters_filled_in, | 2808 characters_filled_in, | 
| 2805 not_at_start); | 2809 not_at_start); | 
| 2806 } | 2810 } | 
| 2807 | 2811 | 
| 2808 | 2812 | 
| 2809 void LoopChoiceNode::FillInBMInfo(int offset, | 2813 void LoopChoiceNode::FillInBMInfo(int offset, | 
| 2810 int recursion_depth, | 2814 int recursion_depth, | 
| 2815 int budget, | |
| 2811 BoyerMooreLookahead* bm, | 2816 BoyerMooreLookahead* bm, | 
| 2812 bool not_at_start) { | 2817 bool not_at_start) { | 
| 2813 if (body_can_be_zero_length_ || | 2818 if (body_can_be_zero_length_ || | 
| 2814 recursion_depth > RegExpCompiler::kMaxRecursion) { | 2819 recursion_depth > RegExpCompiler::kMaxRecursion || | 
| 2820 budget <= 0) { | |
| 2815 bm->SetRest(offset); | 2821 bm->SetRest(offset); | 
| 2816 SaveBMInfo(bm, not_at_start, offset); | 2822 SaveBMInfo(bm, not_at_start, offset); | 
| 2817 return; | 2823 return; | 
| 2818 } | 2824 } | 
| 2819 ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 2825 ChoiceNode::FillInBMInfo( | 
| 2826 offset, recursion_depth + 1, budget, bm, not_at_start); | |
| 
ulan
2012/06/01 11:24:04
We either need to decrement the budget or add a co
 | |
| 2820 SaveBMInfo(bm, not_at_start, offset); | 2827 SaveBMInfo(bm, not_at_start, offset); | 
| 2821 } | 2828 } | 
| 2822 | 2829 | 
| 2823 | 2830 | 
| 2824 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2831 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 
| 2825 RegExpCompiler* compiler, | 2832 RegExpCompiler* compiler, | 
| 2826 int characters_filled_in, | 2833 int characters_filled_in, | 
| 2827 bool not_at_start) { | 2834 bool not_at_start) { | 
| 2828 not_at_start = (not_at_start || not_at_start_); | 2835 not_at_start = (not_at_start || not_at_start_); | 
| 2829 int choice_count = alternatives_->length(); | 2836 int choice_count = alternatives_->length(); | 
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2911 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2918 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 
| 2912 bool not_at_start = (trace->at_start() == Trace::FALSE); | 2919 bool not_at_start = (trace->at_start() == Trace::FALSE); | 
| 2913 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2920 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 
| 2914 if (lookahead == NULL) { | 2921 if (lookahead == NULL) { | 
| 2915 int eats_at_least = | 2922 int eats_at_least = | 
| 2916 Min(kMaxLookaheadForBoyerMoore, | 2923 Min(kMaxLookaheadForBoyerMoore, | 
| 2917 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 2924 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 
| 2918 if (eats_at_least >= 1) { | 2925 if (eats_at_least >= 1) { | 
| 2919 BoyerMooreLookahead* bm = | 2926 BoyerMooreLookahead* bm = | 
| 2920 new BoyerMooreLookahead(eats_at_least, compiler); | 2927 new BoyerMooreLookahead(eats_at_least, compiler); | 
| 2921 FillInBMInfo(0, 0, bm, not_at_start); | 2928 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); | 
| 2922 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 2929 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 
| 2923 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 2930 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 
| 2924 } | 2931 } | 
| 2925 } else { | 2932 } else { | 
| 2926 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 2933 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 
| 2927 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 2934 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 
| 2928 } | 2935 } | 
| 2929 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); | 2936 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); | 
| 2930 if (next_is_word_character == Trace::UNKNOWN) { | 2937 if (next_is_word_character == Trace::UNKNOWN) { | 
| 2931 Label before_non_word; | 2938 Label before_non_word; | 
| (...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3849 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. | 3856 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. | 
| 3850 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 3857 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 
| 3851 if (lookahead == NULL) { | 3858 if (lookahead == NULL) { | 
| 3852 eats_at_least = | 3859 eats_at_least = | 
| 3853 Min(kMaxLookaheadForBoyerMoore, | 3860 Min(kMaxLookaheadForBoyerMoore, | 
| 3854 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 3861 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 
| 3855 if (eats_at_least >= 1) { | 3862 if (eats_at_least >= 1) { | 
| 3856 BoyerMooreLookahead* bm = | 3863 BoyerMooreLookahead* bm = | 
| 3857 new BoyerMooreLookahead(eats_at_least, compiler); | 3864 new BoyerMooreLookahead(eats_at_least, compiler); | 
| 3858 GuardedAlternative alt0 = alternatives_->at(0); | 3865 GuardedAlternative alt0 = alternatives_->at(0); | 
| 3859 alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); | 3866 alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); | 
| 3860 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); | 3867 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); | 
| 3861 } | 3868 } | 
| 3862 } else { | 3869 } else { | 
| 3863 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); | 3870 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); | 
| 3864 } | 3871 } | 
| 3865 } | 3872 } | 
| 3866 } | 3873 } | 
| 3867 } | 3874 } | 
| 3868 | 3875 | 
| 3869 if (eats_at_least == kEatsAtLeastNotYetInitialized) { | 3876 if (eats_at_least == kEatsAtLeastNotYetInitialized) { | 
| (...skipping 1720 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5590 } | 5597 } | 
| 5591 | 5598 | 
| 5592 | 5599 | 
| 5593 void Analysis::VisitAssertion(AssertionNode* that) { | 5600 void Analysis::VisitAssertion(AssertionNode* that) { | 
| 5594 EnsureAnalyzed(that->on_success()); | 5601 EnsureAnalyzed(that->on_success()); | 
| 5595 } | 5602 } | 
| 5596 | 5603 | 
| 5597 | 5604 | 
| 5598 void BackReferenceNode::FillInBMInfo(int offset, | 5605 void BackReferenceNode::FillInBMInfo(int offset, | 
| 5599 int recursion_depth, | 5606 int recursion_depth, | 
| 5607 int budget, | |
| 5600 BoyerMooreLookahead* bm, | 5608 BoyerMooreLookahead* bm, | 
| 5601 bool not_at_start) { | 5609 bool not_at_start) { | 
| 5602 // Working out the set of characters that a backreference can match is too | 5610 // Working out the set of characters that a backreference can match is too | 
| 5603 // hard, so we just say that any character can match. | 5611 // hard, so we just say that any character can match. | 
| 5604 bm->SetRest(offset); | 5612 bm->SetRest(offset); | 
| 5605 SaveBMInfo(bm, not_at_start, offset); | 5613 SaveBMInfo(bm, not_at_start, offset); | 
| 5606 } | 5614 } | 
| 5607 | 5615 | 
| 5608 | 5616 | 
| 5609 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == | 5617 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == | 
| 5610 RegExpMacroAssembler::kTableSize); | 5618 RegExpMacroAssembler::kTableSize); | 
| 5611 | 5619 | 
| 5612 | 5620 | 
| 5613 void ChoiceNode::FillInBMInfo(int offset, | 5621 void ChoiceNode::FillInBMInfo(int offset, | 
| 5614 int recursion_depth, | 5622 int recursion_depth, | 
| 5623 int budget, | |
| 5615 BoyerMooreLookahead* bm, | 5624 BoyerMooreLookahead* bm, | 
| 5616 bool not_at_start) { | 5625 bool not_at_start) { | 
| 5617 ZoneList<GuardedAlternative>* alts = alternatives(); | 5626 ZoneList<GuardedAlternative>* alts = alternatives(); | 
| 5627 budget /= alts->length(); | |
| 5618 for (int i = 0; i < alts->length(); i++) { | 5628 for (int i = 0; i < alts->length(); i++) { | 
| 5619 GuardedAlternative& alt = alts->at(i); | 5629 GuardedAlternative& alt = alts->at(i); | 
| 5620 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 5630 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 
| 5621 bm->SetRest(offset); // Give up trying to fill in info. | 5631 bm->SetRest(offset); // Give up trying to fill in info. | 
| 5622 SaveBMInfo(bm, not_at_start, offset); | 5632 SaveBMInfo(bm, not_at_start, offset); | 
| 5623 return; | 5633 return; | 
| 5624 } | 5634 } | 
| 5625 alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 5635 alt.node()->FillInBMInfo( | 
| 5636 offset, recursion_depth + 1, budget, bm, not_at_start); | |
| 5626 } | 5637 } | 
| 5627 SaveBMInfo(bm, not_at_start, offset); | 5638 SaveBMInfo(bm, not_at_start, offset); | 
| 5628 } | 5639 } | 
| 5629 | 5640 | 
| 5630 | 5641 | 
| 5631 void TextNode::FillInBMInfo(int initial_offset, | 5642 void TextNode::FillInBMInfo(int initial_offset, | 
| 5632 int recursion_depth, | 5643 int recursion_depth, | 
| 5644 int budget, | |
| 5633 BoyerMooreLookahead* bm, | 5645 BoyerMooreLookahead* bm, | 
| 5634 bool not_at_start) { | 5646 bool not_at_start) { | 
| 5635 if (initial_offset >= bm->length()) return; | 5647 if (initial_offset >= bm->length()) return; | 
| 5636 int offset = initial_offset; | 5648 int offset = initial_offset; | 
| 5637 int max_char = bm->max_char(); | 5649 int max_char = bm->max_char(); | 
| 5638 for (int i = 0; i < elements()->length(); i++) { | 5650 for (int i = 0; i < elements()->length(); i++) { | 
| 5639 if (offset >= bm->length()) { | 5651 if (offset >= bm->length()) { | 
| 5640 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5652 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 
| 5641 return; | 5653 return; | 
| 5642 } | 5654 } | 
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5679 } | 5691 } | 
| 5680 offset++; | 5692 offset++; | 
| 5681 } | 5693 } | 
| 5682 } | 5694 } | 
| 5683 if (offset >= bm->length()) { | 5695 if (offset >= bm->length()) { | 
| 5684 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5696 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 
| 5685 return; | 5697 return; | 
| 5686 } | 5698 } | 
| 5687 on_success()->FillInBMInfo(offset, | 5699 on_success()->FillInBMInfo(offset, | 
| 5688 recursion_depth + 1, | 5700 recursion_depth + 1, | 
| 5701 budget - 1, | |
| 5689 bm, | 5702 bm, | 
| 5690 true); // Not at start after a text node. | 5703 true); // Not at start after a text node. | 
| 5691 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5704 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 
| 5692 } | 5705 } | 
| 5693 | 5706 | 
| 5694 | 5707 | 
| 5695 // ------------------------------------------------------------------- | 5708 // ------------------------------------------------------------------- | 
| 5696 // Dispatch table construction | 5709 // Dispatch table construction | 
| 5697 | 5710 | 
| 5698 | 5711 | 
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5917 macro_assembler.set_global(is_global); | 5930 macro_assembler.set_global(is_global); | 
| 5918 | 5931 | 
| 5919 return compiler.Assemble(¯o_assembler, | 5932 return compiler.Assemble(¯o_assembler, | 
| 5920 node, | 5933 node, | 
| 5921 data->capture_count, | 5934 data->capture_count, | 
| 5922 pattern); | 5935 pattern); | 
| 5923 } | 5936 } | 
| 5924 | 5937 | 
| 5925 | 5938 | 
| 5926 }} // namespace v8::internal | 5939 }} // namespace v8::internal | 
| OLD | NEW |