Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(830)

Side by Side Diff: src/jsregexp.cc

Issue 10448117: Limit work done analyzing regexps with very large fanout. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2174 matching lines...) Expand 10 before | Expand all | Expand 10 after
2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2185 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2186 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
2187 return on_success()->EatsAtLeast(still_to_find, 2187 return on_success()->EatsAtLeast(still_to_find,
2188 recursion_depth + 1, 2188 recursion_depth + 1,
2189 not_at_start); 2189 not_at_start);
2190 } 2190 }
2191 2191
2192 2192
2193 void ActionNode::FillInBMInfo(int offset, 2193 void ActionNode::FillInBMInfo(int offset,
2194 int recursion_depth, 2194 int recursion_depth,
2195 int budget,
2195 BoyerMooreLookahead* bm, 2196 BoyerMooreLookahead* bm,
2196 bool not_at_start) { 2197 bool not_at_start) {
2197 if (type_ == BEGIN_SUBMATCH) { 2198 if (type_ == BEGIN_SUBMATCH) {
2198 bm->SetRest(offset); 2199 bm->SetRest(offset);
2199 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { 2200 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
2200 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2201 on_success()->FillInBMInfo(
2202 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2201 } 2203 }
2202 SaveBMInfo(bm, not_at_start, offset); 2204 SaveBMInfo(bm, not_at_start, offset);
2203 } 2205 }
2204 2206
2205 2207
2206 int AssertionNode::EatsAtLeast(int still_to_find, 2208 int AssertionNode::EatsAtLeast(int still_to_find,
2207 int recursion_depth, 2209 int recursion_depth,
2208 bool not_at_start) { 2210 bool not_at_start) {
2209 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2211 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2210 // If we know we are not at the start and we are asked "how many characters 2212 // If we know we are not at the start and we are asked "how many characters
2211 // will you match if you succeed?" then we can answer anything since false 2213 // will you match if you succeed?" then we can answer anything since false
2212 // implies false. So lets just return the max answer (still_to_find) since 2214 // implies false. So lets just return the max answer (still_to_find) since
2213 // that won't prevent us from preloading a lot of characters for the other 2215 // that won't prevent us from preloading a lot of characters for the other
2214 // branches in the node graph. 2216 // branches in the node graph.
2215 if (type() == AT_START && not_at_start) return still_to_find; 2217 if (type() == AT_START && not_at_start) return still_to_find;
2216 return on_success()->EatsAtLeast(still_to_find, 2218 return on_success()->EatsAtLeast(still_to_find,
2217 recursion_depth + 1, 2219 recursion_depth + 1,
2218 not_at_start); 2220 not_at_start);
2219 } 2221 }
2220 2222
2221 2223
2222 void AssertionNode::FillInBMInfo(int offset, 2224 void AssertionNode::FillInBMInfo(int offset,
2223 int recursion_depth, 2225 int recursion_depth,
2226 int budget,
2224 BoyerMooreLookahead* bm, 2227 BoyerMooreLookahead* bm,
2225 bool not_at_start) { 2228 bool not_at_start) {
2226 // Match the behaviour of EatsAtLeast on this node. 2229 // Match the behaviour of EatsAtLeast on this node.
2227 if (type() == AT_START && not_at_start) return; 2230 if (type() == AT_START && not_at_start) return;
2228 on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2231 on_success()->FillInBMInfo(
2232 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
2229 SaveBMInfo(bm, not_at_start, offset); 2233 SaveBMInfo(bm, not_at_start, offset);
2230 } 2234 }
2231 2235
2232 2236
2233 int BackReferenceNode::EatsAtLeast(int still_to_find, 2237 int BackReferenceNode::EatsAtLeast(int still_to_find,
2234 int recursion_depth, 2238 int recursion_depth,
2235 bool not_at_start) { 2239 bool not_at_start) {
2236 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2240 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2237 return on_success()->EatsAtLeast(still_to_find, 2241 return on_success()->EatsAtLeast(still_to_find,
2238 recursion_depth + 1, 2242 recursion_depth + 1,
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after
2801 VisitMarker marker(info()); 2805 VisitMarker marker(info());
2802 return ChoiceNode::GetQuickCheckDetails(details, 2806 return ChoiceNode::GetQuickCheckDetails(details,
2803 compiler, 2807 compiler,
2804 characters_filled_in, 2808 characters_filled_in,
2805 not_at_start); 2809 not_at_start);
2806 } 2810 }
2807 2811
2808 2812
2809 void LoopChoiceNode::FillInBMInfo(int offset, 2813 void LoopChoiceNode::FillInBMInfo(int offset,
2810 int recursion_depth, 2814 int recursion_depth,
2815 int budget,
2811 BoyerMooreLookahead* bm, 2816 BoyerMooreLookahead* bm,
2812 bool not_at_start) { 2817 bool not_at_start) {
2813 if (body_can_be_zero_length_ || 2818 if (body_can_be_zero_length_ ||
2814 recursion_depth > RegExpCompiler::kMaxRecursion) { 2819 recursion_depth > RegExpCompiler::kMaxRecursion ||
2820 budget <= 0) {
2815 bm->SetRest(offset); 2821 bm->SetRest(offset);
2816 SaveBMInfo(bm, not_at_start, offset); 2822 SaveBMInfo(bm, not_at_start, offset);
2817 return; 2823 return;
2818 } 2824 }
2819 ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 2825 ChoiceNode::FillInBMInfo(
2826 offset, recursion_depth + 1, budget, bm, not_at_start);
ulan 2012/06/01 11:24:04 We either need to decrement the budget or add a co
2820 SaveBMInfo(bm, not_at_start, offset); 2827 SaveBMInfo(bm, not_at_start, offset);
2821 } 2828 }
2822 2829
2823 2830
2824 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2831 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2825 RegExpCompiler* compiler, 2832 RegExpCompiler* compiler,
2826 int characters_filled_in, 2833 int characters_filled_in,
2827 bool not_at_start) { 2834 bool not_at_start) {
2828 not_at_start = (not_at_start || not_at_start_); 2835 not_at_start = (not_at_start || not_at_start_);
2829 int choice_count = alternatives_->length(); 2836 int choice_count = alternatives_->length();
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2911 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2918 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2912 bool not_at_start = (trace->at_start() == Trace::FALSE); 2919 bool not_at_start = (trace->at_start() == Trace::FALSE);
2913 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2920 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2914 if (lookahead == NULL) { 2921 if (lookahead == NULL) {
2915 int eats_at_least = 2922 int eats_at_least =
2916 Min(kMaxLookaheadForBoyerMoore, 2923 Min(kMaxLookaheadForBoyerMoore,
2917 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 2924 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
2918 if (eats_at_least >= 1) { 2925 if (eats_at_least >= 1) {
2919 BoyerMooreLookahead* bm = 2926 BoyerMooreLookahead* bm =
2920 new BoyerMooreLookahead(eats_at_least, compiler); 2927 new BoyerMooreLookahead(eats_at_least, compiler);
2921 FillInBMInfo(0, 0, bm, not_at_start); 2928 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
2922 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2929 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2923 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2930 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2924 } 2931 }
2925 } else { 2932 } else {
2926 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; 2933 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
2927 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; 2934 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;
2928 } 2935 }
2929 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); 2936 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);
2930 if (next_is_word_character == Trace::UNKNOWN) { 2937 if (next_is_word_character == Trace::UNKNOWN) {
2931 Label before_non_word; 2938 Label before_non_word;
(...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after
3849 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. 3856 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
3850 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 3857 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
3851 if (lookahead == NULL) { 3858 if (lookahead == NULL) {
3852 eats_at_least = 3859 eats_at_least =
3853 Min(kMaxLookaheadForBoyerMoore, 3860 Min(kMaxLookaheadForBoyerMoore,
3854 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); 3861 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
3855 if (eats_at_least >= 1) { 3862 if (eats_at_least >= 1) {
3856 BoyerMooreLookahead* bm = 3863 BoyerMooreLookahead* bm =
3857 new BoyerMooreLookahead(eats_at_least, compiler); 3864 new BoyerMooreLookahead(eats_at_least, compiler);
3858 GuardedAlternative alt0 = alternatives_->at(0); 3865 GuardedAlternative alt0 = alternatives_->at(0);
3859 alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); 3866 alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
3860 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); 3867 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
3861 } 3868 }
3862 } else { 3869 } else {
3863 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); 3870 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
3864 } 3871 }
3865 } 3872 }
3866 } 3873 }
3867 } 3874 }
3868 3875
3869 if (eats_at_least == kEatsAtLeastNotYetInitialized) { 3876 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
(...skipping 1720 matching lines...) Expand 10 before | Expand all | Expand 10 after
5590 } 5597 }
5591 5598
5592 5599
5593 void Analysis::VisitAssertion(AssertionNode* that) { 5600 void Analysis::VisitAssertion(AssertionNode* that) {
5594 EnsureAnalyzed(that->on_success()); 5601 EnsureAnalyzed(that->on_success());
5595 } 5602 }
5596 5603
5597 5604
5598 void BackReferenceNode::FillInBMInfo(int offset, 5605 void BackReferenceNode::FillInBMInfo(int offset,
5599 int recursion_depth, 5606 int recursion_depth,
5607 int budget,
5600 BoyerMooreLookahead* bm, 5608 BoyerMooreLookahead* bm,
5601 bool not_at_start) { 5609 bool not_at_start) {
5602 // Working out the set of characters that a backreference can match is too 5610 // Working out the set of characters that a backreference can match is too
5603 // hard, so we just say that any character can match. 5611 // hard, so we just say that any character can match.
5604 bm->SetRest(offset); 5612 bm->SetRest(offset);
5605 SaveBMInfo(bm, not_at_start, offset); 5613 SaveBMInfo(bm, not_at_start, offset);
5606 } 5614 }
5607 5615
5608 5616
5609 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 5617 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
5610 RegExpMacroAssembler::kTableSize); 5618 RegExpMacroAssembler::kTableSize);
5611 5619
5612 5620
5613 void ChoiceNode::FillInBMInfo(int offset, 5621 void ChoiceNode::FillInBMInfo(int offset,
5614 int recursion_depth, 5622 int recursion_depth,
5623 int budget,
5615 BoyerMooreLookahead* bm, 5624 BoyerMooreLookahead* bm,
5616 bool not_at_start) { 5625 bool not_at_start) {
5617 ZoneList<GuardedAlternative>* alts = alternatives(); 5626 ZoneList<GuardedAlternative>* alts = alternatives();
5627 budget /= alts->length();
5618 for (int i = 0; i < alts->length(); i++) { 5628 for (int i = 0; i < alts->length(); i++) {
5619 GuardedAlternative& alt = alts->at(i); 5629 GuardedAlternative& alt = alts->at(i);
5620 if (alt.guards() != NULL && alt.guards()->length() != 0) { 5630 if (alt.guards() != NULL && alt.guards()->length() != 0) {
5621 bm->SetRest(offset); // Give up trying to fill in info. 5631 bm->SetRest(offset); // Give up trying to fill in info.
5622 SaveBMInfo(bm, not_at_start, offset); 5632 SaveBMInfo(bm, not_at_start, offset);
5623 return; 5633 return;
5624 } 5634 }
5625 alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 5635 alt.node()->FillInBMInfo(
5636 offset, recursion_depth + 1, budget, bm, not_at_start);
5626 } 5637 }
5627 SaveBMInfo(bm, not_at_start, offset); 5638 SaveBMInfo(bm, not_at_start, offset);
5628 } 5639 }
5629 5640
5630 5641
5631 void TextNode::FillInBMInfo(int initial_offset, 5642 void TextNode::FillInBMInfo(int initial_offset,
5632 int recursion_depth, 5643 int recursion_depth,
5644 int budget,
5633 BoyerMooreLookahead* bm, 5645 BoyerMooreLookahead* bm,
5634 bool not_at_start) { 5646 bool not_at_start) {
5635 if (initial_offset >= bm->length()) return; 5647 if (initial_offset >= bm->length()) return;
5636 int offset = initial_offset; 5648 int offset = initial_offset;
5637 int max_char = bm->max_char(); 5649 int max_char = bm->max_char();
5638 for (int i = 0; i < elements()->length(); i++) { 5650 for (int i = 0; i < elements()->length(); i++) {
5639 if (offset >= bm->length()) { 5651 if (offset >= bm->length()) {
5640 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5652 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5641 return; 5653 return;
5642 } 5654 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5679 } 5691 }
5680 offset++; 5692 offset++;
5681 } 5693 }
5682 } 5694 }
5683 if (offset >= bm->length()) { 5695 if (offset >= bm->length()) {
5684 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5696 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5685 return; 5697 return;
5686 } 5698 }
5687 on_success()->FillInBMInfo(offset, 5699 on_success()->FillInBMInfo(offset,
5688 recursion_depth + 1, 5700 recursion_depth + 1,
5701 budget - 1,
5689 bm, 5702 bm,
5690 true); // Not at start after a text node. 5703 true); // Not at start after a text node.
5691 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5704 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5692 } 5705 }
5693 5706
5694 5707
5695 // ------------------------------------------------------------------- 5708 // -------------------------------------------------------------------
5696 // Dispatch table construction 5709 // Dispatch table construction
5697 5710
5698 5711
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
5917 macro_assembler.set_global(is_global); 5930 macro_assembler.set_global(is_global);
5918 5931
5919 return compiler.Assemble(&macro_assembler, 5932 return compiler.Assemble(&macro_assembler,
5920 node, 5933 node,
5921 data->capture_count, 5934 data->capture_count,
5922 pattern); 5935 pattern);
5923 } 5936 }
5924 5937
5925 5938
5926 }} // namespace v8::internal 5939 }} // namespace v8::internal
OLDNEW
« src/jsregexp.h ('K') | « src/jsregexp.h ('k') | test/mjsunit/regexp-capture.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698