Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(341)

Side by Side Diff: src/jsregexp.h

Issue 10448117: Limit work done analyzing regexps with very large fanout. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 // character and that has no guards on it. 574 // character and that has no guards on it.
575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
576 RegExpCompiler* compiler) { 576 RegExpCompiler* compiler) {
577 return NULL; 577 return NULL;
578 } 578 }
579 579
580 // Collects information on the possible code units (mod 128) that can match if 580 // Collects information on the possible code units (mod 128) that can match if
581 // we look forward. This is used for a Boyer-Moore-like string searching 581 // we look forward. This is used for a Boyer-Moore-like string searching
582 // implementation. TODO(erikcorry): This should share more code with 582 // implementation. TODO(erikcorry): This should share more code with
583 // EatsAtLeast, GetQuickCheckDetails. 583 // EatsAtLeast, GetQuickCheckDetails.
584 static const int kFillInBMBudget = 200;
584 virtual void FillInBMInfo(int offset, 585 virtual void FillInBMInfo(int offset,
585 int recursion_depth, 586 int recursion_depth,
587 int budget,
ulan 2012/06/01 11:24:04 Please add a description for budget.
586 BoyerMooreLookahead* bm, 588 BoyerMooreLookahead* bm,
587 bool not_at_start) { 589 bool not_at_start) {
588 UNREACHABLE(); 590 UNREACHABLE();
589 } 591 }
590 592
591 // If we know that the input is ASCII then there are some nodes that can 593 // If we know that the input is ASCII then there are some nodes that can
592 // never match. This method returns a node that can be substituted for 594 // never match. This method returns a node that can be substituted for
593 // itself, or NULL if the node can never match. 595 // itself, or NULL if the node can never match.
594 virtual RegExpNode* FilterASCII(int depth) { return this; } 596 virtual RegExpNode* FilterASCII(int depth) { return this; }
595 // Helper for FilterASCII. 597 // Helper for FilterASCII.
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 680
679 class SeqRegExpNode: public RegExpNode { 681 class SeqRegExpNode: public RegExpNode {
680 public: 682 public:
681 explicit SeqRegExpNode(RegExpNode* on_success) 683 explicit SeqRegExpNode(RegExpNode* on_success)
682 : on_success_(on_success) { } 684 : on_success_(on_success) { }
683 RegExpNode* on_success() { return on_success_; } 685 RegExpNode* on_success() { return on_success_; }
684 void set_on_success(RegExpNode* node) { on_success_ = node; } 686 void set_on_success(RegExpNode* node) { on_success_ = node; }
685 virtual RegExpNode* FilterASCII(int depth); 687 virtual RegExpNode* FilterASCII(int depth);
686 virtual void FillInBMInfo(int offset, 688 virtual void FillInBMInfo(int offset,
687 int recursion_depth, 689 int recursion_depth,
690 int budget,
688 BoyerMooreLookahead* bm, 691 BoyerMooreLookahead* bm,
689 bool not_at_start) { 692 bool not_at_start) {
690 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 693 on_success_->FillInBMInfo(
694 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
691 if (offset == 0) set_bm_info(not_at_start, bm); 695 if (offset == 0) set_bm_info(not_at_start, bm);
692 } 696 }
693 697
694 protected: 698 protected:
695 RegExpNode* FilterSuccessor(int depth); 699 RegExpNode* FilterSuccessor(int depth);
696 700
697 private: 701 private:
698 RegExpNode* on_success_; 702 RegExpNode* on_success_;
699 }; 703 };
700 704
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
735 bool not_at_start); 739 bool not_at_start);
736 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 740 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
737 RegExpCompiler* compiler, 741 RegExpCompiler* compiler,
738 int filled_in, 742 int filled_in,
739 bool not_at_start) { 743 bool not_at_start) {
740 return on_success()->GetQuickCheckDetails( 744 return on_success()->GetQuickCheckDetails(
741 details, compiler, filled_in, not_at_start); 745 details, compiler, filled_in, not_at_start);
742 } 746 }
743 virtual void FillInBMInfo(int offset, 747 virtual void FillInBMInfo(int offset,
744 int recursion_depth, 748 int recursion_depth,
749 int budget,
745 BoyerMooreLookahead* bm, 750 BoyerMooreLookahead* bm,
746 bool not_at_start); 751 bool not_at_start);
747 Type type() { return type_; } 752 Type type() { return type_; }
748 // TODO(erikcorry): We should allow some action nodes in greedy loops. 753 // TODO(erikcorry): We should allow some action nodes in greedy loops.
749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 754 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
750 755
751 private: 756 private:
752 union { 757 union {
753 struct { 758 struct {
754 int reg; 759 int reg;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
806 RegExpCompiler* compiler, 811 RegExpCompiler* compiler,
807 int characters_filled_in, 812 int characters_filled_in,
808 bool not_at_start); 813 bool not_at_start);
809 ZoneList<TextElement>* elements() { return elms_; } 814 ZoneList<TextElement>* elements() { return elms_; }
810 void MakeCaseIndependent(bool is_ascii); 815 void MakeCaseIndependent(bool is_ascii);
811 virtual int GreedyLoopTextLength(); 816 virtual int GreedyLoopTextLength();
812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 817 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
813 RegExpCompiler* compiler); 818 RegExpCompiler* compiler);
814 virtual void FillInBMInfo(int offset, 819 virtual void FillInBMInfo(int offset,
815 int recursion_depth, 820 int recursion_depth,
821 int budget,
816 BoyerMooreLookahead* bm, 822 BoyerMooreLookahead* bm,
817 bool not_at_start); 823 bool not_at_start);
818 void CalculateOffsets(); 824 void CalculateOffsets();
819 virtual RegExpNode* FilterASCII(int depth); 825 virtual RegExpNode* FilterASCII(int depth);
820 826
821 private: 827 private:
822 enum TextEmitPassType { 828 enum TextEmitPassType {
823 NON_ASCII_MATCH, // Check for characters that can't match. 829 NON_ASCII_MATCH, // Check for characters that can't match.
824 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 830 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
825 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 831 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
868 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 874 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
869 virtual int EatsAtLeast(int still_to_find, 875 virtual int EatsAtLeast(int still_to_find,
870 int recursion_depth, 876 int recursion_depth,
871 bool not_at_start); 877 bool not_at_start);
872 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 878 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
873 RegExpCompiler* compiler, 879 RegExpCompiler* compiler,
874 int filled_in, 880 int filled_in,
875 bool not_at_start); 881 bool not_at_start);
876 virtual void FillInBMInfo(int offset, 882 virtual void FillInBMInfo(int offset,
877 int recursion_depth, 883 int recursion_depth,
884 int budget,
878 BoyerMooreLookahead* bm, 885 BoyerMooreLookahead* bm,
879 bool not_at_start); 886 bool not_at_start);
880 AssertionNodeType type() { return type_; } 887 AssertionNodeType type() { return type_; }
881 void set_type(AssertionNodeType type) { type_ = type; } 888 void set_type(AssertionNodeType type) { type_ = type; }
882 889
883 private: 890 private:
884 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 891 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
885 enum IfPrevious { kIsNonWord, kIsWord }; 892 enum IfPrevious { kIsNonWord, kIsWord };
886 void BacktrackIfPrevious(RegExpCompiler* compiler, 893 void BacktrackIfPrevious(RegExpCompiler* compiler,
887 Trace* trace, 894 Trace* trace,
(...skipping 20 matching lines...) Expand all
908 int recursion_depth, 915 int recursion_depth,
909 bool not_at_start); 916 bool not_at_start);
910 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
911 RegExpCompiler* compiler, 918 RegExpCompiler* compiler,
912 int characters_filled_in, 919 int characters_filled_in,
913 bool not_at_start) { 920 bool not_at_start) {
914 return; 921 return;
915 } 922 }
916 virtual void FillInBMInfo(int offset, 923 virtual void FillInBMInfo(int offset,
917 int recursion_depth, 924 int recursion_depth,
925 int budget,
918 BoyerMooreLookahead* bm, 926 BoyerMooreLookahead* bm,
919 bool not_at_start); 927 bool not_at_start);
920 928
921 private: 929 private:
922 int start_reg_; 930 int start_reg_;
923 int end_reg_; 931 int end_reg_;
924 }; 932 };
925 933
926 934
927 class EndNode: public RegExpNode { 935 class EndNode: public RegExpNode {
928 public: 936 public:
929 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 937 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
930 explicit EndNode(Action action) : action_(action) { } 938 explicit EndNode(Action action) : action_(action) { }
931 virtual void Accept(NodeVisitor* visitor); 939 virtual void Accept(NodeVisitor* visitor);
932 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 940 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
933 virtual int EatsAtLeast(int still_to_find, 941 virtual int EatsAtLeast(int still_to_find,
934 int recursion_depth, 942 int recursion_depth,
935 bool not_at_start) { return 0; } 943 bool not_at_start) { return 0; }
936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 944 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
937 RegExpCompiler* compiler, 945 RegExpCompiler* compiler,
938 int characters_filled_in, 946 int characters_filled_in,
939 bool not_at_start) { 947 bool not_at_start) {
940 // Returning 0 from EatsAtLeast should ensure we never get here. 948 // Returning 0 from EatsAtLeast should ensure we never get here.
941 UNREACHABLE(); 949 UNREACHABLE();
942 } 950 }
943 virtual void FillInBMInfo(int offset, 951 virtual void FillInBMInfo(int offset,
944 int recursion_depth, 952 int recursion_depth,
953 int budget,
945 BoyerMooreLookahead* bm, 954 BoyerMooreLookahead* bm,
946 bool not_at_start) { 955 bool not_at_start) {
947 // Returning 0 from EatsAtLeast should ensure we never get here. 956 // Returning 0 from EatsAtLeast should ensure we never get here.
948 UNREACHABLE(); 957 UNREACHABLE();
949 } 958 }
950 959
951 private: 960 private:
952 Action action_; 961 Action action_;
953 }; 962 };
954 963
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
1027 int EatsAtLeastHelper(int still_to_find, 1036 int EatsAtLeastHelper(int still_to_find,
1028 int recursion_depth, 1037 int recursion_depth,
1029 RegExpNode* ignore_this_node, 1038 RegExpNode* ignore_this_node,
1030 bool not_at_start); 1039 bool not_at_start);
1031 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1040 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1032 RegExpCompiler* compiler, 1041 RegExpCompiler* compiler,
1033 int characters_filled_in, 1042 int characters_filled_in,
1034 bool not_at_start); 1043 bool not_at_start);
1035 virtual void FillInBMInfo(int offset, 1044 virtual void FillInBMInfo(int offset,
1036 int recursion_depth, 1045 int recursion_depth,
1046 int budget,
1037 BoyerMooreLookahead* bm, 1047 BoyerMooreLookahead* bm,
1038 bool not_at_start); 1048 bool not_at_start);
1039 1049
1040 bool being_calculated() { return being_calculated_; } 1050 bool being_calculated() { return being_calculated_; }
1041 bool not_at_start() { return not_at_start_; } 1051 bool not_at_start() { return not_at_start_; }
1042 void set_not_at_start() { not_at_start_ = true; } 1052 void set_not_at_start() { not_at_start_ = true; }
1043 void set_being_calculated(bool b) { being_calculated_ = b; } 1053 void set_being_calculated(bool b) { being_calculated_ = b; }
1044 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1054 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1045 virtual RegExpNode* FilterASCII(int depth); 1055 virtual RegExpNode* FilterASCII(int depth);
1046 1056
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1079 } 1089 }
1080 virtual int EatsAtLeast(int still_to_find, 1090 virtual int EatsAtLeast(int still_to_find,
1081 int recursion_depth, 1091 int recursion_depth,
1082 bool not_at_start); 1092 bool not_at_start);
1083 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1093 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1084 RegExpCompiler* compiler, 1094 RegExpCompiler* compiler,
1085 int characters_filled_in, 1095 int characters_filled_in,
1086 bool not_at_start); 1096 bool not_at_start);
1087 virtual void FillInBMInfo(int offset, 1097 virtual void FillInBMInfo(int offset,
1088 int recursion_depth, 1098 int recursion_depth,
1099 int budget,
1089 BoyerMooreLookahead* bm, 1100 BoyerMooreLookahead* bm,
1090 bool not_at_start) { 1101 bool not_at_start) {
1091 alternatives_->at(1).node()->FillInBMInfo( 1102 alternatives_->at(1).node()->FillInBMInfo(
1092 offset, recursion_depth + 1, bm, not_at_start); 1103 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
1093 if (offset == 0) set_bm_info(not_at_start, bm); 1104 if (offset == 0) set_bm_info(not_at_start, bm);
1094 } 1105 }
1095 // For a negative lookahead we don't emit the quick check for the 1106 // For a negative lookahead we don't emit the quick check for the
1096 // alternative that is expected to fail. This is because quick check code 1107 // alternative that is expected to fail. This is because quick check code
1097 // starts by loading enough characters for the alternative that takes fewest 1108 // starts by loading enough characters for the alternative that takes fewest
1098 // characters, but on a negative lookahead the negative branch did not take 1109 // characters, but on a negative lookahead the negative branch did not take
1099 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1110 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1100 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1111 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1101 virtual RegExpNode* FilterASCII(int depth); 1112 virtual RegExpNode* FilterASCII(int depth);
1102 }; 1113 };
(...skipping 11 matching lines...) Expand all
1114 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1125 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1115 virtual int EatsAtLeast(int still_to_find, 1126 virtual int EatsAtLeast(int still_to_find,
1116 int recursion_depth, 1127 int recursion_depth,
1117 bool not_at_start); 1128 bool not_at_start);
1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1129 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1119 RegExpCompiler* compiler, 1130 RegExpCompiler* compiler,
1120 int characters_filled_in, 1131 int characters_filled_in,
1121 bool not_at_start); 1132 bool not_at_start);
1122 virtual void FillInBMInfo(int offset, 1133 virtual void FillInBMInfo(int offset,
1123 int recursion_depth, 1134 int recursion_depth,
1135 int budget,
1124 BoyerMooreLookahead* bm, 1136 BoyerMooreLookahead* bm,
1125 bool not_at_start); 1137 bool not_at_start);
1126 RegExpNode* loop_node() { return loop_node_; } 1138 RegExpNode* loop_node() { return loop_node_; }
1127 RegExpNode* continue_node() { return continue_node_; } 1139 RegExpNode* continue_node() { return continue_node_; }
1128 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1140 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1129 virtual void Accept(NodeVisitor* visitor); 1141 virtual void Accept(NodeVisitor* visitor);
1130 virtual RegExpNode* FilterASCII(int depth); 1142 virtual RegExpNode* FilterASCII(int depth);
1131 1143
1132 private: 1144 private:
1133 // AddAlternative is made private for loop nodes because alternatives 1145 // AddAlternative is made private for loop nodes because alternatives
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after
1612 int* vector_; 1624 int* vector_;
1613 int offsets_vector_length_; 1625 int offsets_vector_length_;
1614 1626
1615 friend class ExternalReference; 1627 friend class ExternalReference;
1616 }; 1628 };
1617 1629
1618 1630
1619 } } // namespace v8::internal 1631 } } // namespace v8::internal
1620 1632
1621 #endif // V8_JSREGEXP_H_ 1633 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698