Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(680)

Side by Side Diff: src/jsregexp.h

Issue 10536002: Backport to 3.10 of r11696: Limit work done analyzing regexps with very large fanout. (Closed) Base URL: http://v8.googlecode.com/svn/branches/3.10/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 556 matching lines...) Expand 10 before | Expand all | Expand 10 after
567 // Only returns the successor for a text node of length 1 that matches any 567 // Only returns the successor for a text node of length 1 that matches any
568 // character and that has no guards on it. 568 // character and that has no guards on it.
569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
570 RegExpCompiler* compiler) { 570 RegExpCompiler* compiler) {
571 return NULL; 571 return NULL;
572 } 572 }
573 573
574 // Collects information on the possible code units (mod 128) that can match if 574 // Collects information on the possible code units (mod 128) that can match if
575 // we look forward. This is used for a Boyer-Moore-like string searching 575 // we look forward. This is used for a Boyer-Moore-like string searching
576 // implementation. TODO(erikcorry): This should share more code with 576 // implementation. TODO(erikcorry): This should share more code with
577 // EatsAtLeast, GetQuickCheckDetails. 577 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
578 // the number of nodes we are willing to look at in order to create this data.
579 static const int kFillInBMBudget = 200;
578 virtual void FillInBMInfo(int offset, 580 virtual void FillInBMInfo(int offset,
579 int recursion_depth, 581 int recursion_depth,
582 int budget,
580 BoyerMooreLookahead* bm, 583 BoyerMooreLookahead* bm,
581 bool not_at_start) { 584 bool not_at_start) {
582 UNREACHABLE(); 585 UNREACHABLE();
583 } 586 }
584 587
585 // If we know that the input is ASCII then there are some nodes that can 588 // If we know that the input is ASCII then there are some nodes that can
586 // never match. This method returns a node that can be substituted for 589 // never match. This method returns a node that can be substituted for
587 // itself, or NULL if the node can never match. 590 // itself, or NULL if the node can never match.
588 virtual RegExpNode* FilterASCII(int depth) { return this; } 591 virtual RegExpNode* FilterASCII(int depth) { return this; }
589 // Helper for FilterASCII. 592 // Helper for FilterASCII.
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
672 675
673 class SeqRegExpNode: public RegExpNode { 676 class SeqRegExpNode: public RegExpNode {
674 public: 677 public:
675 explicit SeqRegExpNode(RegExpNode* on_success) 678 explicit SeqRegExpNode(RegExpNode* on_success)
676 : on_success_(on_success) { } 679 : on_success_(on_success) { }
677 RegExpNode* on_success() { return on_success_; } 680 RegExpNode* on_success() { return on_success_; }
678 void set_on_success(RegExpNode* node) { on_success_ = node; } 681 void set_on_success(RegExpNode* node) { on_success_ = node; }
679 virtual RegExpNode* FilterASCII(int depth); 682 virtual RegExpNode* FilterASCII(int depth);
680 virtual void FillInBMInfo(int offset, 683 virtual void FillInBMInfo(int offset,
681 int recursion_depth, 684 int recursion_depth,
685 int budget,
682 BoyerMooreLookahead* bm, 686 BoyerMooreLookahead* bm,
683 bool not_at_start) { 687 bool not_at_start) {
684 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); 688 on_success_->FillInBMInfo(
689 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
685 if (offset == 0) set_bm_info(not_at_start, bm); 690 if (offset == 0) set_bm_info(not_at_start, bm);
686 } 691 }
687 692
688 protected: 693 protected:
689 RegExpNode* FilterSuccessor(int depth); 694 RegExpNode* FilterSuccessor(int depth);
690 695
691 private: 696 private:
692 RegExpNode* on_success_; 697 RegExpNode* on_success_;
693 }; 698 };
694 699
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
729 bool not_at_start); 734 bool not_at_start);
730 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 735 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
731 RegExpCompiler* compiler, 736 RegExpCompiler* compiler,
732 int filled_in, 737 int filled_in,
733 bool not_at_start) { 738 bool not_at_start) {
734 return on_success()->GetQuickCheckDetails( 739 return on_success()->GetQuickCheckDetails(
735 details, compiler, filled_in, not_at_start); 740 details, compiler, filled_in, not_at_start);
736 } 741 }
737 virtual void FillInBMInfo(int offset, 742 virtual void FillInBMInfo(int offset,
738 int recursion_depth, 743 int recursion_depth,
744 int budget,
739 BoyerMooreLookahead* bm, 745 BoyerMooreLookahead* bm,
740 bool not_at_start); 746 bool not_at_start);
741 Type type() { return type_; } 747 Type type() { return type_; }
742 // TODO(erikcorry): We should allow some action nodes in greedy loops. 748 // TODO(erikcorry): We should allow some action nodes in greedy loops.
743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
744 750
745 private: 751 private:
746 union { 752 union {
747 struct { 753 struct {
748 int reg; 754 int reg;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
800 RegExpCompiler* compiler, 806 RegExpCompiler* compiler,
801 int characters_filled_in, 807 int characters_filled_in,
802 bool not_at_start); 808 bool not_at_start);
803 ZoneList<TextElement>* elements() { return elms_; } 809 ZoneList<TextElement>* elements() { return elms_; }
804 void MakeCaseIndependent(bool is_ascii); 810 void MakeCaseIndependent(bool is_ascii);
805 virtual int GreedyLoopTextLength(); 811 virtual int GreedyLoopTextLength();
806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
807 RegExpCompiler* compiler); 813 RegExpCompiler* compiler);
808 virtual void FillInBMInfo(int offset, 814 virtual void FillInBMInfo(int offset,
809 int recursion_depth, 815 int recursion_depth,
816 int budget,
810 BoyerMooreLookahead* bm, 817 BoyerMooreLookahead* bm,
811 bool not_at_start); 818 bool not_at_start);
812 void CalculateOffsets(); 819 void CalculateOffsets();
813 virtual RegExpNode* FilterASCII(int depth); 820 virtual RegExpNode* FilterASCII(int depth);
814 821
815 private: 822 private:
816 enum TextEmitPassType { 823 enum TextEmitPassType {
817 NON_ASCII_MATCH, // Check for characters that can't match. 824 NON_ASCII_MATCH, // Check for characters that can't match.
818 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 825 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
819 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 826 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
862 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 869 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
863 virtual int EatsAtLeast(int still_to_find, 870 virtual int EatsAtLeast(int still_to_find,
864 int recursion_depth, 871 int recursion_depth,
865 bool not_at_start); 872 bool not_at_start);
866 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 873 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
867 RegExpCompiler* compiler, 874 RegExpCompiler* compiler,
868 int filled_in, 875 int filled_in,
869 bool not_at_start); 876 bool not_at_start);
870 virtual void FillInBMInfo(int offset, 877 virtual void FillInBMInfo(int offset,
871 int recursion_depth, 878 int recursion_depth,
879 int budget,
872 BoyerMooreLookahead* bm, 880 BoyerMooreLookahead* bm,
873 bool not_at_start); 881 bool not_at_start);
874 AssertionNodeType type() { return type_; } 882 AssertionNodeType type() { return type_; }
875 void set_type(AssertionNodeType type) { type_ = type; } 883 void set_type(AssertionNodeType type) { type_ = type; }
876 884
877 private: 885 private:
878 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 886 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
879 enum IfPrevious { kIsNonWord, kIsWord }; 887 enum IfPrevious { kIsNonWord, kIsWord };
880 void BacktrackIfPrevious(RegExpCompiler* compiler, 888 void BacktrackIfPrevious(RegExpCompiler* compiler,
881 Trace* trace, 889 Trace* trace,
(...skipping 20 matching lines...) Expand all
902 int recursion_depth, 910 int recursion_depth,
903 bool not_at_start); 911 bool not_at_start);
904 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 912 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
905 RegExpCompiler* compiler, 913 RegExpCompiler* compiler,
906 int characters_filled_in, 914 int characters_filled_in,
907 bool not_at_start) { 915 bool not_at_start) {
908 return; 916 return;
909 } 917 }
910 virtual void FillInBMInfo(int offset, 918 virtual void FillInBMInfo(int offset,
911 int recursion_depth, 919 int recursion_depth,
920 int budget,
912 BoyerMooreLookahead* bm, 921 BoyerMooreLookahead* bm,
913 bool not_at_start); 922 bool not_at_start);
914 923
915 private: 924 private:
916 int start_reg_; 925 int start_reg_;
917 int end_reg_; 926 int end_reg_;
918 }; 927 };
919 928
920 929
921 class EndNode: public RegExpNode { 930 class EndNode: public RegExpNode {
922 public: 931 public:
923 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 932 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
924 explicit EndNode(Action action) : action_(action) { } 933 explicit EndNode(Action action) : action_(action) { }
925 virtual void Accept(NodeVisitor* visitor); 934 virtual void Accept(NodeVisitor* visitor);
926 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 935 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
927 virtual int EatsAtLeast(int still_to_find, 936 virtual int EatsAtLeast(int still_to_find,
928 int recursion_depth, 937 int recursion_depth,
929 bool not_at_start) { return 0; } 938 bool not_at_start) { return 0; }
930 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 939 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
931 RegExpCompiler* compiler, 940 RegExpCompiler* compiler,
932 int characters_filled_in, 941 int characters_filled_in,
933 bool not_at_start) { 942 bool not_at_start) {
934 // Returning 0 from EatsAtLeast should ensure we never get here. 943 // Returning 0 from EatsAtLeast should ensure we never get here.
935 UNREACHABLE(); 944 UNREACHABLE();
936 } 945 }
937 virtual void FillInBMInfo(int offset, 946 virtual void FillInBMInfo(int offset,
938 int recursion_depth, 947 int recursion_depth,
948 int budget,
939 BoyerMooreLookahead* bm, 949 BoyerMooreLookahead* bm,
940 bool not_at_start) { 950 bool not_at_start) {
941 // Returning 0 from EatsAtLeast should ensure we never get here. 951 // Returning 0 from EatsAtLeast should ensure we never get here.
942 UNREACHABLE(); 952 UNREACHABLE();
943 } 953 }
944 954
945 private: 955 private:
946 Action action_; 956 Action action_;
947 }; 957 };
948 958
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
1021 int EatsAtLeastHelper(int still_to_find, 1031 int EatsAtLeastHelper(int still_to_find,
1022 int recursion_depth, 1032 int recursion_depth,
1023 RegExpNode* ignore_this_node, 1033 RegExpNode* ignore_this_node,
1024 bool not_at_start); 1034 bool not_at_start);
1025 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1035 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1026 RegExpCompiler* compiler, 1036 RegExpCompiler* compiler,
1027 int characters_filled_in, 1037 int characters_filled_in,
1028 bool not_at_start); 1038 bool not_at_start);
1029 virtual void FillInBMInfo(int offset, 1039 virtual void FillInBMInfo(int offset,
1030 int recursion_depth, 1040 int recursion_depth,
1041 int budget,
1031 BoyerMooreLookahead* bm, 1042 BoyerMooreLookahead* bm,
1032 bool not_at_start); 1043 bool not_at_start);
1033 1044
1034 bool being_calculated() { return being_calculated_; } 1045 bool being_calculated() { return being_calculated_; }
1035 bool not_at_start() { return not_at_start_; } 1046 bool not_at_start() { return not_at_start_; }
1036 void set_not_at_start() { not_at_start_ = true; } 1047 void set_not_at_start() { not_at_start_ = true; }
1037 void set_being_calculated(bool b) { being_calculated_ = b; } 1048 void set_being_calculated(bool b) { being_calculated_ = b; }
1038 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1049 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1039 virtual RegExpNode* FilterASCII(int depth); 1050 virtual RegExpNode* FilterASCII(int depth);
1040 1051
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1073 } 1084 }
1074 virtual int EatsAtLeast(int still_to_find, 1085 virtual int EatsAtLeast(int still_to_find,
1075 int recursion_depth, 1086 int recursion_depth,
1076 bool not_at_start); 1087 bool not_at_start);
1077 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1088 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1078 RegExpCompiler* compiler, 1089 RegExpCompiler* compiler,
1079 int characters_filled_in, 1090 int characters_filled_in,
1080 bool not_at_start); 1091 bool not_at_start);
1081 virtual void FillInBMInfo(int offset, 1092 virtual void FillInBMInfo(int offset,
1082 int recursion_depth, 1093 int recursion_depth,
1094 int budget,
1083 BoyerMooreLookahead* bm, 1095 BoyerMooreLookahead* bm,
1084 bool not_at_start) { 1096 bool not_at_start) {
1085 alternatives_->at(1).node()->FillInBMInfo( 1097 alternatives_->at(1).node()->FillInBMInfo(
1086 offset, recursion_depth + 1, bm, not_at_start); 1098 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
1087 if (offset == 0) set_bm_info(not_at_start, bm); 1099 if (offset == 0) set_bm_info(not_at_start, bm);
1088 } 1100 }
1089 // For a negative lookahead we don't emit the quick check for the 1101 // For a negative lookahead we don't emit the quick check for the
1090 // alternative that is expected to fail. This is because quick check code 1102 // alternative that is expected to fail. This is because quick check code
1091 // starts by loading enough characters for the alternative that takes fewest 1103 // starts by loading enough characters for the alternative that takes fewest
1092 // characters, but on a negative lookahead the negative branch did not take 1104 // characters, but on a negative lookahead the negative branch did not take
1093 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1105 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1094 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1106 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1095 virtual RegExpNode* FilterASCII(int depth); 1107 virtual RegExpNode* FilterASCII(int depth);
1096 }; 1108 };
(...skipping 11 matching lines...) Expand all
1108 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1120 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1109 virtual int EatsAtLeast(int still_to_find, 1121 virtual int EatsAtLeast(int still_to_find,
1110 int recursion_depth, 1122 int recursion_depth,
1111 bool not_at_start); 1123 bool not_at_start);
1112 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1124 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1113 RegExpCompiler* compiler, 1125 RegExpCompiler* compiler,
1114 int characters_filled_in, 1126 int characters_filled_in,
1115 bool not_at_start); 1127 bool not_at_start);
1116 virtual void FillInBMInfo(int offset, 1128 virtual void FillInBMInfo(int offset,
1117 int recursion_depth, 1129 int recursion_depth,
1130 int budget,
1118 BoyerMooreLookahead* bm, 1131 BoyerMooreLookahead* bm,
1119 bool not_at_start); 1132 bool not_at_start);
1120 RegExpNode* loop_node() { return loop_node_; } 1133 RegExpNode* loop_node() { return loop_node_; }
1121 RegExpNode* continue_node() { return continue_node_; } 1134 RegExpNode* continue_node() { return continue_node_; }
1122 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1135 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1123 virtual void Accept(NodeVisitor* visitor); 1136 virtual void Accept(NodeVisitor* visitor);
1124 virtual RegExpNode* FilterASCII(int depth); 1137 virtual RegExpNode* FilterASCII(int depth);
1125 1138
1126 private: 1139 private:
1127 // AddAlternative is made private for loop nodes because alternatives 1140 // AddAlternative is made private for loop nodes because alternatives
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after
1604 int* vector_; 1617 int* vector_;
1605 int offsets_vector_length_; 1618 int offsets_vector_length_;
1606 1619
1607 friend class ExternalReference; 1620 friend class ExternalReference;
1608 }; 1621 };
1609 1622
1610 1623
1611 } } // namespace v8::internal 1624 } } // namespace v8::internal
1612 1625
1613 #endif // V8_JSREGEXP_H_ 1626 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698