Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: src/jsregexp.h

Issue 10443109: Backport r11686: Avoid overdeep recursion in regexp where a guarded expression with a minimum repet… (Closed) Base URL: http://v8.googlecode.com/svn/branches/3.10/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after
568 // character and that has no guards on it. 568 // character and that has no guards on it.
569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
570 RegExpCompiler* compiler) { 570 RegExpCompiler* compiler) {
571 return NULL; 571 return NULL;
572 } 572 }
573 573
574 // Collects information on the possible code units (mod 128) that can match if 574 // Collects information on the possible code units (mod 128) that can match if
575 // we look forward. This is used for a Boyer-Moore-like string searching 575 // we look forward. This is used for a Boyer-Moore-like string searching
576 // implementation. TODO(erikcorry): This should share more code with 576 // implementation. TODO(erikcorry): This should share more code with
577 // EatsAtLeast, GetQuickCheckDetails. 577 // EatsAtLeast, GetQuickCheckDetails.
578 virtual void FillInBMInfo( 578 virtual void FillInBMInfo(int offset,
579 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 579 int recursion_depth,
580 BoyerMooreLookahead* bm,
581 bool not_at_start) {
580 UNREACHABLE(); 582 UNREACHABLE();
581 } 583 }
582 584
583 // If we know that the input is ASCII then there are some nodes that can 585 // If we know that the input is ASCII then there are some nodes that can
584 // never match. This method returns a node that can be substituted for 586 // never match. This method returns a node that can be substituted for
585 // itself, or NULL if the node can never match. 587 // itself, or NULL if the node can never match.
586 virtual RegExpNode* FilterASCII(int depth) { return this; } 588 virtual RegExpNode* FilterASCII(int depth) { return this; }
587 // Helper for FilterASCII. 589 // Helper for FilterASCII.
588 RegExpNode* replacement() { 590 RegExpNode* replacement() {
589 ASSERT(info()->replacement_calculated); 591 ASSERT(info()->replacement_calculated);
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
668 }; 670 };
669 671
670 672
671 class SeqRegExpNode: public RegExpNode { 673 class SeqRegExpNode: public RegExpNode {
672 public: 674 public:
673 explicit SeqRegExpNode(RegExpNode* on_success) 675 explicit SeqRegExpNode(RegExpNode* on_success)
674 : on_success_(on_success) { } 676 : on_success_(on_success) { }
675 RegExpNode* on_success() { return on_success_; } 677 RegExpNode* on_success() { return on_success_; }
676 void set_on_success(RegExpNode* node) { on_success_ = node; } 678 void set_on_success(RegExpNode* node) { on_success_ = node; }
677 virtual RegExpNode* FilterASCII(int depth); 679 virtual RegExpNode* FilterASCII(int depth);
678 virtual void FillInBMInfo( 680 virtual void FillInBMInfo(int offset,
679 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 681 int recursion_depth,
680 on_success_->FillInBMInfo(offset, bm, not_at_start); 682 BoyerMooreLookahead* bm,
683 bool not_at_start) {
684 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
681 if (offset == 0) set_bm_info(not_at_start, bm); 685 if (offset == 0) set_bm_info(not_at_start, bm);
682 } 686 }
683 687
684 protected: 688 protected:
685 RegExpNode* FilterSuccessor(int depth); 689 RegExpNode* FilterSuccessor(int depth);
686 690
687 private: 691 private:
688 RegExpNode* on_success_; 692 RegExpNode* on_success_;
689 }; 693 };
690 694
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
723 virtual int EatsAtLeast(int still_to_find, 727 virtual int EatsAtLeast(int still_to_find,
724 int recursion_depth, 728 int recursion_depth,
725 bool not_at_start); 729 bool not_at_start);
726 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 730 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
727 RegExpCompiler* compiler, 731 RegExpCompiler* compiler,
728 int filled_in, 732 int filled_in,
729 bool not_at_start) { 733 bool not_at_start) {
730 return on_success()->GetQuickCheckDetails( 734 return on_success()->GetQuickCheckDetails(
731 details, compiler, filled_in, not_at_start); 735 details, compiler, filled_in, not_at_start);
732 } 736 }
733 virtual void FillInBMInfo( 737 virtual void FillInBMInfo(int offset,
734 int offset, BoyerMooreLookahead* bm, bool not_at_start); 738 int recursion_depth,
739 BoyerMooreLookahead* bm,
740 bool not_at_start);
735 Type type() { return type_; } 741 Type type() { return type_; }
736 // TODO(erikcorry): We should allow some action nodes in greedy loops. 742 // TODO(erikcorry): We should allow some action nodes in greedy loops.
737 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
738 744
739 private: 745 private:
740 union { 746 union {
741 struct { 747 struct {
742 int reg; 748 int reg;
743 int value; 749 int value;
744 } u_store_register; 750 } u_store_register;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 bool not_at_start); 798 bool not_at_start);
793 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 799 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
794 RegExpCompiler* compiler, 800 RegExpCompiler* compiler,
795 int characters_filled_in, 801 int characters_filled_in,
796 bool not_at_start); 802 bool not_at_start);
797 ZoneList<TextElement>* elements() { return elms_; } 803 ZoneList<TextElement>* elements() { return elms_; }
798 void MakeCaseIndependent(bool is_ascii); 804 void MakeCaseIndependent(bool is_ascii);
799 virtual int GreedyLoopTextLength(); 805 virtual int GreedyLoopTextLength();
800 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
801 RegExpCompiler* compiler); 807 RegExpCompiler* compiler);
802 virtual void FillInBMInfo( 808 virtual void FillInBMInfo(int offset,
803 int offset, BoyerMooreLookahead* bm, bool not_at_start); 809 int recursion_depth,
810 BoyerMooreLookahead* bm,
811 bool not_at_start);
804 void CalculateOffsets(); 812 void CalculateOffsets();
805 virtual RegExpNode* FilterASCII(int depth); 813 virtual RegExpNode* FilterASCII(int depth);
806 814
807 private: 815 private:
808 enum TextEmitPassType { 816 enum TextEmitPassType {
809 NON_ASCII_MATCH, // Check for characters that can't match. 817 NON_ASCII_MATCH, // Check for characters that can't match.
810 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 818 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
811 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 819 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
812 CASE_CHARACTER_MATCH, // Case-independent single character check. 820 CASE_CHARACTER_MATCH, // Case-independent single character check.
813 CHARACTER_CLASS_MATCH // Character class. 821 CHARACTER_CLASS_MATCH // Character class.
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
852 } 860 }
853 virtual void Accept(NodeVisitor* visitor); 861 virtual void Accept(NodeVisitor* visitor);
854 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 862 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
855 virtual int EatsAtLeast(int still_to_find, 863 virtual int EatsAtLeast(int still_to_find,
856 int recursion_depth, 864 int recursion_depth,
857 bool not_at_start); 865 bool not_at_start);
858 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 866 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
859 RegExpCompiler* compiler, 867 RegExpCompiler* compiler,
860 int filled_in, 868 int filled_in,
861 bool not_at_start); 869 bool not_at_start);
862 virtual void FillInBMInfo( 870 virtual void FillInBMInfo(int offset,
863 int offset, BoyerMooreLookahead* bm, bool not_at_start); 871 int recursion_depth,
872 BoyerMooreLookahead* bm,
873 bool not_at_start);
864 AssertionNodeType type() { return type_; } 874 AssertionNodeType type() { return type_; }
865 void set_type(AssertionNodeType type) { type_ = type; } 875 void set_type(AssertionNodeType type) { type_ = type; }
866 876
867 private: 877 private:
868 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 878 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
869 enum IfPrevious { kIsNonWord, kIsWord }; 879 enum IfPrevious { kIsNonWord, kIsWord };
870 void BacktrackIfPrevious(RegExpCompiler* compiler, 880 void BacktrackIfPrevious(RegExpCompiler* compiler,
871 Trace* trace, 881 Trace* trace,
872 IfPrevious backtrack_if_previous); 882 IfPrevious backtrack_if_previous);
873 AssertionNode(AssertionNodeType t, RegExpNode* on_success) 883 AssertionNode(AssertionNodeType t, RegExpNode* on_success)
(...skipping 16 matching lines...) Expand all
890 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 900 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
891 virtual int EatsAtLeast(int still_to_find, 901 virtual int EatsAtLeast(int still_to_find,
892 int recursion_depth, 902 int recursion_depth,
893 bool not_at_start); 903 bool not_at_start);
894 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 904 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
895 RegExpCompiler* compiler, 905 RegExpCompiler* compiler,
896 int characters_filled_in, 906 int characters_filled_in,
897 bool not_at_start) { 907 bool not_at_start) {
898 return; 908 return;
899 } 909 }
900 virtual void FillInBMInfo( 910 virtual void FillInBMInfo(int offset,
901 int offset, BoyerMooreLookahead* bm, bool not_at_start); 911 int recursion_depth,
912 BoyerMooreLookahead* bm,
913 bool not_at_start);
902 914
903 private: 915 private:
904 int start_reg_; 916 int start_reg_;
905 int end_reg_; 917 int end_reg_;
906 }; 918 };
907 919
908 920
909 class EndNode: public RegExpNode { 921 class EndNode: public RegExpNode {
910 public: 922 public:
911 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 923 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
912 explicit EndNode(Action action) : action_(action) { } 924 explicit EndNode(Action action) : action_(action) { }
913 virtual void Accept(NodeVisitor* visitor); 925 virtual void Accept(NodeVisitor* visitor);
914 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 926 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
915 virtual int EatsAtLeast(int still_to_find, 927 virtual int EatsAtLeast(int still_to_find,
916 int recursion_depth, 928 int recursion_depth,
917 bool not_at_start) { return 0; } 929 bool not_at_start) { return 0; }
918 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 930 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
919 RegExpCompiler* compiler, 931 RegExpCompiler* compiler,
920 int characters_filled_in, 932 int characters_filled_in,
921 bool not_at_start) { 933 bool not_at_start) {
922 // Returning 0 from EatsAtLeast should ensure we never get here. 934 // Returning 0 from EatsAtLeast should ensure we never get here.
923 UNREACHABLE(); 935 UNREACHABLE();
924 } 936 }
925 virtual void FillInBMInfo( 937 virtual void FillInBMInfo(int offset,
926 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 938 int recursion_depth,
939 BoyerMooreLookahead* bm,
940 bool not_at_start) {
927 // Returning 0 from EatsAtLeast should ensure we never get here. 941 // Returning 0 from EatsAtLeast should ensure we never get here.
928 UNREACHABLE(); 942 UNREACHABLE();
929 } 943 }
930 944
931 private: 945 private:
932 Action action_; 946 Action action_;
933 }; 947 };
934 948
935 949
936 class NegativeSubmatchSuccess: public EndNode { 950 class NegativeSubmatchSuccess: public EndNode {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1005 int recursion_depth, 1019 int recursion_depth,
1006 bool not_at_start); 1020 bool not_at_start);
1007 int EatsAtLeastHelper(int still_to_find, 1021 int EatsAtLeastHelper(int still_to_find,
1008 int recursion_depth, 1022 int recursion_depth,
1009 RegExpNode* ignore_this_node, 1023 RegExpNode* ignore_this_node,
1010 bool not_at_start); 1024 bool not_at_start);
1011 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1025 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1012 RegExpCompiler* compiler, 1026 RegExpCompiler* compiler,
1013 int characters_filled_in, 1027 int characters_filled_in,
1014 bool not_at_start); 1028 bool not_at_start);
1015 virtual void FillInBMInfo( 1029 virtual void FillInBMInfo(int offset,
1016 int offset, BoyerMooreLookahead* bm, bool not_at_start); 1030 int recursion_depth,
1031 BoyerMooreLookahead* bm,
1032 bool not_at_start);
1017 1033
1018 bool being_calculated() { return being_calculated_; } 1034 bool being_calculated() { return being_calculated_; }
1019 bool not_at_start() { return not_at_start_; } 1035 bool not_at_start() { return not_at_start_; }
1020 void set_not_at_start() { not_at_start_ = true; } 1036 void set_not_at_start() { not_at_start_ = true; }
1021 void set_being_calculated(bool b) { being_calculated_ = b; } 1037 void set_being_calculated(bool b) { being_calculated_ = b; }
1022 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1038 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1023 virtual RegExpNode* FilterASCII(int depth); 1039 virtual RegExpNode* FilterASCII(int depth);
1024 1040
1025 protected: 1041 protected:
1026 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 1042 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
(...skipping 28 matching lines...) Expand all
1055 AddAlternative(this_must_fail); 1071 AddAlternative(this_must_fail);
1056 AddAlternative(then_do_this); 1072 AddAlternative(then_do_this);
1057 } 1073 }
1058 virtual int EatsAtLeast(int still_to_find, 1074 virtual int EatsAtLeast(int still_to_find,
1059 int recursion_depth, 1075 int recursion_depth,
1060 bool not_at_start); 1076 bool not_at_start);
1061 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1077 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1062 RegExpCompiler* compiler, 1078 RegExpCompiler* compiler,
1063 int characters_filled_in, 1079 int characters_filled_in,
1064 bool not_at_start); 1080 bool not_at_start);
1065 virtual void FillInBMInfo( 1081 virtual void FillInBMInfo(int offset,
1066 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 1082 int recursion_depth,
1067 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); 1083 BoyerMooreLookahead* bm,
1084 bool not_at_start) {
1085 alternatives_->at(1).node()->FillInBMInfo(
1086 offset, recursion_depth + 1, bm, not_at_start);
1068 if (offset == 0) set_bm_info(not_at_start, bm); 1087 if (offset == 0) set_bm_info(not_at_start, bm);
1069 } 1088 }
1070 // For a negative lookahead we don't emit the quick check for the 1089 // For a negative lookahead we don't emit the quick check for the
1071 // alternative that is expected to fail. This is because quick check code 1090 // alternative that is expected to fail. This is because quick check code
1072 // starts by loading enough characters for the alternative that takes fewest 1091 // starts by loading enough characters for the alternative that takes fewest
1073 // characters, but on a negative lookahead the negative branch did not take 1092 // characters, but on a negative lookahead the negative branch did not take
1074 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1093 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1075 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1094 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1076 virtual RegExpNode* FilterASCII(int depth); 1095 virtual RegExpNode* FilterASCII(int depth);
1077 }; 1096 };
1078 1097
1079 1098
1080 class LoopChoiceNode: public ChoiceNode { 1099 class LoopChoiceNode: public ChoiceNode {
1081 public: 1100 public:
1082 explicit LoopChoiceNode(bool body_can_be_zero_length) 1101 explicit LoopChoiceNode(bool body_can_be_zero_length)
1083 : ChoiceNode(2), 1102 : ChoiceNode(2),
1084 loop_node_(NULL), 1103 loop_node_(NULL),
1085 continue_node_(NULL), 1104 continue_node_(NULL),
1086 body_can_be_zero_length_(body_can_be_zero_length) { } 1105 body_can_be_zero_length_(body_can_be_zero_length) { }
1087 void AddLoopAlternative(GuardedAlternative alt); 1106 void AddLoopAlternative(GuardedAlternative alt);
1088 void AddContinueAlternative(GuardedAlternative alt); 1107 void AddContinueAlternative(GuardedAlternative alt);
1089 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1108 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1090 virtual int EatsAtLeast(int still_to_find, 1109 virtual int EatsAtLeast(int still_to_find,
1091 int recursion_depth, 1110 int recursion_depth,
1092 bool not_at_start); 1111 bool not_at_start);
1093 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1112 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1094 RegExpCompiler* compiler, 1113 RegExpCompiler* compiler,
1095 int characters_filled_in, 1114 int characters_filled_in,
1096 bool not_at_start); 1115 bool not_at_start);
1097 virtual void FillInBMInfo( 1116 virtual void FillInBMInfo(int offset,
1098 int offset, BoyerMooreLookahead* bm, bool not_at_start); 1117 int recursion_depth,
1118 BoyerMooreLookahead* bm,
1119 bool not_at_start);
1099 RegExpNode* loop_node() { return loop_node_; } 1120 RegExpNode* loop_node() { return loop_node_; }
1100 RegExpNode* continue_node() { return continue_node_; } 1121 RegExpNode* continue_node() { return continue_node_; }
1101 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1122 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1102 virtual void Accept(NodeVisitor* visitor); 1123 virtual void Accept(NodeVisitor* visitor);
1103 virtual RegExpNode* FilterASCII(int depth); 1124 virtual RegExpNode* FilterASCII(int depth);
1104 1125
1105 private: 1126 private:
1106 // AddAlternative is made private for loop nodes because alternatives 1127 // AddAlternative is made private for loop nodes because alternatives
1107 // should not be added freely, we need to keep track of which node 1128 // should not be added freely, we need to keep track of which node
1108 // goes back to the node itself. 1129 // goes back to the node itself.
(...skipping 474 matching lines...) Expand 10 before | Expand all | Expand 10 after
1583 int* vector_; 1604 int* vector_;
1584 int offsets_vector_length_; 1605 int offsets_vector_length_;
1585 1606
1586 friend class ExternalReference; 1607 friend class ExternalReference;
1587 }; 1608 };
1588 1609
1589 1610
1590 } } // namespace v8::internal 1611 } } // namespace v8::internal
1591 1612
1592 #endif // V8_JSREGEXP_H_ 1613 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698