Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/jsregexp.h

Issue 10451092: Avoid overdeep recursion in regexp where a guarded expression with a (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 // character and that has no guards on it. 574 // character and that has no guards on it.
575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
576 RegExpCompiler* compiler) { 576 RegExpCompiler* compiler) {
577 return NULL; 577 return NULL;
578 } 578 }
579 579
580 // Collects information on the possible code units (mod 128) that can match if 580 // Collects information on the possible code units (mod 128) that can match if
581 // we look forward. This is used for a Boyer-Moore-like string searching 581 // we look forward. This is used for a Boyer-Moore-like string searching
582 // implementation. TODO(erikcorry): This should share more code with 582 // implementation. TODO(erikcorry): This should share more code with
583 // EatsAtLeast, GetQuickCheckDetails. 583 // EatsAtLeast, GetQuickCheckDetails.
584 virtual void FillInBMInfo( 584 virtual void FillInBMInfo(int offset,
585 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 585 int recursion_depth,
586 BoyerMooreLookahead* bm,
587 bool not_at_start) {
586 UNREACHABLE(); 588 UNREACHABLE();
587 } 589 }
588 590
589 // If we know that the input is ASCII then there are some nodes that can 591 // If we know that the input is ASCII then there are some nodes that can
590 // never match. This method returns a node that can be substituted for 592 // never match. This method returns a node that can be substituted for
591 // itself, or NULL if the node can never match. 593 // itself, or NULL if the node can never match.
592 virtual RegExpNode* FilterASCII(int depth) { return this; } 594 virtual RegExpNode* FilterASCII(int depth) { return this; }
593 // Helper for FilterASCII. 595 // Helper for FilterASCII.
594 RegExpNode* replacement() { 596 RegExpNode* replacement() {
595 ASSERT(info()->replacement_calculated); 597 ASSERT(info()->replacement_calculated);
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
674 }; 676 };
675 677
676 678
677 class SeqRegExpNode: public RegExpNode { 679 class SeqRegExpNode: public RegExpNode {
678 public: 680 public:
679 explicit SeqRegExpNode(RegExpNode* on_success) 681 explicit SeqRegExpNode(RegExpNode* on_success)
680 : on_success_(on_success) { } 682 : on_success_(on_success) { }
681 RegExpNode* on_success() { return on_success_; } 683 RegExpNode* on_success() { return on_success_; }
682 void set_on_success(RegExpNode* node) { on_success_ = node; } 684 void set_on_success(RegExpNode* node) { on_success_ = node; }
683 virtual RegExpNode* FilterASCII(int depth); 685 virtual RegExpNode* FilterASCII(int depth);
684 virtual void FillInBMInfo( 686 virtual void FillInBMInfo(int offset,
685 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 687 int recursion_depth,
686 on_success_->FillInBMInfo(offset, bm, not_at_start); 688 BoyerMooreLookahead* bm,
689 bool not_at_start) {
690 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
687 if (offset == 0) set_bm_info(not_at_start, bm); 691 if (offset == 0) set_bm_info(not_at_start, bm);
688 } 692 }
689 693
690 protected: 694 protected:
691 RegExpNode* FilterSuccessor(int depth); 695 RegExpNode* FilterSuccessor(int depth);
692 696
693 private: 697 private:
694 RegExpNode* on_success_; 698 RegExpNode* on_success_;
695 }; 699 };
696 700
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
729 virtual int EatsAtLeast(int still_to_find, 733 virtual int EatsAtLeast(int still_to_find,
730 int recursion_depth, 734 int recursion_depth,
731 bool not_at_start); 735 bool not_at_start);
732 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 736 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
733 RegExpCompiler* compiler, 737 RegExpCompiler* compiler,
734 int filled_in, 738 int filled_in,
735 bool not_at_start) { 739 bool not_at_start) {
736 return on_success()->GetQuickCheckDetails( 740 return on_success()->GetQuickCheckDetails(
737 details, compiler, filled_in, not_at_start); 741 details, compiler, filled_in, not_at_start);
738 } 742 }
739 virtual void FillInBMInfo( 743 virtual void FillInBMInfo(int offset,
740 int offset, BoyerMooreLookahead* bm, bool not_at_start); 744 int recursion_depth,
745 BoyerMooreLookahead* bm,
746 bool not_at_start);
741 Type type() { return type_; } 747 Type type() { return type_; }
742 // TODO(erikcorry): We should allow some action nodes in greedy loops. 748 // TODO(erikcorry): We should allow some action nodes in greedy loops.
743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
744 750
745 private: 751 private:
746 union { 752 union {
747 struct { 753 struct {
748 int reg; 754 int reg;
749 int value; 755 int value;
750 } u_store_register; 756 } u_store_register;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
798 bool not_at_start); 804 bool not_at_start);
799 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 805 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
800 RegExpCompiler* compiler, 806 RegExpCompiler* compiler,
801 int characters_filled_in, 807 int characters_filled_in,
802 bool not_at_start); 808 bool not_at_start);
803 ZoneList<TextElement>* elements() { return elms_; } 809 ZoneList<TextElement>* elements() { return elms_; }
804 void MakeCaseIndependent(bool is_ascii); 810 void MakeCaseIndependent(bool is_ascii);
805 virtual int GreedyLoopTextLength(); 811 virtual int GreedyLoopTextLength();
806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
807 RegExpCompiler* compiler); 813 RegExpCompiler* compiler);
808 virtual void FillInBMInfo( 814 virtual void FillInBMInfo(int offset,
809 int offset, BoyerMooreLookahead* bm, bool not_at_start); 815 int recursion_depth,
816 BoyerMooreLookahead* bm,
817 bool not_at_start);
810 void CalculateOffsets(); 818 void CalculateOffsets();
811 virtual RegExpNode* FilterASCII(int depth); 819 virtual RegExpNode* FilterASCII(int depth);
812 820
813 private: 821 private:
814 enum TextEmitPassType { 822 enum TextEmitPassType {
815 NON_ASCII_MATCH, // Check for characters that can't match. 823 NON_ASCII_MATCH, // Check for characters that can't match.
816 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 824 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
817 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 825 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
818 CASE_CHARACTER_MATCH, // Case-independent single character check. 826 CASE_CHARACTER_MATCH, // Case-independent single character check.
819 CHARACTER_CLASS_MATCH // Character class. 827 CHARACTER_CLASS_MATCH // Character class.
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
858 } 866 }
859 virtual void Accept(NodeVisitor* visitor); 867 virtual void Accept(NodeVisitor* visitor);
860 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 868 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
861 virtual int EatsAtLeast(int still_to_find, 869 virtual int EatsAtLeast(int still_to_find,
862 int recursion_depth, 870 int recursion_depth,
863 bool not_at_start); 871 bool not_at_start);
864 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 872 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
865 RegExpCompiler* compiler, 873 RegExpCompiler* compiler,
866 int filled_in, 874 int filled_in,
867 bool not_at_start); 875 bool not_at_start);
868 virtual void FillInBMInfo( 876 virtual void FillInBMInfo(int offset,
869 int offset, BoyerMooreLookahead* bm, bool not_at_start); 877 int recursion_depth,
878 BoyerMooreLookahead* bm,
879 bool not_at_start);
870 AssertionNodeType type() { return type_; } 880 AssertionNodeType type() { return type_; }
871 void set_type(AssertionNodeType type) { type_ = type; } 881 void set_type(AssertionNodeType type) { type_ = type; }
872 882
873 private: 883 private:
874 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 884 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
875 enum IfPrevious { kIsNonWord, kIsWord }; 885 enum IfPrevious { kIsNonWord, kIsWord };
876 void BacktrackIfPrevious(RegExpCompiler* compiler, 886 void BacktrackIfPrevious(RegExpCompiler* compiler,
877 Trace* trace, 887 Trace* trace,
878 IfPrevious backtrack_if_previous); 888 IfPrevious backtrack_if_previous);
879 AssertionNode(AssertionNodeType t, RegExpNode* on_success) 889 AssertionNode(AssertionNodeType t, RegExpNode* on_success)
(...skipping 16 matching lines...) Expand all
896 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 906 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
897 virtual int EatsAtLeast(int still_to_find, 907 virtual int EatsAtLeast(int still_to_find,
898 int recursion_depth, 908 int recursion_depth,
899 bool not_at_start); 909 bool not_at_start);
900 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 910 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
901 RegExpCompiler* compiler, 911 RegExpCompiler* compiler,
902 int characters_filled_in, 912 int characters_filled_in,
903 bool not_at_start) { 913 bool not_at_start) {
904 return; 914 return;
905 } 915 }
906 virtual void FillInBMInfo( 916 virtual void FillInBMInfo(int offset,
907 int offset, BoyerMooreLookahead* bm, bool not_at_start); 917 int recursion_depth,
918 BoyerMooreLookahead* bm,
919 bool not_at_start);
908 920
909 private: 921 private:
910 int start_reg_; 922 int start_reg_;
911 int end_reg_; 923 int end_reg_;
912 }; 924 };
913 925
914 926
915 class EndNode: public RegExpNode { 927 class EndNode: public RegExpNode {
916 public: 928 public:
917 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 929 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
918 explicit EndNode(Action action) : action_(action) { } 930 explicit EndNode(Action action) : action_(action) { }
919 virtual void Accept(NodeVisitor* visitor); 931 virtual void Accept(NodeVisitor* visitor);
920 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 932 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
921 virtual int EatsAtLeast(int still_to_find, 933 virtual int EatsAtLeast(int still_to_find,
922 int recursion_depth, 934 int recursion_depth,
923 bool not_at_start) { return 0; } 935 bool not_at_start) { return 0; }
924 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 936 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
925 RegExpCompiler* compiler, 937 RegExpCompiler* compiler,
926 int characters_filled_in, 938 int characters_filled_in,
927 bool not_at_start) { 939 bool not_at_start) {
928 // Returning 0 from EatsAtLeast should ensure we never get here. 940 // Returning 0 from EatsAtLeast should ensure we never get here.
929 UNREACHABLE(); 941 UNREACHABLE();
930 } 942 }
931 virtual void FillInBMInfo( 943 virtual void FillInBMInfo(int offset,
932 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 944 int recursion_depth,
945 BoyerMooreLookahead* bm,
946 bool not_at_start) {
933 // Returning 0 from EatsAtLeast should ensure we never get here. 947 // Returning 0 from EatsAtLeast should ensure we never get here.
934 UNREACHABLE(); 948 UNREACHABLE();
935 } 949 }
936 950
937 private: 951 private:
938 Action action_; 952 Action action_;
939 }; 953 };
940 954
941 955
942 class NegativeSubmatchSuccess: public EndNode { 956 class NegativeSubmatchSuccess: public EndNode {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1011 int recursion_depth, 1025 int recursion_depth,
1012 bool not_at_start); 1026 bool not_at_start);
1013 int EatsAtLeastHelper(int still_to_find, 1027 int EatsAtLeastHelper(int still_to_find,
1014 int recursion_depth, 1028 int recursion_depth,
1015 RegExpNode* ignore_this_node, 1029 RegExpNode* ignore_this_node,
1016 bool not_at_start); 1030 bool not_at_start);
1017 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1031 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1018 RegExpCompiler* compiler, 1032 RegExpCompiler* compiler,
1019 int characters_filled_in, 1033 int characters_filled_in,
1020 bool not_at_start); 1034 bool not_at_start);
1021 virtual void FillInBMInfo( 1035 virtual void FillInBMInfo(int offset,
1022 int offset, BoyerMooreLookahead* bm, bool not_at_start); 1036 int recursion_depth,
1037 BoyerMooreLookahead* bm,
1038 bool not_at_start);
1023 1039
1024 bool being_calculated() { return being_calculated_; } 1040 bool being_calculated() { return being_calculated_; }
1025 bool not_at_start() { return not_at_start_; } 1041 bool not_at_start() { return not_at_start_; }
1026 void set_not_at_start() { not_at_start_ = true; } 1042 void set_not_at_start() { not_at_start_ = true; }
1027 void set_being_calculated(bool b) { being_calculated_ = b; } 1043 void set_being_calculated(bool b) { being_calculated_ = b; }
1028 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1044 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1029 virtual RegExpNode* FilterASCII(int depth); 1045 virtual RegExpNode* FilterASCII(int depth);
1030 1046
1031 protected: 1047 protected:
1032 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 1048 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
(...skipping 28 matching lines...) Expand all
1061 AddAlternative(this_must_fail); 1077 AddAlternative(this_must_fail);
1062 AddAlternative(then_do_this); 1078 AddAlternative(then_do_this);
1063 } 1079 }
1064 virtual int EatsAtLeast(int still_to_find, 1080 virtual int EatsAtLeast(int still_to_find,
1065 int recursion_depth, 1081 int recursion_depth,
1066 bool not_at_start); 1082 bool not_at_start);
1067 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1083 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1068 RegExpCompiler* compiler, 1084 RegExpCompiler* compiler,
1069 int characters_filled_in, 1085 int characters_filled_in,
1070 bool not_at_start); 1086 bool not_at_start);
1071 virtual void FillInBMInfo( 1087 virtual void FillInBMInfo(int offset,
1072 int offset, BoyerMooreLookahead* bm, bool not_at_start) { 1088 int recursion_depth,
1073 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); 1089 BoyerMooreLookahead* bm,
1090 bool not_at_start) {
1091 alternatives_->at(1).node()->FillInBMInfo(
1092 offset, recursion_depth + 1, bm, not_at_start);
1074 if (offset == 0) set_bm_info(not_at_start, bm); 1093 if (offset == 0) set_bm_info(not_at_start, bm);
1075 } 1094 }
1076 // For a negative lookahead we don't emit the quick check for the 1095 // For a negative lookahead we don't emit the quick check for the
1077 // alternative that is expected to fail. This is because quick check code 1096 // alternative that is expected to fail. This is because quick check code
1078 // starts by loading enough characters for the alternative that takes fewest 1097 // starts by loading enough characters for the alternative that takes fewest
1079 // characters, but on a negative lookahead the negative branch did not take 1098 // characters, but on a negative lookahead the negative branch did not take
1080 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1099 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1081 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1100 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1082 virtual RegExpNode* FilterASCII(int depth); 1101 virtual RegExpNode* FilterASCII(int depth);
1083 }; 1102 };
1084 1103
1085 1104
1086 class LoopChoiceNode: public ChoiceNode { 1105 class LoopChoiceNode: public ChoiceNode {
1087 public: 1106 public:
1088 explicit LoopChoiceNode(bool body_can_be_zero_length) 1107 explicit LoopChoiceNode(bool body_can_be_zero_length)
1089 : ChoiceNode(2), 1108 : ChoiceNode(2),
1090 loop_node_(NULL), 1109 loop_node_(NULL),
1091 continue_node_(NULL), 1110 continue_node_(NULL),
1092 body_can_be_zero_length_(body_can_be_zero_length) { } 1111 body_can_be_zero_length_(body_can_be_zero_length) { }
1093 void AddLoopAlternative(GuardedAlternative alt); 1112 void AddLoopAlternative(GuardedAlternative alt);
1094 void AddContinueAlternative(GuardedAlternative alt); 1113 void AddContinueAlternative(GuardedAlternative alt);
1095 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1114 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1096 virtual int EatsAtLeast(int still_to_find, 1115 virtual int EatsAtLeast(int still_to_find,
1097 int recursion_depth, 1116 int recursion_depth,
1098 bool not_at_start); 1117 bool not_at_start);
1099 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1100 RegExpCompiler* compiler, 1119 RegExpCompiler* compiler,
1101 int characters_filled_in, 1120 int characters_filled_in,
1102 bool not_at_start); 1121 bool not_at_start);
1103 virtual void FillInBMInfo( 1122 virtual void FillInBMInfo(int offset,
1104 int offset, BoyerMooreLookahead* bm, bool not_at_start); 1123 int recursion_depth,
1124 BoyerMooreLookahead* bm,
1125 bool not_at_start);
1105 RegExpNode* loop_node() { return loop_node_; } 1126 RegExpNode* loop_node() { return loop_node_; }
1106 RegExpNode* continue_node() { return continue_node_; } 1127 RegExpNode* continue_node() { return continue_node_; }
1107 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1128 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1108 virtual void Accept(NodeVisitor* visitor); 1129 virtual void Accept(NodeVisitor* visitor);
1109 virtual RegExpNode* FilterASCII(int depth); 1130 virtual RegExpNode* FilterASCII(int depth);
1110 1131
1111 private: 1132 private:
1112 // AddAlternative is made private for loop nodes because alternatives 1133 // AddAlternative is made private for loop nodes because alternatives
1113 // should not be added freely, we need to keep track of which node 1134 // should not be added freely, we need to keep track of which node
1114 // goes back to the node itself. 1135 // goes back to the node itself.
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after
1591 int* vector_; 1612 int* vector_;
1592 int offsets_vector_length_; 1613 int offsets_vector_length_;
1593 1614
1594 friend class ExternalReference; 1615 friend class ExternalReference;
1595 }; 1616 };
1596 1617
1597 1618
1598 } } // namespace v8::internal 1619 } } // namespace v8::internal
1599 1620
1600 #endif // V8_JSREGEXP_H_ 1621 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698