OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
574 // character and that has no guards on it. | 574 // character and that has no guards on it. |
575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
576 RegExpCompiler* compiler) { | 576 RegExpCompiler* compiler) { |
577 return NULL; | 577 return NULL; |
578 } | 578 } |
579 | 579 |
580 // Collects information on the possible code units (mod 128) that can match if | 580 // Collects information on the possible code units (mod 128) that can match if |
581 // we look forward. This is used for a Boyer-Moore-like string searching | 581 // we look forward. This is used for a Boyer-Moore-like string searching |
582 // implementation. TODO(erikcorry): This should share more code with | 582 // implementation. TODO(erikcorry): This should share more code with |
583 // EatsAtLeast, GetQuickCheckDetails. | 583 // EatsAtLeast, GetQuickCheckDetails. |
584 static const int kFillInBMBudget = 200; | |
584 virtual void FillInBMInfo(int offset, | 585 virtual void FillInBMInfo(int offset, |
585 int recursion_depth, | 586 int recursion_depth, |
587 int budget, | |
ulan
2012/06/01 11:24:04
Please add a description for budget.
| |
586 BoyerMooreLookahead* bm, | 588 BoyerMooreLookahead* bm, |
587 bool not_at_start) { | 589 bool not_at_start) { |
588 UNREACHABLE(); | 590 UNREACHABLE(); |
589 } | 591 } |
590 | 592 |
591 // If we know that the input is ASCII then there are some nodes that can | 593 // If we know that the input is ASCII then there are some nodes that can |
592 // never match. This method returns a node that can be substituted for | 594 // never match. This method returns a node that can be substituted for |
593 // itself, or NULL if the node can never match. | 595 // itself, or NULL if the node can never match. |
594 virtual RegExpNode* FilterASCII(int depth) { return this; } | 596 virtual RegExpNode* FilterASCII(int depth) { return this; } |
595 // Helper for FilterASCII. | 597 // Helper for FilterASCII. |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
678 | 680 |
679 class SeqRegExpNode: public RegExpNode { | 681 class SeqRegExpNode: public RegExpNode { |
680 public: | 682 public: |
681 explicit SeqRegExpNode(RegExpNode* on_success) | 683 explicit SeqRegExpNode(RegExpNode* on_success) |
682 : on_success_(on_success) { } | 684 : on_success_(on_success) { } |
683 RegExpNode* on_success() { return on_success_; } | 685 RegExpNode* on_success() { return on_success_; } |
684 void set_on_success(RegExpNode* node) { on_success_ = node; } | 686 void set_on_success(RegExpNode* node) { on_success_ = node; } |
685 virtual RegExpNode* FilterASCII(int depth); | 687 virtual RegExpNode* FilterASCII(int depth); |
686 virtual void FillInBMInfo(int offset, | 688 virtual void FillInBMInfo(int offset, |
687 int recursion_depth, | 689 int recursion_depth, |
690 int budget, | |
688 BoyerMooreLookahead* bm, | 691 BoyerMooreLookahead* bm, |
689 bool not_at_start) { | 692 bool not_at_start) { |
690 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 693 on_success_->FillInBMInfo( |
694 offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
691 if (offset == 0) set_bm_info(not_at_start, bm); | 695 if (offset == 0) set_bm_info(not_at_start, bm); |
692 } | 696 } |
693 | 697 |
694 protected: | 698 protected: |
695 RegExpNode* FilterSuccessor(int depth); | 699 RegExpNode* FilterSuccessor(int depth); |
696 | 700 |
697 private: | 701 private: |
698 RegExpNode* on_success_; | 702 RegExpNode* on_success_; |
699 }; | 703 }; |
700 | 704 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
735 bool not_at_start); | 739 bool not_at_start); |
736 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 740 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
737 RegExpCompiler* compiler, | 741 RegExpCompiler* compiler, |
738 int filled_in, | 742 int filled_in, |
739 bool not_at_start) { | 743 bool not_at_start) { |
740 return on_success()->GetQuickCheckDetails( | 744 return on_success()->GetQuickCheckDetails( |
741 details, compiler, filled_in, not_at_start); | 745 details, compiler, filled_in, not_at_start); |
742 } | 746 } |
743 virtual void FillInBMInfo(int offset, | 747 virtual void FillInBMInfo(int offset, |
744 int recursion_depth, | 748 int recursion_depth, |
749 int budget, | |
745 BoyerMooreLookahead* bm, | 750 BoyerMooreLookahead* bm, |
746 bool not_at_start); | 751 bool not_at_start); |
747 Type type() { return type_; } | 752 Type type() { return type_; } |
748 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 753 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 754 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
750 | 755 |
751 private: | 756 private: |
752 union { | 757 union { |
753 struct { | 758 struct { |
754 int reg; | 759 int reg; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
806 RegExpCompiler* compiler, | 811 RegExpCompiler* compiler, |
807 int characters_filled_in, | 812 int characters_filled_in, |
808 bool not_at_start); | 813 bool not_at_start); |
809 ZoneList<TextElement>* elements() { return elms_; } | 814 ZoneList<TextElement>* elements() { return elms_; } |
810 void MakeCaseIndependent(bool is_ascii); | 815 void MakeCaseIndependent(bool is_ascii); |
811 virtual int GreedyLoopTextLength(); | 816 virtual int GreedyLoopTextLength(); |
812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 817 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
813 RegExpCompiler* compiler); | 818 RegExpCompiler* compiler); |
814 virtual void FillInBMInfo(int offset, | 819 virtual void FillInBMInfo(int offset, |
815 int recursion_depth, | 820 int recursion_depth, |
821 int budget, | |
816 BoyerMooreLookahead* bm, | 822 BoyerMooreLookahead* bm, |
817 bool not_at_start); | 823 bool not_at_start); |
818 void CalculateOffsets(); | 824 void CalculateOffsets(); |
819 virtual RegExpNode* FilterASCII(int depth); | 825 virtual RegExpNode* FilterASCII(int depth); |
820 | 826 |
821 private: | 827 private: |
822 enum TextEmitPassType { | 828 enum TextEmitPassType { |
823 NON_ASCII_MATCH, // Check for characters that can't match. | 829 NON_ASCII_MATCH, // Check for characters that can't match. |
824 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 830 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
825 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 831 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
868 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 874 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
869 virtual int EatsAtLeast(int still_to_find, | 875 virtual int EatsAtLeast(int still_to_find, |
870 int recursion_depth, | 876 int recursion_depth, |
871 bool not_at_start); | 877 bool not_at_start); |
872 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 878 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
873 RegExpCompiler* compiler, | 879 RegExpCompiler* compiler, |
874 int filled_in, | 880 int filled_in, |
875 bool not_at_start); | 881 bool not_at_start); |
876 virtual void FillInBMInfo(int offset, | 882 virtual void FillInBMInfo(int offset, |
877 int recursion_depth, | 883 int recursion_depth, |
884 int budget, | |
878 BoyerMooreLookahead* bm, | 885 BoyerMooreLookahead* bm, |
879 bool not_at_start); | 886 bool not_at_start); |
880 AssertionNodeType type() { return type_; } | 887 AssertionNodeType type() { return type_; } |
881 void set_type(AssertionNodeType type) { type_ = type; } | 888 void set_type(AssertionNodeType type) { type_ = type; } |
882 | 889 |
883 private: | 890 private: |
884 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 891 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
885 enum IfPrevious { kIsNonWord, kIsWord }; | 892 enum IfPrevious { kIsNonWord, kIsWord }; |
886 void BacktrackIfPrevious(RegExpCompiler* compiler, | 893 void BacktrackIfPrevious(RegExpCompiler* compiler, |
887 Trace* trace, | 894 Trace* trace, |
(...skipping 20 matching lines...) Expand all Loading... | |
908 int recursion_depth, | 915 int recursion_depth, |
909 bool not_at_start); | 916 bool not_at_start); |
910 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
911 RegExpCompiler* compiler, | 918 RegExpCompiler* compiler, |
912 int characters_filled_in, | 919 int characters_filled_in, |
913 bool not_at_start) { | 920 bool not_at_start) { |
914 return; | 921 return; |
915 } | 922 } |
916 virtual void FillInBMInfo(int offset, | 923 virtual void FillInBMInfo(int offset, |
917 int recursion_depth, | 924 int recursion_depth, |
925 int budget, | |
918 BoyerMooreLookahead* bm, | 926 BoyerMooreLookahead* bm, |
919 bool not_at_start); | 927 bool not_at_start); |
920 | 928 |
921 private: | 929 private: |
922 int start_reg_; | 930 int start_reg_; |
923 int end_reg_; | 931 int end_reg_; |
924 }; | 932 }; |
925 | 933 |
926 | 934 |
927 class EndNode: public RegExpNode { | 935 class EndNode: public RegExpNode { |
928 public: | 936 public: |
929 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 937 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
930 explicit EndNode(Action action) : action_(action) { } | 938 explicit EndNode(Action action) : action_(action) { } |
931 virtual void Accept(NodeVisitor* visitor); | 939 virtual void Accept(NodeVisitor* visitor); |
932 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 940 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
933 virtual int EatsAtLeast(int still_to_find, | 941 virtual int EatsAtLeast(int still_to_find, |
934 int recursion_depth, | 942 int recursion_depth, |
935 bool not_at_start) { return 0; } | 943 bool not_at_start) { return 0; } |
936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 944 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
937 RegExpCompiler* compiler, | 945 RegExpCompiler* compiler, |
938 int characters_filled_in, | 946 int characters_filled_in, |
939 bool not_at_start) { | 947 bool not_at_start) { |
940 // Returning 0 from EatsAtLeast should ensure we never get here. | 948 // Returning 0 from EatsAtLeast should ensure we never get here. |
941 UNREACHABLE(); | 949 UNREACHABLE(); |
942 } | 950 } |
943 virtual void FillInBMInfo(int offset, | 951 virtual void FillInBMInfo(int offset, |
944 int recursion_depth, | 952 int recursion_depth, |
953 int budget, | |
945 BoyerMooreLookahead* bm, | 954 BoyerMooreLookahead* bm, |
946 bool not_at_start) { | 955 bool not_at_start) { |
947 // Returning 0 from EatsAtLeast should ensure we never get here. | 956 // Returning 0 from EatsAtLeast should ensure we never get here. |
948 UNREACHABLE(); | 957 UNREACHABLE(); |
949 } | 958 } |
950 | 959 |
951 private: | 960 private: |
952 Action action_; | 961 Action action_; |
953 }; | 962 }; |
954 | 963 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1027 int EatsAtLeastHelper(int still_to_find, | 1036 int EatsAtLeastHelper(int still_to_find, |
1028 int recursion_depth, | 1037 int recursion_depth, |
1029 RegExpNode* ignore_this_node, | 1038 RegExpNode* ignore_this_node, |
1030 bool not_at_start); | 1039 bool not_at_start); |
1031 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1040 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1032 RegExpCompiler* compiler, | 1041 RegExpCompiler* compiler, |
1033 int characters_filled_in, | 1042 int characters_filled_in, |
1034 bool not_at_start); | 1043 bool not_at_start); |
1035 virtual void FillInBMInfo(int offset, | 1044 virtual void FillInBMInfo(int offset, |
1036 int recursion_depth, | 1045 int recursion_depth, |
1046 int budget, | |
1037 BoyerMooreLookahead* bm, | 1047 BoyerMooreLookahead* bm, |
1038 bool not_at_start); | 1048 bool not_at_start); |
1039 | 1049 |
1040 bool being_calculated() { return being_calculated_; } | 1050 bool being_calculated() { return being_calculated_; } |
1041 bool not_at_start() { return not_at_start_; } | 1051 bool not_at_start() { return not_at_start_; } |
1042 void set_not_at_start() { not_at_start_ = true; } | 1052 void set_not_at_start() { not_at_start_ = true; } |
1043 void set_being_calculated(bool b) { being_calculated_ = b; } | 1053 void set_being_calculated(bool b) { being_calculated_ = b; } |
1044 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1054 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
1045 virtual RegExpNode* FilterASCII(int depth); | 1055 virtual RegExpNode* FilterASCII(int depth); |
1046 | 1056 |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1079 } | 1089 } |
1080 virtual int EatsAtLeast(int still_to_find, | 1090 virtual int EatsAtLeast(int still_to_find, |
1081 int recursion_depth, | 1091 int recursion_depth, |
1082 bool not_at_start); | 1092 bool not_at_start); |
1083 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1093 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1084 RegExpCompiler* compiler, | 1094 RegExpCompiler* compiler, |
1085 int characters_filled_in, | 1095 int characters_filled_in, |
1086 bool not_at_start); | 1096 bool not_at_start); |
1087 virtual void FillInBMInfo(int offset, | 1097 virtual void FillInBMInfo(int offset, |
1088 int recursion_depth, | 1098 int recursion_depth, |
1099 int budget, | |
1089 BoyerMooreLookahead* bm, | 1100 BoyerMooreLookahead* bm, |
1090 bool not_at_start) { | 1101 bool not_at_start) { |
1091 alternatives_->at(1).node()->FillInBMInfo( | 1102 alternatives_->at(1).node()->FillInBMInfo( |
1092 offset, recursion_depth + 1, bm, not_at_start); | 1103 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
1093 if (offset == 0) set_bm_info(not_at_start, bm); | 1104 if (offset == 0) set_bm_info(not_at_start, bm); |
1094 } | 1105 } |
1095 // For a negative lookahead we don't emit the quick check for the | 1106 // For a negative lookahead we don't emit the quick check for the |
1096 // alternative that is expected to fail. This is because quick check code | 1107 // alternative that is expected to fail. This is because quick check code |
1097 // starts by loading enough characters for the alternative that takes fewest | 1108 // starts by loading enough characters for the alternative that takes fewest |
1098 // characters, but on a negative lookahead the negative branch did not take | 1109 // characters, but on a negative lookahead the negative branch did not take |
1099 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1110 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1100 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1111 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
1101 virtual RegExpNode* FilterASCII(int depth); | 1112 virtual RegExpNode* FilterASCII(int depth); |
1102 }; | 1113 }; |
(...skipping 11 matching lines...) Expand all Loading... | |
1114 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1125 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1115 virtual int EatsAtLeast(int still_to_find, | 1126 virtual int EatsAtLeast(int still_to_find, |
1116 int recursion_depth, | 1127 int recursion_depth, |
1117 bool not_at_start); | 1128 bool not_at_start); |
1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1129 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1119 RegExpCompiler* compiler, | 1130 RegExpCompiler* compiler, |
1120 int characters_filled_in, | 1131 int characters_filled_in, |
1121 bool not_at_start); | 1132 bool not_at_start); |
1122 virtual void FillInBMInfo(int offset, | 1133 virtual void FillInBMInfo(int offset, |
1123 int recursion_depth, | 1134 int recursion_depth, |
1135 int budget, | |
1124 BoyerMooreLookahead* bm, | 1136 BoyerMooreLookahead* bm, |
1125 bool not_at_start); | 1137 bool not_at_start); |
1126 RegExpNode* loop_node() { return loop_node_; } | 1138 RegExpNode* loop_node() { return loop_node_; } |
1127 RegExpNode* continue_node() { return continue_node_; } | 1139 RegExpNode* continue_node() { return continue_node_; } |
1128 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1140 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1129 virtual void Accept(NodeVisitor* visitor); | 1141 virtual void Accept(NodeVisitor* visitor); |
1130 virtual RegExpNode* FilterASCII(int depth); | 1142 virtual RegExpNode* FilterASCII(int depth); |
1131 | 1143 |
1132 private: | 1144 private: |
1133 // AddAlternative is made private for loop nodes because alternatives | 1145 // AddAlternative is made private for loop nodes because alternatives |
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1612 int* vector_; | 1624 int* vector_; |
1613 int offsets_vector_length_; | 1625 int offsets_vector_length_; |
1614 | 1626 |
1615 friend class ExternalReference; | 1627 friend class ExternalReference; |
1616 }; | 1628 }; |
1617 | 1629 |
1618 | 1630 |
1619 } } // namespace v8::internal | 1631 } } // namespace v8::internal |
1620 | 1632 |
1621 #endif // V8_JSREGEXP_H_ | 1633 #endif // V8_JSREGEXP_H_ |
OLD | NEW |