Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 574 // character and that has no guards on it. | 574 // character and that has no guards on it. |
| 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 576 RegExpCompiler* compiler) { | 576 RegExpCompiler* compiler) { |
| 577 return NULL; | 577 return NULL; |
| 578 } | 578 } |
| 579 | 579 |
| 580 // Collects information on the possible code units (mod 128) that can match if | 580 // Collects information on the possible code units (mod 128) that can match if |
| 581 // we look forward. This is used for a Boyer-Moore-like string searching | 581 // we look forward. This is used for a Boyer-Moore-like string searching |
| 582 // implementation. TODO(erikcorry): This should share more code with | 582 // implementation. TODO(erikcorry): This should share more code with |
| 583 // EatsAtLeast, GetQuickCheckDetails. | 583 // EatsAtLeast, GetQuickCheckDetails. |
| 584 static const int kFillInBMBudget = 200; | |
| 584 virtual void FillInBMInfo(int offset, | 585 virtual void FillInBMInfo(int offset, |
| 585 int recursion_depth, | 586 int recursion_depth, |
| 587 int budget, | |
|
ulan
2012/06/01 11:24:04
Please add a description for budget.
| |
| 586 BoyerMooreLookahead* bm, | 588 BoyerMooreLookahead* bm, |
| 587 bool not_at_start) { | 589 bool not_at_start) { |
| 588 UNREACHABLE(); | 590 UNREACHABLE(); |
| 589 } | 591 } |
| 590 | 592 |
| 591 // If we know that the input is ASCII then there are some nodes that can | 593 // If we know that the input is ASCII then there are some nodes that can |
| 592 // never match. This method returns a node that can be substituted for | 594 // never match. This method returns a node that can be substituted for |
| 593 // itself, or NULL if the node can never match. | 595 // itself, or NULL if the node can never match. |
| 594 virtual RegExpNode* FilterASCII(int depth) { return this; } | 596 virtual RegExpNode* FilterASCII(int depth) { return this; } |
| 595 // Helper for FilterASCII. | 597 // Helper for FilterASCII. |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 678 | 680 |
| 679 class SeqRegExpNode: public RegExpNode { | 681 class SeqRegExpNode: public RegExpNode { |
| 680 public: | 682 public: |
| 681 explicit SeqRegExpNode(RegExpNode* on_success) | 683 explicit SeqRegExpNode(RegExpNode* on_success) |
| 682 : on_success_(on_success) { } | 684 : on_success_(on_success) { } |
| 683 RegExpNode* on_success() { return on_success_; } | 685 RegExpNode* on_success() { return on_success_; } |
| 684 void set_on_success(RegExpNode* node) { on_success_ = node; } | 686 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 685 virtual RegExpNode* FilterASCII(int depth); | 687 virtual RegExpNode* FilterASCII(int depth); |
| 686 virtual void FillInBMInfo(int offset, | 688 virtual void FillInBMInfo(int offset, |
| 687 int recursion_depth, | 689 int recursion_depth, |
| 690 int budget, | |
| 688 BoyerMooreLookahead* bm, | 691 BoyerMooreLookahead* bm, |
| 689 bool not_at_start) { | 692 bool not_at_start) { |
| 690 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 693 on_success_->FillInBMInfo( |
| 694 offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
| 691 if (offset == 0) set_bm_info(not_at_start, bm); | 695 if (offset == 0) set_bm_info(not_at_start, bm); |
| 692 } | 696 } |
| 693 | 697 |
| 694 protected: | 698 protected: |
| 695 RegExpNode* FilterSuccessor(int depth); | 699 RegExpNode* FilterSuccessor(int depth); |
| 696 | 700 |
| 697 private: | 701 private: |
| 698 RegExpNode* on_success_; | 702 RegExpNode* on_success_; |
| 699 }; | 703 }; |
| 700 | 704 |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 735 bool not_at_start); | 739 bool not_at_start); |
| 736 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 740 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 737 RegExpCompiler* compiler, | 741 RegExpCompiler* compiler, |
| 738 int filled_in, | 742 int filled_in, |
| 739 bool not_at_start) { | 743 bool not_at_start) { |
| 740 return on_success()->GetQuickCheckDetails( | 744 return on_success()->GetQuickCheckDetails( |
| 741 details, compiler, filled_in, not_at_start); | 745 details, compiler, filled_in, not_at_start); |
| 742 } | 746 } |
| 743 virtual void FillInBMInfo(int offset, | 747 virtual void FillInBMInfo(int offset, |
| 744 int recursion_depth, | 748 int recursion_depth, |
| 749 int budget, | |
| 745 BoyerMooreLookahead* bm, | 750 BoyerMooreLookahead* bm, |
| 746 bool not_at_start); | 751 bool not_at_start); |
| 747 Type type() { return type_; } | 752 Type type() { return type_; } |
| 748 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 753 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 754 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 750 | 755 |
| 751 private: | 756 private: |
| 752 union { | 757 union { |
| 753 struct { | 758 struct { |
| 754 int reg; | 759 int reg; |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 806 RegExpCompiler* compiler, | 811 RegExpCompiler* compiler, |
| 807 int characters_filled_in, | 812 int characters_filled_in, |
| 808 bool not_at_start); | 813 bool not_at_start); |
| 809 ZoneList<TextElement>* elements() { return elms_; } | 814 ZoneList<TextElement>* elements() { return elms_; } |
| 810 void MakeCaseIndependent(bool is_ascii); | 815 void MakeCaseIndependent(bool is_ascii); |
| 811 virtual int GreedyLoopTextLength(); | 816 virtual int GreedyLoopTextLength(); |
| 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 817 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 813 RegExpCompiler* compiler); | 818 RegExpCompiler* compiler); |
| 814 virtual void FillInBMInfo(int offset, | 819 virtual void FillInBMInfo(int offset, |
| 815 int recursion_depth, | 820 int recursion_depth, |
| 821 int budget, | |
| 816 BoyerMooreLookahead* bm, | 822 BoyerMooreLookahead* bm, |
| 817 bool not_at_start); | 823 bool not_at_start); |
| 818 void CalculateOffsets(); | 824 void CalculateOffsets(); |
| 819 virtual RegExpNode* FilterASCII(int depth); | 825 virtual RegExpNode* FilterASCII(int depth); |
| 820 | 826 |
| 821 private: | 827 private: |
| 822 enum TextEmitPassType { | 828 enum TextEmitPassType { |
| 823 NON_ASCII_MATCH, // Check for characters that can't match. | 829 NON_ASCII_MATCH, // Check for characters that can't match. |
| 824 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 830 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
| 825 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 831 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 868 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 874 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 869 virtual int EatsAtLeast(int still_to_find, | 875 virtual int EatsAtLeast(int still_to_find, |
| 870 int recursion_depth, | 876 int recursion_depth, |
| 871 bool not_at_start); | 877 bool not_at_start); |
| 872 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 878 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 873 RegExpCompiler* compiler, | 879 RegExpCompiler* compiler, |
| 874 int filled_in, | 880 int filled_in, |
| 875 bool not_at_start); | 881 bool not_at_start); |
| 876 virtual void FillInBMInfo(int offset, | 882 virtual void FillInBMInfo(int offset, |
| 877 int recursion_depth, | 883 int recursion_depth, |
| 884 int budget, | |
| 878 BoyerMooreLookahead* bm, | 885 BoyerMooreLookahead* bm, |
| 879 bool not_at_start); | 886 bool not_at_start); |
| 880 AssertionNodeType type() { return type_; } | 887 AssertionNodeType type() { return type_; } |
| 881 void set_type(AssertionNodeType type) { type_ = type; } | 888 void set_type(AssertionNodeType type) { type_ = type; } |
| 882 | 889 |
| 883 private: | 890 private: |
| 884 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 891 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
| 885 enum IfPrevious { kIsNonWord, kIsWord }; | 892 enum IfPrevious { kIsNonWord, kIsWord }; |
| 886 void BacktrackIfPrevious(RegExpCompiler* compiler, | 893 void BacktrackIfPrevious(RegExpCompiler* compiler, |
| 887 Trace* trace, | 894 Trace* trace, |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 908 int recursion_depth, | 915 int recursion_depth, |
| 909 bool not_at_start); | 916 bool not_at_start); |
| 910 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 911 RegExpCompiler* compiler, | 918 RegExpCompiler* compiler, |
| 912 int characters_filled_in, | 919 int characters_filled_in, |
| 913 bool not_at_start) { | 920 bool not_at_start) { |
| 914 return; | 921 return; |
| 915 } | 922 } |
| 916 virtual void FillInBMInfo(int offset, | 923 virtual void FillInBMInfo(int offset, |
| 917 int recursion_depth, | 924 int recursion_depth, |
| 925 int budget, | |
| 918 BoyerMooreLookahead* bm, | 926 BoyerMooreLookahead* bm, |
| 919 bool not_at_start); | 927 bool not_at_start); |
| 920 | 928 |
| 921 private: | 929 private: |
| 922 int start_reg_; | 930 int start_reg_; |
| 923 int end_reg_; | 931 int end_reg_; |
| 924 }; | 932 }; |
| 925 | 933 |
| 926 | 934 |
| 927 class EndNode: public RegExpNode { | 935 class EndNode: public RegExpNode { |
| 928 public: | 936 public: |
| 929 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 937 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 930 explicit EndNode(Action action) : action_(action) { } | 938 explicit EndNode(Action action) : action_(action) { } |
| 931 virtual void Accept(NodeVisitor* visitor); | 939 virtual void Accept(NodeVisitor* visitor); |
| 932 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 940 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 933 virtual int EatsAtLeast(int still_to_find, | 941 virtual int EatsAtLeast(int still_to_find, |
| 934 int recursion_depth, | 942 int recursion_depth, |
| 935 bool not_at_start) { return 0; } | 943 bool not_at_start) { return 0; } |
| 936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 944 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 937 RegExpCompiler* compiler, | 945 RegExpCompiler* compiler, |
| 938 int characters_filled_in, | 946 int characters_filled_in, |
| 939 bool not_at_start) { | 947 bool not_at_start) { |
| 940 // Returning 0 from EatsAtLeast should ensure we never get here. | 948 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 941 UNREACHABLE(); | 949 UNREACHABLE(); |
| 942 } | 950 } |
| 943 virtual void FillInBMInfo(int offset, | 951 virtual void FillInBMInfo(int offset, |
| 944 int recursion_depth, | 952 int recursion_depth, |
| 953 int budget, | |
| 945 BoyerMooreLookahead* bm, | 954 BoyerMooreLookahead* bm, |
| 946 bool not_at_start) { | 955 bool not_at_start) { |
| 947 // Returning 0 from EatsAtLeast should ensure we never get here. | 956 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 948 UNREACHABLE(); | 957 UNREACHABLE(); |
| 949 } | 958 } |
| 950 | 959 |
| 951 private: | 960 private: |
| 952 Action action_; | 961 Action action_; |
| 953 }; | 962 }; |
| 954 | 963 |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1027 int EatsAtLeastHelper(int still_to_find, | 1036 int EatsAtLeastHelper(int still_to_find, |
| 1028 int recursion_depth, | 1037 int recursion_depth, |
| 1029 RegExpNode* ignore_this_node, | 1038 RegExpNode* ignore_this_node, |
| 1030 bool not_at_start); | 1039 bool not_at_start); |
| 1031 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1040 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1032 RegExpCompiler* compiler, | 1041 RegExpCompiler* compiler, |
| 1033 int characters_filled_in, | 1042 int characters_filled_in, |
| 1034 bool not_at_start); | 1043 bool not_at_start); |
| 1035 virtual void FillInBMInfo(int offset, | 1044 virtual void FillInBMInfo(int offset, |
| 1036 int recursion_depth, | 1045 int recursion_depth, |
| 1046 int budget, | |
| 1037 BoyerMooreLookahead* bm, | 1047 BoyerMooreLookahead* bm, |
| 1038 bool not_at_start); | 1048 bool not_at_start); |
| 1039 | 1049 |
| 1040 bool being_calculated() { return being_calculated_; } | 1050 bool being_calculated() { return being_calculated_; } |
| 1041 bool not_at_start() { return not_at_start_; } | 1051 bool not_at_start() { return not_at_start_; } |
| 1042 void set_not_at_start() { not_at_start_ = true; } | 1052 void set_not_at_start() { not_at_start_ = true; } |
| 1043 void set_being_calculated(bool b) { being_calculated_ = b; } | 1053 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 1044 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1054 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
| 1045 virtual RegExpNode* FilterASCII(int depth); | 1055 virtual RegExpNode* FilterASCII(int depth); |
| 1046 | 1056 |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1079 } | 1089 } |
| 1080 virtual int EatsAtLeast(int still_to_find, | 1090 virtual int EatsAtLeast(int still_to_find, |
| 1081 int recursion_depth, | 1091 int recursion_depth, |
| 1082 bool not_at_start); | 1092 bool not_at_start); |
| 1083 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1093 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1084 RegExpCompiler* compiler, | 1094 RegExpCompiler* compiler, |
| 1085 int characters_filled_in, | 1095 int characters_filled_in, |
| 1086 bool not_at_start); | 1096 bool not_at_start); |
| 1087 virtual void FillInBMInfo(int offset, | 1097 virtual void FillInBMInfo(int offset, |
| 1088 int recursion_depth, | 1098 int recursion_depth, |
| 1099 int budget, | |
| 1089 BoyerMooreLookahead* bm, | 1100 BoyerMooreLookahead* bm, |
| 1090 bool not_at_start) { | 1101 bool not_at_start) { |
| 1091 alternatives_->at(1).node()->FillInBMInfo( | 1102 alternatives_->at(1).node()->FillInBMInfo( |
| 1092 offset, recursion_depth + 1, bm, not_at_start); | 1103 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
| 1093 if (offset == 0) set_bm_info(not_at_start, bm); | 1104 if (offset == 0) set_bm_info(not_at_start, bm); |
| 1094 } | 1105 } |
| 1095 // For a negative lookahead we don't emit the quick check for the | 1106 // For a negative lookahead we don't emit the quick check for the |
| 1096 // alternative that is expected to fail. This is because quick check code | 1107 // alternative that is expected to fail. This is because quick check code |
| 1097 // starts by loading enough characters for the alternative that takes fewest | 1108 // starts by loading enough characters for the alternative that takes fewest |
| 1098 // characters, but on a negative lookahead the negative branch did not take | 1109 // characters, but on a negative lookahead the negative branch did not take |
| 1099 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1110 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 1100 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1111 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1101 virtual RegExpNode* FilterASCII(int depth); | 1112 virtual RegExpNode* FilterASCII(int depth); |
| 1102 }; | 1113 }; |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 1114 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1125 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 1115 virtual int EatsAtLeast(int still_to_find, | 1126 virtual int EatsAtLeast(int still_to_find, |
| 1116 int recursion_depth, | 1127 int recursion_depth, |
| 1117 bool not_at_start); | 1128 bool not_at_start); |
| 1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1129 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1119 RegExpCompiler* compiler, | 1130 RegExpCompiler* compiler, |
| 1120 int characters_filled_in, | 1131 int characters_filled_in, |
| 1121 bool not_at_start); | 1132 bool not_at_start); |
| 1122 virtual void FillInBMInfo(int offset, | 1133 virtual void FillInBMInfo(int offset, |
| 1123 int recursion_depth, | 1134 int recursion_depth, |
| 1135 int budget, | |
| 1124 BoyerMooreLookahead* bm, | 1136 BoyerMooreLookahead* bm, |
| 1125 bool not_at_start); | 1137 bool not_at_start); |
| 1126 RegExpNode* loop_node() { return loop_node_; } | 1138 RegExpNode* loop_node() { return loop_node_; } |
| 1127 RegExpNode* continue_node() { return continue_node_; } | 1139 RegExpNode* continue_node() { return continue_node_; } |
| 1128 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1140 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 1129 virtual void Accept(NodeVisitor* visitor); | 1141 virtual void Accept(NodeVisitor* visitor); |
| 1130 virtual RegExpNode* FilterASCII(int depth); | 1142 virtual RegExpNode* FilterASCII(int depth); |
| 1131 | 1143 |
| 1132 private: | 1144 private: |
| 1133 // AddAlternative is made private for loop nodes because alternatives | 1145 // AddAlternative is made private for loop nodes because alternatives |
| (...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1612 int* vector_; | 1624 int* vector_; |
| 1613 int offsets_vector_length_; | 1625 int offsets_vector_length_; |
| 1614 | 1626 |
| 1615 friend class ExternalReference; | 1627 friend class ExternalReference; |
| 1616 }; | 1628 }; |
| 1617 | 1629 |
| 1618 | 1630 |
| 1619 } } // namespace v8::internal | 1631 } } // namespace v8::internal |
| 1620 | 1632 |
| 1621 #endif // V8_JSREGEXP_H_ | 1633 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |