| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 556 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 567 // Only returns the successor for a text node of length 1 that matches any | 567 // Only returns the successor for a text node of length 1 that matches any |
| 568 // character and that has no guards on it. | 568 // character and that has no guards on it. |
| 569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 570 RegExpCompiler* compiler) { | 570 RegExpCompiler* compiler) { |
| 571 return NULL; | 571 return NULL; |
| 572 } | 572 } |
| 573 | 573 |
| 574 // Collects information on the possible code units (mod 128) that can match if | 574 // Collects information on the possible code units (mod 128) that can match if |
| 575 // we look forward. This is used for a Boyer-Moore-like string searching | 575 // we look forward. This is used for a Boyer-Moore-like string searching |
| 576 // implementation. TODO(erikcorry): This should share more code with | 576 // implementation. TODO(erikcorry): This should share more code with |
| 577 // EatsAtLeast, GetQuickCheckDetails. | 577 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit |
| 578 // the number of nodes we are willing to look at in order to create this data. |
| 579 static const int kFillInBMBudget = 200; |
| 578 virtual void FillInBMInfo(int offset, | 580 virtual void FillInBMInfo(int offset, |
| 579 int recursion_depth, | 581 int recursion_depth, |
| 582 int budget, |
| 580 BoyerMooreLookahead* bm, | 583 BoyerMooreLookahead* bm, |
| 581 bool not_at_start) { | 584 bool not_at_start) { |
| 582 UNREACHABLE(); | 585 UNREACHABLE(); |
| 583 } | 586 } |
| 584 | 587 |
| 585 // If we know that the input is ASCII then there are some nodes that can | 588 // If we know that the input is ASCII then there are some nodes that can |
| 586 // never match. This method returns a node that can be substituted for | 589 // never match. This method returns a node that can be substituted for |
| 587 // itself, or NULL if the node can never match. | 590 // itself, or NULL if the node can never match. |
| 588 virtual RegExpNode* FilterASCII(int depth) { return this; } | 591 virtual RegExpNode* FilterASCII(int depth) { return this; } |
| 589 // Helper for FilterASCII. | 592 // Helper for FilterASCII. |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 672 | 675 |
| 673 class SeqRegExpNode: public RegExpNode { | 676 class SeqRegExpNode: public RegExpNode { |
| 674 public: | 677 public: |
| 675 explicit SeqRegExpNode(RegExpNode* on_success) | 678 explicit SeqRegExpNode(RegExpNode* on_success) |
| 676 : on_success_(on_success) { } | 679 : on_success_(on_success) { } |
| 677 RegExpNode* on_success() { return on_success_; } | 680 RegExpNode* on_success() { return on_success_; } |
| 678 void set_on_success(RegExpNode* node) { on_success_ = node; } | 681 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 679 virtual RegExpNode* FilterASCII(int depth); | 682 virtual RegExpNode* FilterASCII(int depth); |
| 680 virtual void FillInBMInfo(int offset, | 683 virtual void FillInBMInfo(int offset, |
| 681 int recursion_depth, | 684 int recursion_depth, |
| 685 int budget, |
| 682 BoyerMooreLookahead* bm, | 686 BoyerMooreLookahead* bm, |
| 683 bool not_at_start) { | 687 bool not_at_start) { |
| 684 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 688 on_success_->FillInBMInfo( |
| 689 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
| 685 if (offset == 0) set_bm_info(not_at_start, bm); | 690 if (offset == 0) set_bm_info(not_at_start, bm); |
| 686 } | 691 } |
| 687 | 692 |
| 688 protected: | 693 protected: |
| 689 RegExpNode* FilterSuccessor(int depth); | 694 RegExpNode* FilterSuccessor(int depth); |
| 690 | 695 |
| 691 private: | 696 private: |
| 692 RegExpNode* on_success_; | 697 RegExpNode* on_success_; |
| 693 }; | 698 }; |
| 694 | 699 |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 729 bool not_at_start); | 734 bool not_at_start); |
| 730 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 735 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 731 RegExpCompiler* compiler, | 736 RegExpCompiler* compiler, |
| 732 int filled_in, | 737 int filled_in, |
| 733 bool not_at_start) { | 738 bool not_at_start) { |
| 734 return on_success()->GetQuickCheckDetails( | 739 return on_success()->GetQuickCheckDetails( |
| 735 details, compiler, filled_in, not_at_start); | 740 details, compiler, filled_in, not_at_start); |
| 736 } | 741 } |
| 737 virtual void FillInBMInfo(int offset, | 742 virtual void FillInBMInfo(int offset, |
| 738 int recursion_depth, | 743 int recursion_depth, |
| 744 int budget, |
| 739 BoyerMooreLookahead* bm, | 745 BoyerMooreLookahead* bm, |
| 740 bool not_at_start); | 746 bool not_at_start); |
| 741 Type type() { return type_; } | 747 Type type() { return type_; } |
| 742 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 748 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 744 | 750 |
| 745 private: | 751 private: |
| 746 union { | 752 union { |
| 747 struct { | 753 struct { |
| 748 int reg; | 754 int reg; |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 800 RegExpCompiler* compiler, | 806 RegExpCompiler* compiler, |
| 801 int characters_filled_in, | 807 int characters_filled_in, |
| 802 bool not_at_start); | 808 bool not_at_start); |
| 803 ZoneList<TextElement>* elements() { return elms_; } | 809 ZoneList<TextElement>* elements() { return elms_; } |
| 804 void MakeCaseIndependent(bool is_ascii); | 810 void MakeCaseIndependent(bool is_ascii); |
| 805 virtual int GreedyLoopTextLength(); | 811 virtual int GreedyLoopTextLength(); |
| 806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 807 RegExpCompiler* compiler); | 813 RegExpCompiler* compiler); |
| 808 virtual void FillInBMInfo(int offset, | 814 virtual void FillInBMInfo(int offset, |
| 809 int recursion_depth, | 815 int recursion_depth, |
| 816 int budget, |
| 810 BoyerMooreLookahead* bm, | 817 BoyerMooreLookahead* bm, |
| 811 bool not_at_start); | 818 bool not_at_start); |
| 812 void CalculateOffsets(); | 819 void CalculateOffsets(); |
| 813 virtual RegExpNode* FilterASCII(int depth); | 820 virtual RegExpNode* FilterASCII(int depth); |
| 814 | 821 |
| 815 private: | 822 private: |
| 816 enum TextEmitPassType { | 823 enum TextEmitPassType { |
| 817 NON_ASCII_MATCH, // Check for characters that can't match. | 824 NON_ASCII_MATCH, // Check for characters that can't match. |
| 818 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 825 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
| 819 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 826 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 862 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 869 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 863 virtual int EatsAtLeast(int still_to_find, | 870 virtual int EatsAtLeast(int still_to_find, |
| 864 int recursion_depth, | 871 int recursion_depth, |
| 865 bool not_at_start); | 872 bool not_at_start); |
| 866 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 873 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 867 RegExpCompiler* compiler, | 874 RegExpCompiler* compiler, |
| 868 int filled_in, | 875 int filled_in, |
| 869 bool not_at_start); | 876 bool not_at_start); |
| 870 virtual void FillInBMInfo(int offset, | 877 virtual void FillInBMInfo(int offset, |
| 871 int recursion_depth, | 878 int recursion_depth, |
| 879 int budget, |
| 872 BoyerMooreLookahead* bm, | 880 BoyerMooreLookahead* bm, |
| 873 bool not_at_start); | 881 bool not_at_start); |
| 874 AssertionNodeType type() { return type_; } | 882 AssertionNodeType type() { return type_; } |
| 875 void set_type(AssertionNodeType type) { type_ = type; } | 883 void set_type(AssertionNodeType type) { type_ = type; } |
| 876 | 884 |
| 877 private: | 885 private: |
| 878 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 886 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
| 879 enum IfPrevious { kIsNonWord, kIsWord }; | 887 enum IfPrevious { kIsNonWord, kIsWord }; |
| 880 void BacktrackIfPrevious(RegExpCompiler* compiler, | 888 void BacktrackIfPrevious(RegExpCompiler* compiler, |
| 881 Trace* trace, | 889 Trace* trace, |
| (...skipping 20 matching lines...) Expand all Loading... |
| 902 int recursion_depth, | 910 int recursion_depth, |
| 903 bool not_at_start); | 911 bool not_at_start); |
| 904 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 912 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 905 RegExpCompiler* compiler, | 913 RegExpCompiler* compiler, |
| 906 int characters_filled_in, | 914 int characters_filled_in, |
| 907 bool not_at_start) { | 915 bool not_at_start) { |
| 908 return; | 916 return; |
| 909 } | 917 } |
| 910 virtual void FillInBMInfo(int offset, | 918 virtual void FillInBMInfo(int offset, |
| 911 int recursion_depth, | 919 int recursion_depth, |
| 920 int budget, |
| 912 BoyerMooreLookahead* bm, | 921 BoyerMooreLookahead* bm, |
| 913 bool not_at_start); | 922 bool not_at_start); |
| 914 | 923 |
| 915 private: | 924 private: |
| 916 int start_reg_; | 925 int start_reg_; |
| 917 int end_reg_; | 926 int end_reg_; |
| 918 }; | 927 }; |
| 919 | 928 |
| 920 | 929 |
| 921 class EndNode: public RegExpNode { | 930 class EndNode: public RegExpNode { |
| 922 public: | 931 public: |
| 923 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 932 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 924 explicit EndNode(Action action) : action_(action) { } | 933 explicit EndNode(Action action) : action_(action) { } |
| 925 virtual void Accept(NodeVisitor* visitor); | 934 virtual void Accept(NodeVisitor* visitor); |
| 926 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 935 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 927 virtual int EatsAtLeast(int still_to_find, | 936 virtual int EatsAtLeast(int still_to_find, |
| 928 int recursion_depth, | 937 int recursion_depth, |
| 929 bool not_at_start) { return 0; } | 938 bool not_at_start) { return 0; } |
| 930 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 939 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 931 RegExpCompiler* compiler, | 940 RegExpCompiler* compiler, |
| 932 int characters_filled_in, | 941 int characters_filled_in, |
| 933 bool not_at_start) { | 942 bool not_at_start) { |
| 934 // Returning 0 from EatsAtLeast should ensure we never get here. | 943 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 935 UNREACHABLE(); | 944 UNREACHABLE(); |
| 936 } | 945 } |
| 937 virtual void FillInBMInfo(int offset, | 946 virtual void FillInBMInfo(int offset, |
| 938 int recursion_depth, | 947 int recursion_depth, |
| 948 int budget, |
| 939 BoyerMooreLookahead* bm, | 949 BoyerMooreLookahead* bm, |
| 940 bool not_at_start) { | 950 bool not_at_start) { |
| 941 // Returning 0 from EatsAtLeast should ensure we never get here. | 951 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 942 UNREACHABLE(); | 952 UNREACHABLE(); |
| 943 } | 953 } |
| 944 | 954 |
| 945 private: | 955 private: |
| 946 Action action_; | 956 Action action_; |
| 947 }; | 957 }; |
| 948 | 958 |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1021 int EatsAtLeastHelper(int still_to_find, | 1031 int EatsAtLeastHelper(int still_to_find, |
| 1022 int recursion_depth, | 1032 int recursion_depth, |
| 1023 RegExpNode* ignore_this_node, | 1033 RegExpNode* ignore_this_node, |
| 1024 bool not_at_start); | 1034 bool not_at_start); |
| 1025 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1035 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1026 RegExpCompiler* compiler, | 1036 RegExpCompiler* compiler, |
| 1027 int characters_filled_in, | 1037 int characters_filled_in, |
| 1028 bool not_at_start); | 1038 bool not_at_start); |
| 1029 virtual void FillInBMInfo(int offset, | 1039 virtual void FillInBMInfo(int offset, |
| 1030 int recursion_depth, | 1040 int recursion_depth, |
| 1041 int budget, |
| 1031 BoyerMooreLookahead* bm, | 1042 BoyerMooreLookahead* bm, |
| 1032 bool not_at_start); | 1043 bool not_at_start); |
| 1033 | 1044 |
| 1034 bool being_calculated() { return being_calculated_; } | 1045 bool being_calculated() { return being_calculated_; } |
| 1035 bool not_at_start() { return not_at_start_; } | 1046 bool not_at_start() { return not_at_start_; } |
| 1036 void set_not_at_start() { not_at_start_ = true; } | 1047 void set_not_at_start() { not_at_start_ = true; } |
| 1037 void set_being_calculated(bool b) { being_calculated_ = b; } | 1048 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 1038 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1049 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
| 1039 virtual RegExpNode* FilterASCII(int depth); | 1050 virtual RegExpNode* FilterASCII(int depth); |
| 1040 | 1051 |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1073 } | 1084 } |
| 1074 virtual int EatsAtLeast(int still_to_find, | 1085 virtual int EatsAtLeast(int still_to_find, |
| 1075 int recursion_depth, | 1086 int recursion_depth, |
| 1076 bool not_at_start); | 1087 bool not_at_start); |
| 1077 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1088 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1078 RegExpCompiler* compiler, | 1089 RegExpCompiler* compiler, |
| 1079 int characters_filled_in, | 1090 int characters_filled_in, |
| 1080 bool not_at_start); | 1091 bool not_at_start); |
| 1081 virtual void FillInBMInfo(int offset, | 1092 virtual void FillInBMInfo(int offset, |
| 1082 int recursion_depth, | 1093 int recursion_depth, |
| 1094 int budget, |
| 1083 BoyerMooreLookahead* bm, | 1095 BoyerMooreLookahead* bm, |
| 1084 bool not_at_start) { | 1096 bool not_at_start) { |
| 1085 alternatives_->at(1).node()->FillInBMInfo( | 1097 alternatives_->at(1).node()->FillInBMInfo( |
| 1086 offset, recursion_depth + 1, bm, not_at_start); | 1098 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
| 1087 if (offset == 0) set_bm_info(not_at_start, bm); | 1099 if (offset == 0) set_bm_info(not_at_start, bm); |
| 1088 } | 1100 } |
| 1089 // For a negative lookahead we don't emit the quick check for the | 1101 // For a negative lookahead we don't emit the quick check for the |
| 1090 // alternative that is expected to fail. This is because quick check code | 1102 // alternative that is expected to fail. This is because quick check code |
| 1091 // starts by loading enough characters for the alternative that takes fewest | 1103 // starts by loading enough characters for the alternative that takes fewest |
| 1092 // characters, but on a negative lookahead the negative branch did not take | 1104 // characters, but on a negative lookahead the negative branch did not take |
| 1093 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1105 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 1094 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1106 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1095 virtual RegExpNode* FilterASCII(int depth); | 1107 virtual RegExpNode* FilterASCII(int depth); |
| 1096 }; | 1108 }; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 1108 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1120 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 1109 virtual int EatsAtLeast(int still_to_find, | 1121 virtual int EatsAtLeast(int still_to_find, |
| 1110 int recursion_depth, | 1122 int recursion_depth, |
| 1111 bool not_at_start); | 1123 bool not_at_start); |
| 1112 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1124 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1113 RegExpCompiler* compiler, | 1125 RegExpCompiler* compiler, |
| 1114 int characters_filled_in, | 1126 int characters_filled_in, |
| 1115 bool not_at_start); | 1127 bool not_at_start); |
| 1116 virtual void FillInBMInfo(int offset, | 1128 virtual void FillInBMInfo(int offset, |
| 1117 int recursion_depth, | 1129 int recursion_depth, |
| 1130 int budget, |
| 1118 BoyerMooreLookahead* bm, | 1131 BoyerMooreLookahead* bm, |
| 1119 bool not_at_start); | 1132 bool not_at_start); |
| 1120 RegExpNode* loop_node() { return loop_node_; } | 1133 RegExpNode* loop_node() { return loop_node_; } |
| 1121 RegExpNode* continue_node() { return continue_node_; } | 1134 RegExpNode* continue_node() { return continue_node_; } |
| 1122 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1135 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 1123 virtual void Accept(NodeVisitor* visitor); | 1136 virtual void Accept(NodeVisitor* visitor); |
| 1124 virtual RegExpNode* FilterASCII(int depth); | 1137 virtual RegExpNode* FilterASCII(int depth); |
| 1125 | 1138 |
| 1126 private: | 1139 private: |
| 1127 // AddAlternative is made private for loop nodes because alternatives | 1140 // AddAlternative is made private for loop nodes because alternatives |
| (...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1604 int* vector_; | 1617 int* vector_; |
| 1605 int offsets_vector_length_; | 1618 int offsets_vector_length_; |
| 1606 | 1619 |
| 1607 friend class ExternalReference; | 1620 friend class ExternalReference; |
| 1608 }; | 1621 }; |
| 1609 | 1622 |
| 1610 | 1623 |
| 1611 } } // namespace v8::internal | 1624 } } // namespace v8::internal |
| 1612 | 1625 |
| 1613 #endif // V8_JSREGEXP_H_ | 1626 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |