OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 556 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
567 // Only returns the successor for a text node of length 1 that matches any | 567 // Only returns the successor for a text node of length 1 that matches any |
568 // character and that has no guards on it. | 568 // character and that has no guards on it. |
569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 569 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
570 RegExpCompiler* compiler) { | 570 RegExpCompiler* compiler) { |
571 return NULL; | 571 return NULL; |
572 } | 572 } |
573 | 573 |
574 // Collects information on the possible code units (mod 128) that can match if | 574 // Collects information on the possible code units (mod 128) that can match if |
575 // we look forward. This is used for a Boyer-Moore-like string searching | 575 // we look forward. This is used for a Boyer-Moore-like string searching |
576 // implementation. TODO(erikcorry): This should share more code with | 576 // implementation. TODO(erikcorry): This should share more code with |
577 // EatsAtLeast, GetQuickCheckDetails. | 577 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit |
| 578 // the number of nodes we are willing to look at in order to create this data. |
| 579 static const int kFillInBMBudget = 200; |
578 virtual void FillInBMInfo(int offset, | 580 virtual void FillInBMInfo(int offset, |
579 int recursion_depth, | 581 int recursion_depth, |
| 582 int budget, |
580 BoyerMooreLookahead* bm, | 583 BoyerMooreLookahead* bm, |
581 bool not_at_start) { | 584 bool not_at_start) { |
582 UNREACHABLE(); | 585 UNREACHABLE(); |
583 } | 586 } |
584 | 587 |
585 // If we know that the input is ASCII then there are some nodes that can | 588 // If we know that the input is ASCII then there are some nodes that can |
586 // never match. This method returns a node that can be substituted for | 589 // never match. This method returns a node that can be substituted for |
587 // itself, or NULL if the node can never match. | 590 // itself, or NULL if the node can never match. |
588 virtual RegExpNode* FilterASCII(int depth) { return this; } | 591 virtual RegExpNode* FilterASCII(int depth) { return this; } |
589 // Helper for FilterASCII. | 592 // Helper for FilterASCII. |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
672 | 675 |
673 class SeqRegExpNode: public RegExpNode { | 676 class SeqRegExpNode: public RegExpNode { |
674 public: | 677 public: |
675 explicit SeqRegExpNode(RegExpNode* on_success) | 678 explicit SeqRegExpNode(RegExpNode* on_success) |
676 : on_success_(on_success) { } | 679 : on_success_(on_success) { } |
677 RegExpNode* on_success() { return on_success_; } | 680 RegExpNode* on_success() { return on_success_; } |
678 void set_on_success(RegExpNode* node) { on_success_ = node; } | 681 void set_on_success(RegExpNode* node) { on_success_ = node; } |
679 virtual RegExpNode* FilterASCII(int depth); | 682 virtual RegExpNode* FilterASCII(int depth); |
680 virtual void FillInBMInfo(int offset, | 683 virtual void FillInBMInfo(int offset, |
681 int recursion_depth, | 684 int recursion_depth, |
| 685 int budget, |
682 BoyerMooreLookahead* bm, | 686 BoyerMooreLookahead* bm, |
683 bool not_at_start) { | 687 bool not_at_start) { |
684 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); | 688 on_success_->FillInBMInfo( |
| 689 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
685 if (offset == 0) set_bm_info(not_at_start, bm); | 690 if (offset == 0) set_bm_info(not_at_start, bm); |
686 } | 691 } |
687 | 692 |
688 protected: | 693 protected: |
689 RegExpNode* FilterSuccessor(int depth); | 694 RegExpNode* FilterSuccessor(int depth); |
690 | 695 |
691 private: | 696 private: |
692 RegExpNode* on_success_; | 697 RegExpNode* on_success_; |
693 }; | 698 }; |
694 | 699 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
729 bool not_at_start); | 734 bool not_at_start); |
730 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 735 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
731 RegExpCompiler* compiler, | 736 RegExpCompiler* compiler, |
732 int filled_in, | 737 int filled_in, |
733 bool not_at_start) { | 738 bool not_at_start) { |
734 return on_success()->GetQuickCheckDetails( | 739 return on_success()->GetQuickCheckDetails( |
735 details, compiler, filled_in, not_at_start); | 740 details, compiler, filled_in, not_at_start); |
736 } | 741 } |
737 virtual void FillInBMInfo(int offset, | 742 virtual void FillInBMInfo(int offset, |
738 int recursion_depth, | 743 int recursion_depth, |
| 744 int budget, |
739 BoyerMooreLookahead* bm, | 745 BoyerMooreLookahead* bm, |
740 bool not_at_start); | 746 bool not_at_start); |
741 Type type() { return type_; } | 747 Type type() { return type_; } |
742 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 748 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
744 | 750 |
745 private: | 751 private: |
746 union { | 752 union { |
747 struct { | 753 struct { |
748 int reg; | 754 int reg; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
800 RegExpCompiler* compiler, | 806 RegExpCompiler* compiler, |
801 int characters_filled_in, | 807 int characters_filled_in, |
802 bool not_at_start); | 808 bool not_at_start); |
803 ZoneList<TextElement>* elements() { return elms_; } | 809 ZoneList<TextElement>* elements() { return elms_; } |
804 void MakeCaseIndependent(bool is_ascii); | 810 void MakeCaseIndependent(bool is_ascii); |
805 virtual int GreedyLoopTextLength(); | 811 virtual int GreedyLoopTextLength(); |
806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
807 RegExpCompiler* compiler); | 813 RegExpCompiler* compiler); |
808 virtual void FillInBMInfo(int offset, | 814 virtual void FillInBMInfo(int offset, |
809 int recursion_depth, | 815 int recursion_depth, |
| 816 int budget, |
810 BoyerMooreLookahead* bm, | 817 BoyerMooreLookahead* bm, |
811 bool not_at_start); | 818 bool not_at_start); |
812 void CalculateOffsets(); | 819 void CalculateOffsets(); |
813 virtual RegExpNode* FilterASCII(int depth); | 820 virtual RegExpNode* FilterASCII(int depth); |
814 | 821 |
815 private: | 822 private: |
816 enum TextEmitPassType { | 823 enum TextEmitPassType { |
817 NON_ASCII_MATCH, // Check for characters that can't match. | 824 NON_ASCII_MATCH, // Check for characters that can't match. |
818 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 825 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
819 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 826 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
862 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 869 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
863 virtual int EatsAtLeast(int still_to_find, | 870 virtual int EatsAtLeast(int still_to_find, |
864 int recursion_depth, | 871 int recursion_depth, |
865 bool not_at_start); | 872 bool not_at_start); |
866 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 873 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
867 RegExpCompiler* compiler, | 874 RegExpCompiler* compiler, |
868 int filled_in, | 875 int filled_in, |
869 bool not_at_start); | 876 bool not_at_start); |
870 virtual void FillInBMInfo(int offset, | 877 virtual void FillInBMInfo(int offset, |
871 int recursion_depth, | 878 int recursion_depth, |
| 879 int budget, |
872 BoyerMooreLookahead* bm, | 880 BoyerMooreLookahead* bm, |
873 bool not_at_start); | 881 bool not_at_start); |
874 AssertionNodeType type() { return type_; } | 882 AssertionNodeType type() { return type_; } |
875 void set_type(AssertionNodeType type) { type_ = type; } | 883 void set_type(AssertionNodeType type) { type_ = type; } |
876 | 884 |
877 private: | 885 private: |
878 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 886 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
879 enum IfPrevious { kIsNonWord, kIsWord }; | 887 enum IfPrevious { kIsNonWord, kIsWord }; |
880 void BacktrackIfPrevious(RegExpCompiler* compiler, | 888 void BacktrackIfPrevious(RegExpCompiler* compiler, |
881 Trace* trace, | 889 Trace* trace, |
(...skipping 20 matching lines...) Expand all Loading... |
902 int recursion_depth, | 910 int recursion_depth, |
903 bool not_at_start); | 911 bool not_at_start); |
904 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 912 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
905 RegExpCompiler* compiler, | 913 RegExpCompiler* compiler, |
906 int characters_filled_in, | 914 int characters_filled_in, |
907 bool not_at_start) { | 915 bool not_at_start) { |
908 return; | 916 return; |
909 } | 917 } |
910 virtual void FillInBMInfo(int offset, | 918 virtual void FillInBMInfo(int offset, |
911 int recursion_depth, | 919 int recursion_depth, |
| 920 int budget, |
912 BoyerMooreLookahead* bm, | 921 BoyerMooreLookahead* bm, |
913 bool not_at_start); | 922 bool not_at_start); |
914 | 923 |
915 private: | 924 private: |
916 int start_reg_; | 925 int start_reg_; |
917 int end_reg_; | 926 int end_reg_; |
918 }; | 927 }; |
919 | 928 |
920 | 929 |
921 class EndNode: public RegExpNode { | 930 class EndNode: public RegExpNode { |
922 public: | 931 public: |
923 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 932 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
924 explicit EndNode(Action action) : action_(action) { } | 933 explicit EndNode(Action action) : action_(action) { } |
925 virtual void Accept(NodeVisitor* visitor); | 934 virtual void Accept(NodeVisitor* visitor); |
926 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 935 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
927 virtual int EatsAtLeast(int still_to_find, | 936 virtual int EatsAtLeast(int still_to_find, |
928 int recursion_depth, | 937 int recursion_depth, |
929 bool not_at_start) { return 0; } | 938 bool not_at_start) { return 0; } |
930 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 939 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
931 RegExpCompiler* compiler, | 940 RegExpCompiler* compiler, |
932 int characters_filled_in, | 941 int characters_filled_in, |
933 bool not_at_start) { | 942 bool not_at_start) { |
934 // Returning 0 from EatsAtLeast should ensure we never get here. | 943 // Returning 0 from EatsAtLeast should ensure we never get here. |
935 UNREACHABLE(); | 944 UNREACHABLE(); |
936 } | 945 } |
937 virtual void FillInBMInfo(int offset, | 946 virtual void FillInBMInfo(int offset, |
938 int recursion_depth, | 947 int recursion_depth, |
| 948 int budget, |
939 BoyerMooreLookahead* bm, | 949 BoyerMooreLookahead* bm, |
940 bool not_at_start) { | 950 bool not_at_start) { |
941 // Returning 0 from EatsAtLeast should ensure we never get here. | 951 // Returning 0 from EatsAtLeast should ensure we never get here. |
942 UNREACHABLE(); | 952 UNREACHABLE(); |
943 } | 953 } |
944 | 954 |
945 private: | 955 private: |
946 Action action_; | 956 Action action_; |
947 }; | 957 }; |
948 | 958 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1021 int EatsAtLeastHelper(int still_to_find, | 1031 int EatsAtLeastHelper(int still_to_find, |
1022 int recursion_depth, | 1032 int recursion_depth, |
1023 RegExpNode* ignore_this_node, | 1033 RegExpNode* ignore_this_node, |
1024 bool not_at_start); | 1034 bool not_at_start); |
1025 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1035 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1026 RegExpCompiler* compiler, | 1036 RegExpCompiler* compiler, |
1027 int characters_filled_in, | 1037 int characters_filled_in, |
1028 bool not_at_start); | 1038 bool not_at_start); |
1029 virtual void FillInBMInfo(int offset, | 1039 virtual void FillInBMInfo(int offset, |
1030 int recursion_depth, | 1040 int recursion_depth, |
| 1041 int budget, |
1031 BoyerMooreLookahead* bm, | 1042 BoyerMooreLookahead* bm, |
1032 bool not_at_start); | 1043 bool not_at_start); |
1033 | 1044 |
1034 bool being_calculated() { return being_calculated_; } | 1045 bool being_calculated() { return being_calculated_; } |
1035 bool not_at_start() { return not_at_start_; } | 1046 bool not_at_start() { return not_at_start_; } |
1036 void set_not_at_start() { not_at_start_ = true; } | 1047 void set_not_at_start() { not_at_start_ = true; } |
1037 void set_being_calculated(bool b) { being_calculated_ = b; } | 1048 void set_being_calculated(bool b) { being_calculated_ = b; } |
1038 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1049 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
1039 virtual RegExpNode* FilterASCII(int depth); | 1050 virtual RegExpNode* FilterASCII(int depth); |
1040 | 1051 |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1073 } | 1084 } |
1074 virtual int EatsAtLeast(int still_to_find, | 1085 virtual int EatsAtLeast(int still_to_find, |
1075 int recursion_depth, | 1086 int recursion_depth, |
1076 bool not_at_start); | 1087 bool not_at_start); |
1077 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1088 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1078 RegExpCompiler* compiler, | 1089 RegExpCompiler* compiler, |
1079 int characters_filled_in, | 1090 int characters_filled_in, |
1080 bool not_at_start); | 1091 bool not_at_start); |
1081 virtual void FillInBMInfo(int offset, | 1092 virtual void FillInBMInfo(int offset, |
1082 int recursion_depth, | 1093 int recursion_depth, |
| 1094 int budget, |
1083 BoyerMooreLookahead* bm, | 1095 BoyerMooreLookahead* bm, |
1084 bool not_at_start) { | 1096 bool not_at_start) { |
1085 alternatives_->at(1).node()->FillInBMInfo( | 1097 alternatives_->at(1).node()->FillInBMInfo( |
1086 offset, recursion_depth + 1, bm, not_at_start); | 1098 offset, recursion_depth + 1, budget - 1, bm, not_at_start); |
1087 if (offset == 0) set_bm_info(not_at_start, bm); | 1099 if (offset == 0) set_bm_info(not_at_start, bm); |
1088 } | 1100 } |
1089 // For a negative lookahead we don't emit the quick check for the | 1101 // For a negative lookahead we don't emit the quick check for the |
1090 // alternative that is expected to fail. This is because quick check code | 1102 // alternative that is expected to fail. This is because quick check code |
1091 // starts by loading enough characters for the alternative that takes fewest | 1103 // starts by loading enough characters for the alternative that takes fewest |
1092 // characters, but on a negative lookahead the negative branch did not take | 1104 // characters, but on a negative lookahead the negative branch did not take |
1093 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1105 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1094 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1106 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
1095 virtual RegExpNode* FilterASCII(int depth); | 1107 virtual RegExpNode* FilterASCII(int depth); |
1096 }; | 1108 }; |
(...skipping 11 matching lines...) Expand all Loading... |
1108 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1120 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1109 virtual int EatsAtLeast(int still_to_find, | 1121 virtual int EatsAtLeast(int still_to_find, |
1110 int recursion_depth, | 1122 int recursion_depth, |
1111 bool not_at_start); | 1123 bool not_at_start); |
1112 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1124 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1113 RegExpCompiler* compiler, | 1125 RegExpCompiler* compiler, |
1114 int characters_filled_in, | 1126 int characters_filled_in, |
1115 bool not_at_start); | 1127 bool not_at_start); |
1116 virtual void FillInBMInfo(int offset, | 1128 virtual void FillInBMInfo(int offset, |
1117 int recursion_depth, | 1129 int recursion_depth, |
| 1130 int budget, |
1118 BoyerMooreLookahead* bm, | 1131 BoyerMooreLookahead* bm, |
1119 bool not_at_start); | 1132 bool not_at_start); |
1120 RegExpNode* loop_node() { return loop_node_; } | 1133 RegExpNode* loop_node() { return loop_node_; } |
1121 RegExpNode* continue_node() { return continue_node_; } | 1134 RegExpNode* continue_node() { return continue_node_; } |
1122 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1135 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1123 virtual void Accept(NodeVisitor* visitor); | 1136 virtual void Accept(NodeVisitor* visitor); |
1124 virtual RegExpNode* FilterASCII(int depth); | 1137 virtual RegExpNode* FilterASCII(int depth); |
1125 | 1138 |
1126 private: | 1139 private: |
1127 // AddAlternative is made private for loop nodes because alternatives | 1140 // AddAlternative is made private for loop nodes because alternatives |
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1604 int* vector_; | 1617 int* vector_; |
1605 int offsets_vector_length_; | 1618 int offsets_vector_length_; |
1606 | 1619 |
1607 friend class ExternalReference; | 1620 friend class ExternalReference; |
1608 }; | 1621 }; |
1609 | 1622 |
1610 | 1623 |
1611 } } // namespace v8::internal | 1624 } } // namespace v8::internal |
1612 | 1625 |
1613 #endif // V8_JSREGEXP_H_ | 1626 #endif // V8_JSREGEXP_H_ |
OLD | NEW |