OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
574 // character and that has no guards on it. | 574 // character and that has no guards on it. |
575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 575 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
576 RegExpCompiler* compiler) { | 576 RegExpCompiler* compiler) { |
577 return NULL; | 577 return NULL; |
578 } | 578 } |
579 | 579 |
580 // Collects information on the possible code units (mod 128) that can match if | 580 // Collects information on the possible code units (mod 128) that can match if |
581 // we look forward. This is used for a Boyer-Moore-like string searching | 581 // we look forward. This is used for a Boyer-Moore-like string searching |
582 // implementation. TODO(erikcorry): This should share more code with | 582 // implementation. TODO(erikcorry): This should share more code with |
583 // EatsAtLeast, GetQuickCheckDetails. | 583 // EatsAtLeast, GetQuickCheckDetails. |
584 virtual void FillInBMInfo( | 584 virtual void FillInBMInfo(int offset, |
585 int offset, BoyerMooreLookahead* bm, bool not_at_start) { | 585 int recursion_depth, |
| 586 BoyerMooreLookahead* bm, |
| 587 bool not_at_start) { |
586 UNREACHABLE(); | 588 UNREACHABLE(); |
587 } | 589 } |
588 | 590 |
589 // If we know that the input is ASCII then there are some nodes that can | 591 // If we know that the input is ASCII then there are some nodes that can |
590 // never match. This method returns a node that can be substituted for | 592 // never match. This method returns a node that can be substituted for |
591 // itself, or NULL if the node can never match. | 593 // itself, or NULL if the node can never match. |
592 virtual RegExpNode* FilterASCII(int depth) { return this; } | 594 virtual RegExpNode* FilterASCII(int depth) { return this; } |
593 // Helper for FilterASCII. | 595 // Helper for FilterASCII. |
594 RegExpNode* replacement() { | 596 RegExpNode* replacement() { |
595 ASSERT(info()->replacement_calculated); | 597 ASSERT(info()->replacement_calculated); |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
674 }; | 676 }; |
675 | 677 |
676 | 678 |
677 class SeqRegExpNode: public RegExpNode { | 679 class SeqRegExpNode: public RegExpNode { |
678 public: | 680 public: |
679 explicit SeqRegExpNode(RegExpNode* on_success) | 681 explicit SeqRegExpNode(RegExpNode* on_success) |
680 : on_success_(on_success) { } | 682 : on_success_(on_success) { } |
681 RegExpNode* on_success() { return on_success_; } | 683 RegExpNode* on_success() { return on_success_; } |
682 void set_on_success(RegExpNode* node) { on_success_ = node; } | 684 void set_on_success(RegExpNode* node) { on_success_ = node; } |
683 virtual RegExpNode* FilterASCII(int depth); | 685 virtual RegExpNode* FilterASCII(int depth); |
684 virtual void FillInBMInfo( | 686 virtual void FillInBMInfo(int offset, |
685 int offset, BoyerMooreLookahead* bm, bool not_at_start) { | 687 int recursion_depth, |
686 on_success_->FillInBMInfo(offset, bm, not_at_start); | 688 BoyerMooreLookahead* bm, |
| 689 bool not_at_start) { |
| 690 on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); |
687 if (offset == 0) set_bm_info(not_at_start, bm); | 691 if (offset == 0) set_bm_info(not_at_start, bm); |
688 } | 692 } |
689 | 693 |
690 protected: | 694 protected: |
691 RegExpNode* FilterSuccessor(int depth); | 695 RegExpNode* FilterSuccessor(int depth); |
692 | 696 |
693 private: | 697 private: |
694 RegExpNode* on_success_; | 698 RegExpNode* on_success_; |
695 }; | 699 }; |
696 | 700 |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
729 virtual int EatsAtLeast(int still_to_find, | 733 virtual int EatsAtLeast(int still_to_find, |
730 int recursion_depth, | 734 int recursion_depth, |
731 bool not_at_start); | 735 bool not_at_start); |
732 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 736 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
733 RegExpCompiler* compiler, | 737 RegExpCompiler* compiler, |
734 int filled_in, | 738 int filled_in, |
735 bool not_at_start) { | 739 bool not_at_start) { |
736 return on_success()->GetQuickCheckDetails( | 740 return on_success()->GetQuickCheckDetails( |
737 details, compiler, filled_in, not_at_start); | 741 details, compiler, filled_in, not_at_start); |
738 } | 742 } |
739 virtual void FillInBMInfo( | 743 virtual void FillInBMInfo(int offset, |
740 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 744 int recursion_depth, |
| 745 BoyerMooreLookahead* bm, |
| 746 bool not_at_start); |
741 Type type() { return type_; } | 747 Type type() { return type_; } |
742 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 748 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
743 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 749 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
744 | 750 |
745 private: | 751 private: |
746 union { | 752 union { |
747 struct { | 753 struct { |
748 int reg; | 754 int reg; |
749 int value; | 755 int value; |
750 } u_store_register; | 756 } u_store_register; |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
798 bool not_at_start); | 804 bool not_at_start); |
799 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 805 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
800 RegExpCompiler* compiler, | 806 RegExpCompiler* compiler, |
801 int characters_filled_in, | 807 int characters_filled_in, |
802 bool not_at_start); | 808 bool not_at_start); |
803 ZoneList<TextElement>* elements() { return elms_; } | 809 ZoneList<TextElement>* elements() { return elms_; } |
804 void MakeCaseIndependent(bool is_ascii); | 810 void MakeCaseIndependent(bool is_ascii); |
805 virtual int GreedyLoopTextLength(); | 811 virtual int GreedyLoopTextLength(); |
806 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 812 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
807 RegExpCompiler* compiler); | 813 RegExpCompiler* compiler); |
808 virtual void FillInBMInfo( | 814 virtual void FillInBMInfo(int offset, |
809 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 815 int recursion_depth, |
| 816 BoyerMooreLookahead* bm, |
| 817 bool not_at_start); |
810 void CalculateOffsets(); | 818 void CalculateOffsets(); |
811 virtual RegExpNode* FilterASCII(int depth); | 819 virtual RegExpNode* FilterASCII(int depth); |
812 | 820 |
813 private: | 821 private: |
814 enum TextEmitPassType { | 822 enum TextEmitPassType { |
815 NON_ASCII_MATCH, // Check for characters that can't match. | 823 NON_ASCII_MATCH, // Check for characters that can't match. |
816 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 824 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
817 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 825 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
818 CASE_CHARACTER_MATCH, // Case-independent single character check. | 826 CASE_CHARACTER_MATCH, // Case-independent single character check. |
819 CHARACTER_CLASS_MATCH // Character class. | 827 CHARACTER_CLASS_MATCH // Character class. |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
858 } | 866 } |
859 virtual void Accept(NodeVisitor* visitor); | 867 virtual void Accept(NodeVisitor* visitor); |
860 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 868 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
861 virtual int EatsAtLeast(int still_to_find, | 869 virtual int EatsAtLeast(int still_to_find, |
862 int recursion_depth, | 870 int recursion_depth, |
863 bool not_at_start); | 871 bool not_at_start); |
864 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 872 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
865 RegExpCompiler* compiler, | 873 RegExpCompiler* compiler, |
866 int filled_in, | 874 int filled_in, |
867 bool not_at_start); | 875 bool not_at_start); |
868 virtual void FillInBMInfo( | 876 virtual void FillInBMInfo(int offset, |
869 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 877 int recursion_depth, |
| 878 BoyerMooreLookahead* bm, |
| 879 bool not_at_start); |
870 AssertionNodeType type() { return type_; } | 880 AssertionNodeType type() { return type_; } |
871 void set_type(AssertionNodeType type) { type_ = type; } | 881 void set_type(AssertionNodeType type) { type_ = type; } |
872 | 882 |
873 private: | 883 private: |
874 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 884 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
875 enum IfPrevious { kIsNonWord, kIsWord }; | 885 enum IfPrevious { kIsNonWord, kIsWord }; |
876 void BacktrackIfPrevious(RegExpCompiler* compiler, | 886 void BacktrackIfPrevious(RegExpCompiler* compiler, |
877 Trace* trace, | 887 Trace* trace, |
878 IfPrevious backtrack_if_previous); | 888 IfPrevious backtrack_if_previous); |
879 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 889 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
(...skipping 16 matching lines...) Expand all Loading... |
896 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 906 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
897 virtual int EatsAtLeast(int still_to_find, | 907 virtual int EatsAtLeast(int still_to_find, |
898 int recursion_depth, | 908 int recursion_depth, |
899 bool not_at_start); | 909 bool not_at_start); |
900 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 910 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
901 RegExpCompiler* compiler, | 911 RegExpCompiler* compiler, |
902 int characters_filled_in, | 912 int characters_filled_in, |
903 bool not_at_start) { | 913 bool not_at_start) { |
904 return; | 914 return; |
905 } | 915 } |
906 virtual void FillInBMInfo( | 916 virtual void FillInBMInfo(int offset, |
907 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 917 int recursion_depth, |
| 918 BoyerMooreLookahead* bm, |
| 919 bool not_at_start); |
908 | 920 |
909 private: | 921 private: |
910 int start_reg_; | 922 int start_reg_; |
911 int end_reg_; | 923 int end_reg_; |
912 }; | 924 }; |
913 | 925 |
914 | 926 |
915 class EndNode: public RegExpNode { | 927 class EndNode: public RegExpNode { |
916 public: | 928 public: |
917 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 929 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
918 explicit EndNode(Action action) : action_(action) { } | 930 explicit EndNode(Action action) : action_(action) { } |
919 virtual void Accept(NodeVisitor* visitor); | 931 virtual void Accept(NodeVisitor* visitor); |
920 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 932 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
921 virtual int EatsAtLeast(int still_to_find, | 933 virtual int EatsAtLeast(int still_to_find, |
922 int recursion_depth, | 934 int recursion_depth, |
923 bool not_at_start) { return 0; } | 935 bool not_at_start) { return 0; } |
924 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
925 RegExpCompiler* compiler, | 937 RegExpCompiler* compiler, |
926 int characters_filled_in, | 938 int characters_filled_in, |
927 bool not_at_start) { | 939 bool not_at_start) { |
928 // Returning 0 from EatsAtLeast should ensure we never get here. | 940 // Returning 0 from EatsAtLeast should ensure we never get here. |
929 UNREACHABLE(); | 941 UNREACHABLE(); |
930 } | 942 } |
931 virtual void FillInBMInfo( | 943 virtual void FillInBMInfo(int offset, |
932 int offset, BoyerMooreLookahead* bm, bool not_at_start) { | 944 int recursion_depth, |
| 945 BoyerMooreLookahead* bm, |
| 946 bool not_at_start) { |
933 // Returning 0 from EatsAtLeast should ensure we never get here. | 947 // Returning 0 from EatsAtLeast should ensure we never get here. |
934 UNREACHABLE(); | 948 UNREACHABLE(); |
935 } | 949 } |
936 | 950 |
937 private: | 951 private: |
938 Action action_; | 952 Action action_; |
939 }; | 953 }; |
940 | 954 |
941 | 955 |
942 class NegativeSubmatchSuccess: public EndNode { | 956 class NegativeSubmatchSuccess: public EndNode { |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1011 int recursion_depth, | 1025 int recursion_depth, |
1012 bool not_at_start); | 1026 bool not_at_start); |
1013 int EatsAtLeastHelper(int still_to_find, | 1027 int EatsAtLeastHelper(int still_to_find, |
1014 int recursion_depth, | 1028 int recursion_depth, |
1015 RegExpNode* ignore_this_node, | 1029 RegExpNode* ignore_this_node, |
1016 bool not_at_start); | 1030 bool not_at_start); |
1017 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1031 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1018 RegExpCompiler* compiler, | 1032 RegExpCompiler* compiler, |
1019 int characters_filled_in, | 1033 int characters_filled_in, |
1020 bool not_at_start); | 1034 bool not_at_start); |
1021 virtual void FillInBMInfo( | 1035 virtual void FillInBMInfo(int offset, |
1022 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 1036 int recursion_depth, |
| 1037 BoyerMooreLookahead* bm, |
| 1038 bool not_at_start); |
1023 | 1039 |
1024 bool being_calculated() { return being_calculated_; } | 1040 bool being_calculated() { return being_calculated_; } |
1025 bool not_at_start() { return not_at_start_; } | 1041 bool not_at_start() { return not_at_start_; } |
1026 void set_not_at_start() { not_at_start_ = true; } | 1042 void set_not_at_start() { not_at_start_ = true; } |
1027 void set_being_calculated(bool b) { being_calculated_ = b; } | 1043 void set_being_calculated(bool b) { being_calculated_ = b; } |
1028 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1044 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
1029 virtual RegExpNode* FilterASCII(int depth); | 1045 virtual RegExpNode* FilterASCII(int depth); |
1030 | 1046 |
1031 protected: | 1047 protected: |
1032 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | 1048 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); |
(...skipping 28 matching lines...) Expand all Loading... |
1061 AddAlternative(this_must_fail); | 1077 AddAlternative(this_must_fail); |
1062 AddAlternative(then_do_this); | 1078 AddAlternative(then_do_this); |
1063 } | 1079 } |
1064 virtual int EatsAtLeast(int still_to_find, | 1080 virtual int EatsAtLeast(int still_to_find, |
1065 int recursion_depth, | 1081 int recursion_depth, |
1066 bool not_at_start); | 1082 bool not_at_start); |
1067 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1083 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1068 RegExpCompiler* compiler, | 1084 RegExpCompiler* compiler, |
1069 int characters_filled_in, | 1085 int characters_filled_in, |
1070 bool not_at_start); | 1086 bool not_at_start); |
1071 virtual void FillInBMInfo( | 1087 virtual void FillInBMInfo(int offset, |
1072 int offset, BoyerMooreLookahead* bm, bool not_at_start) { | 1088 int recursion_depth, |
1073 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); | 1089 BoyerMooreLookahead* bm, |
| 1090 bool not_at_start) { |
| 1091 alternatives_->at(1).node()->FillInBMInfo( |
| 1092 offset, recursion_depth + 1, bm, not_at_start); |
1074 if (offset == 0) set_bm_info(not_at_start, bm); | 1093 if (offset == 0) set_bm_info(not_at_start, bm); |
1075 } | 1094 } |
1076 // For a negative lookahead we don't emit the quick check for the | 1095 // For a negative lookahead we don't emit the quick check for the |
1077 // alternative that is expected to fail. This is because quick check code | 1096 // alternative that is expected to fail. This is because quick check code |
1078 // starts by loading enough characters for the alternative that takes fewest | 1097 // starts by loading enough characters for the alternative that takes fewest |
1079 // characters, but on a negative lookahead the negative branch did not take | 1098 // characters, but on a negative lookahead the negative branch did not take |
1080 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1099 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1081 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1100 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
1082 virtual RegExpNode* FilterASCII(int depth); | 1101 virtual RegExpNode* FilterASCII(int depth); |
1083 }; | 1102 }; |
1084 | 1103 |
1085 | 1104 |
1086 class LoopChoiceNode: public ChoiceNode { | 1105 class LoopChoiceNode: public ChoiceNode { |
1087 public: | 1106 public: |
1088 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1107 explicit LoopChoiceNode(bool body_can_be_zero_length) |
1089 : ChoiceNode(2), | 1108 : ChoiceNode(2), |
1090 loop_node_(NULL), | 1109 loop_node_(NULL), |
1091 continue_node_(NULL), | 1110 continue_node_(NULL), |
1092 body_can_be_zero_length_(body_can_be_zero_length) { } | 1111 body_can_be_zero_length_(body_can_be_zero_length) { } |
1093 void AddLoopAlternative(GuardedAlternative alt); | 1112 void AddLoopAlternative(GuardedAlternative alt); |
1094 void AddContinueAlternative(GuardedAlternative alt); | 1113 void AddContinueAlternative(GuardedAlternative alt); |
1095 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1114 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1096 virtual int EatsAtLeast(int still_to_find, | 1115 virtual int EatsAtLeast(int still_to_find, |
1097 int recursion_depth, | 1116 int recursion_depth, |
1098 bool not_at_start); | 1117 bool not_at_start); |
1099 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1100 RegExpCompiler* compiler, | 1119 RegExpCompiler* compiler, |
1101 int characters_filled_in, | 1120 int characters_filled_in, |
1102 bool not_at_start); | 1121 bool not_at_start); |
1103 virtual void FillInBMInfo( | 1122 virtual void FillInBMInfo(int offset, |
1104 int offset, BoyerMooreLookahead* bm, bool not_at_start); | 1123 int recursion_depth, |
| 1124 BoyerMooreLookahead* bm, |
| 1125 bool not_at_start); |
1105 RegExpNode* loop_node() { return loop_node_; } | 1126 RegExpNode* loop_node() { return loop_node_; } |
1106 RegExpNode* continue_node() { return continue_node_; } | 1127 RegExpNode* continue_node() { return continue_node_; } |
1107 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1128 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1108 virtual void Accept(NodeVisitor* visitor); | 1129 virtual void Accept(NodeVisitor* visitor); |
1109 virtual RegExpNode* FilterASCII(int depth); | 1130 virtual RegExpNode* FilterASCII(int depth); |
1110 | 1131 |
1111 private: | 1132 private: |
1112 // AddAlternative is made private for loop nodes because alternatives | 1133 // AddAlternative is made private for loop nodes because alternatives |
1113 // should not be added freely, we need to keep track of which node | 1134 // should not be added freely, we need to keep track of which node |
1114 // goes back to the node itself. | 1135 // goes back to the node itself. |
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1591 int* vector_; | 1612 int* vector_; |
1592 int offsets_vector_length_; | 1613 int offsets_vector_length_; |
1593 | 1614 |
1594 friend class ExternalReference; | 1615 friend class ExternalReference; |
1595 }; | 1616 }; |
1596 | 1617 |
1597 | 1618 |
1598 } } // namespace v8::internal | 1619 } } // namespace v8::internal |
1599 | 1620 |
1600 #endif // V8_JSREGEXP_H_ | 1621 #endif // V8_JSREGEXP_H_ |
OLD | NEW |