src/jsregexp.cc - Issue 10105026: Version 3.10.3

Side by Side Diff: src/jsregexp.cc

Issue 10105026: Version 3.10.3 (Closed) Base URL: http://v8.googlecode.com/svn/trunk/

Patch Set: Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 90 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
101 Factory* factory = isolate->factory();	101 Factory* factory = isolate->factory();

102 Handle<FixedArray> elements = factory->NewFixedArray(2);	102 Handle<FixedArray> elements = factory->NewFixedArray(2);

103 elements->set(0, *pattern);	103 elements->set(0, *pattern);

104 elements->set(1, *error_text);	104 elements->set(1, *error_text);

105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);	105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);	106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);

107 isolate->Throw(*regexp_err);	107 isolate->Throw(*regexp_err);

108 }	108 }

109	109

110	110

	111 ContainedInLattice AddRange(ContainedInLattice containment,

	112 const int* ranges,

	113 int ranges_length,

	114 Interval new_range) {

	115 ASSERT((ranges_length & 1) == 1);

	116 ASSERT(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);

	117 if (containment == kLatticeUnknown) return containment;

	118 bool inside = false;

	119 int last = 0;

	120 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {

	121 // Consider the range from last to ranges[i].

	122 // We haven't got to the new range yet.

	123 if (ranges[i] <= new_range.from()) continue;

	124 // New range is wholly inside last-ranges[i]. Note that new_range.to() is

	125 // inclusive, but the values in ranges are not.

	126 if (last <= new_range.from() && new_range.to() < ranges[i]) {

	127 return Combine(containment, inside ? kLatticeIn : kLatticeOut);

	128 }

	129 return kLatticeUnknown;

	130 }

	131 return containment;

	132 }

	133

	134

111 // More makes code generation slower, less makes V8 benchmark score lower.	135 // More makes code generation slower, less makes V8 benchmark score lower.

112 const int kMaxLookaheadForBoyerMoore = 8;	136 const int kMaxLookaheadForBoyerMoore = 8;

113 // In a 3-character pattern you can maximally step forwards 3 characters	137 // In a 3-character pattern you can maximally step forwards 3 characters

114 // at a time, which is not always enough to pay for the extra logic.	138 // at a time, which is not always enough to pay for the extra logic.

115 const int kPatternTooShortForBoyerMoore = 2;	139 const int kPatternTooShortForBoyerMoore = 2;

116	140

117	141

118 // Identifies the sort of regexps where the regexp engine is faster	142 // Identifies the sort of regexps where the regexp engine is faster

119 // than the code used for atom matches.	143 // than the code used for atom matches.

120 static bool HasFewDifferentCharacters(Handle<String> pattern) {	144 static bool HasFewDifferentCharacters(Handle<String> pattern) {

(...skipping 2029 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2150	2174

2151	2175

2152 void ActionNode::FillInBMInfo(int offset,	2176 void ActionNode::FillInBMInfo(int offset,

2153 BoyerMooreLookahead* bm,	2177 BoyerMooreLookahead* bm,

2154 bool not_at_start) {	2178 bool not_at_start) {

2155 if (type_ == BEGIN_SUBMATCH) {	2179 if (type_ == BEGIN_SUBMATCH) {

2156 bm->SetRest(offset);	2180 bm->SetRest(offset);

2157 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {	2181 } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {

2158 on_success()->FillInBMInfo(offset, bm, not_at_start);	2182 on_success()->FillInBMInfo(offset, bm, not_at_start);

2159 }	2183 }

	2184 SaveBMInfo(bm, not_at_start, offset);

2160 }	2185 }

2161	2186

2162	2187

2163 int AssertionNode::EatsAtLeast(int still_to_find,	2188 int AssertionNode::EatsAtLeast(int still_to_find,

2164 int recursion_depth,	2189 int recursion_depth,

2165 bool not_at_start) {	2190 bool not_at_start) {

2166 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;	2191 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2167 // If we know we are not at the start and we are asked "how many characters	2192 // If we know we are not at the start and we are asked "how many characters

2168 // will you match if you succeed?" then we can answer anything since false	2193 // will you match if you succeed?" then we can answer anything since false

2169 // implies false. So lets just return the max answer (still_to_find) since	2194 // implies false. So lets just return the max answer (still_to_find) since

2170 // that won't prevent us from preloading a lot of characters for the other	2195 // that won't prevent us from preloading a lot of characters for the other

2171 // branches in the node graph.	2196 // branches in the node graph.

2172 if (type() == AT_START && not_at_start) return still_to_find;	2197 if (type() == AT_START && not_at_start) return still_to_find;

2173 return on_success()->EatsAtLeast(still_to_find,	2198 return on_success()->EatsAtLeast(still_to_find,

2174 recursion_depth + 1,	2199 recursion_depth + 1,

2175 not_at_start);	2200 not_at_start);

2176 }	2201 }

2177	2202

2178	2203

2179 void AssertionNode::FillInBMInfo(	2204 void AssertionNode::FillInBMInfo(

2180 int offset, BoyerMooreLookahead* bm, bool not_at_start) {	2205 int offset, BoyerMooreLookahead* bm, bool not_at_start) {

2181 // Match the behaviour of EatsAtLeast on this node.	2206 // Match the behaviour of EatsAtLeast on this node.

2182 if (type() == AT_START && not_at_start) return;	2207 if (type() == AT_START && not_at_start) return;

2183 on_success()->FillInBMInfo(offset, bm, not_at_start);	2208 on_success()->FillInBMInfo(offset, bm, not_at_start);

	2209 SaveBMInfo(bm, not_at_start, offset);

2184 }	2210 }

2185	2211

2186	2212

2187 int BackReferenceNode::EatsAtLeast(int still_to_find,	2213 int BackReferenceNode::EatsAtLeast(int still_to_find,

2188 int recursion_depth,	2214 int recursion_depth,

2189 bool not_at_start) {	2215 bool not_at_start) {

2190 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;	2216 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

2191 return on_success()->EatsAtLeast(still_to_find,	2217 return on_success()->EatsAtLeast(still_to_find,

2192 recursion_depth + 1,	2218 recursion_depth + 1,

2193 not_at_start);	2219 not_at_start);

(...skipping 416 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2610 if (body_can_be_zero_length_ \|\| info()->visited) return;	2636 if (body_can_be_zero_length_ \|\| info()->visited) return;

2611 VisitMarker marker(info());	2637 VisitMarker marker(info());

2612 return ChoiceNode::GetQuickCheckDetails(details,	2638 return ChoiceNode::GetQuickCheckDetails(details,

2613 compiler,	2639 compiler,

2614 characters_filled_in,	2640 characters_filled_in,

2615 not_at_start);	2641 not_at_start);

2616 }	2642 }

2617	2643

2618	2644

2619 void LoopChoiceNode::FillInBMInfo(	2645 void LoopChoiceNode::FillInBMInfo(

2620 int offset, BoyerMooreLookahead* bm, bool nas) {	2646 int offset, BoyerMooreLookahead* bm, bool not_at_start) {

2621 if (body_can_be_zero_length_) {	2647 if (body_can_be_zero_length_) {

2622 bm->SetRest(offset);	2648 bm->SetRest(offset);

	2649 SaveBMInfo(bm, not_at_start, offset);

2623 return;	2650 return;

2624 }	2651 }

2625 ChoiceNode::FillInBMInfo(offset, bm, nas);	2652 ChoiceNode::FillInBMInfo(offset, bm, not_at_start);

	2653 SaveBMInfo(bm, not_at_start, offset);

2626 }	2654 }

2627	2655

2628	2656

2629 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,	2657 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

2630 RegExpCompiler* compiler,	2658 RegExpCompiler* compiler,

2631 int characters_filled_in,	2659 int characters_filled_in,

2632 bool not_at_start) {	2660 bool not_at_start) {

2633 not_at_start = (not_at_start \|\| not_at_start_);	2661 not_at_start = (not_at_start \|\| not_at_start_);

2634 int choice_count = alternatives_->length();	2662 int choice_count = alternatives_->length();

2635 ASSERT(choice_count > 0);	2663 ASSERT(choice_count > 0);

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2703 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);	2731 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

2704 }	2732 }

2705 assembler->CheckCharacter('\n', &ok);	2733 assembler->CheckCharacter('\n', &ok);

2706 assembler->CheckNotCharacter('\r', new_trace.backtrack());	2734 assembler->CheckNotCharacter('\r', new_trace.backtrack());

2707 }	2735 }

2708 assembler->Bind(&ok);	2736 assembler->Bind(&ok);

2709 on_success->Emit(compiler, &new_trace);	2737 on_success->Emit(compiler, &new_trace);

2710 }	2738 }

2711	2739

2712	2740

2713 // Emit the code to handle \b and \B (word-boundary or non-word-boundary)

2714 // when we know whether the next character must be a word character or not.

2715 static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type,

2716 RegExpCompiler* compiler,

2717 RegExpNode* on_success,

2718 Trace* trace) {

2719 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2720 Label done;

2721

2722 Trace new_trace(*trace);

2723

2724 bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER);

2725 Label* on_word = expect_word_character ? &done : new_trace.backtrack();

2726 Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done;

2727

2728 // Check whether previous character was a word character.

2729 switch (trace->at_start()) {

2730 case Trace::TRUE:

2731 if (expect_word_character) {

2732 assembler->GoTo(on_non_word);

2733 }

2734 break;

2735 case Trace::UNKNOWN:

2736 ASSERT_EQ(0, trace->cp_offset());

2737 assembler->CheckAtStart(on_non_word);

2738 // Fall through.

2739 case Trace::FALSE:

2740 int prev_char_offset = trace->cp_offset() - 1;

2741 assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1);

2742 EmitWordCheck(assembler, on_word, on_non_word, expect_word_character);

2743 // We may or may not have loaded the previous character.

2744 new_trace.InvalidateCurrentCharacter();

2745 }

2746

2747 assembler->Bind(&done);

2748

2749 on_success->Emit(compiler, &new_trace);

2750 }

2751

2752

2753 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).	2741 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).

2754 static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,	2742 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {

2755 RegExpCompiler* compiler,

2756 RegExpNode* on_success,

2757 Trace* trace) {

2758 RegExpMacroAssembler* assembler = compiler->macro_assembler();	2743 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2759 Label before_non_word;	2744 Trace::TriBool next_is_word_character = Trace::UNKNOWN;

2760 Label before_word;	2745 bool not_at_start = (trace->at_start() == Trace::FALSE);

2761 if (trace->characters_preloaded() != 1) {	2746 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

2762 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);	2747 if (lookahead == NULL) {

	2748 int eats_at_least =

	2749 Min(kMaxLookaheadForBoyerMoore,

	2750 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));

	2751 if (eats_at_least >= 1) {

	2752 BoyerMooreLookahead* bm =

	2753 new BoyerMooreLookahead(eats_at_least, compiler);

	2754 FillInBMInfo(0, bm, not_at_start);

	2755 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;

	2756 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;

	2757 }

	2758 } else {

	2759 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;

	2760 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE;

2763 }	2761 }

2764 // Fall through on non-word.	2762 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY);

2765 EmitWordCheck(assembler, &before_word, &before_non_word, false);	2763 if (next_is_word_character == Trace::UNKNOWN) {

	2764 Label before_non_word;

	2765 Label before_word;

	2766 if (trace->characters_preloaded() != 1) {

	2767 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

	2768 }

	2769 // Fall through on non-word.

	2770 EmitWordCheck(assembler, &before_word, &before_non_word, false);

	2771 // Next character is not a word character.

	2772 assembler->Bind(&before_non_word);

	2773 Label ok;

	2774 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2775 assembler->GoTo(&ok);

2766	2776

2767 // We will be loading the previous character into the current character	2777 assembler->Bind(&before_word);

2768 // register.	2778 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2779 assembler->Bind(&ok);

	2780 } else if (next_is_word_character == Trace::TRUE) {

	2781 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2782 } else {

	2783 ASSERT(next_is_word_character == Trace::FALSE);

	2784 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2785 }

	2786 }

	2787

	2788

	2789 void AssertionNode::BacktrackIfPrevious(

	2790 RegExpCompiler* compiler,

	2791 Trace* trace,

	2792 AssertionNode::IfPrevious backtrack_if_previous) {

	2793 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2769 Trace new_trace(*trace);	2794 Trace new_trace(*trace);

2770 new_trace.InvalidateCurrentCharacter();	2795 new_trace.InvalidateCurrentCharacter();

2771	2796

2772 Label ok;	2797 Label fall_through, dummy;

2773 Label* boundary;

2774 Label* not_boundary;

2775 if (type == AssertionNode::AT_BOUNDARY) {

2776 boundary = &ok;

2777 not_boundary = new_trace.backtrack();

2778 } else {

2779 not_boundary = &ok;

2780 boundary = new_trace.backtrack();

2781 }

2782	2798

2783 // Next character is not a word character.	2799 Label* non_word = backtrack_if_previous == kIsNonWord ?

2784 assembler->Bind(&before_non_word);	2800 new_trace.backtrack() :

	2801 &fall_through;

	2802 Label* word = backtrack_if_previous == kIsNonWord ?

	2803 &fall_through :

	2804 new_trace.backtrack();

	2805

2785 if (new_trace.cp_offset() == 0) {	2806 if (new_trace.cp_offset() == 0) {

2786 // The start of input counts as a non-word character, so the question is	2807 // The start of input counts as a non-word character, so the question is

2787 // decided if we are at the start.	2808 // decided if we are at the start.

2788 assembler->CheckAtStart(not_boundary);	2809 assembler->CheckAtStart(non_word);

2789 }	2810 }

2790 // We already checked that we are not at the start of input so it must be	2811 // We already checked that we are not at the start of input so it must be

2791 // OK to load the previous character.	2812 // OK to load the previous character.

2792 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,	2813 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);

2793 &ok, // Unused dummy label in this call.	2814 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);

2794 false);

2795 // Fall through on non-word.

2796 EmitWordCheck(assembler, boundary, not_boundary, false);

2797 assembler->GoTo(not_boundary);

2798	2815

2799 // Next character is a word character.	2816 assembler->Bind(&fall_through);

2800 assembler->Bind(&before_word);	2817 on_success()->Emit(compiler, &new_trace);

2801 if (new_trace.cp_offset() == 0) {

2802 // The start of input counts as a non-word character, so the question is

2803 // decided if we are at the start.

2804 assembler->CheckAtStart(boundary);

2805 }

2806 // We already checked that we are not at the start of input so it must be

2807 // OK to load the previous character.

2808 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,

2809 &ok, // Unused dummy label in this call.

2810 false);

2811 bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);

2812 EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);

2813

2814 assembler->Bind(&ok);

2815

2816 on_success->Emit(compiler, &new_trace);

2817 }	2818 }

2818	2819

2819	2820

2820 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,	2821 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,

2821 RegExpCompiler* compiler,	2822 RegExpCompiler* compiler,

2822 int filled_in,	2823 int filled_in,

2823 bool not_at_start) {	2824 bool not_at_start) {

2824 if (type_ == AT_START && not_at_start) {	2825 if (type_ == AT_START && not_at_start) {

2825 details->set_cannot_match();	2826 details->set_cannot_match();

2826 return;	2827 return;

(...skipping 27 matching lines...) Expand all Loading...
2854 on_success()->Emit(compiler, &at_start_trace);	2855 on_success()->Emit(compiler, &at_start_trace);

2855 return;	2856 return;

2856 }	2857 }

2857 }	2858 }

2858 break;	2859 break;

2859 case AFTER_NEWLINE:	2860 case AFTER_NEWLINE:

2860 EmitHat(compiler, on_success(), trace);	2861 EmitHat(compiler, on_success(), trace);

2861 return;	2862 return;

2862 case AT_BOUNDARY:	2863 case AT_BOUNDARY:

2863 case AT_NON_BOUNDARY: {	2864 case AT_NON_BOUNDARY: {

2864 EmitBoundaryCheck(type_, compiler, on_success(), trace);	2865 EmitBoundaryCheck(compiler, trace);

2865 return;	2866 return;

2866 }	2867 }

2867 case AFTER_WORD_CHARACTER:

2868 case AFTER_NONWORD_CHARACTER: {

2869 EmitHalfBoundaryCheck(type_, compiler, on_success(), trace);

2870 }

2871 }	2868 }

2872 on_success()->Emit(compiler, trace);	2869 on_success()->Emit(compiler, trace);

2873 }	2870 }

2874	2871

2875	2872

2876 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {	2873 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {

2877 if (quick_check == NULL) return false;	2874 if (quick_check == NULL) return false;

2878 if (offset >= quick_check->characters()) return false;	2875 if (offset >= quick_check->characters()) return false;

2879 return quick_check->positions(offset)->determines_perfectly;	2876 return quick_check->positions(offset)->determines_perfectly;

2880 }	2877 }

(...skipping 389 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3270 return alt_gens_[i];	3267 return alt_gens_[i];

3271 }	3268 }

3272	3269

3273 private:	3270 private:

3274 static const int kAFew = 10;	3271 static const int kAFew = 10;

3275 ZoneList<AlternativeGeneration*> alt_gens_;	3272 ZoneList<AlternativeGeneration*> alt_gens_;

3276 AlternativeGeneration a_few_alt_gens_[kAFew];	3273 AlternativeGeneration a_few_alt_gens_[kAFew];

3277 };	3274 };

3278	3275

3279	3276

	3277 // The '2' variant is has inclusive from and exclusive to.

	3278 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,

	3279 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,

	3280 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };

	3281 static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

	3282

	3283 static const int kWordRanges[] = {

	3284 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };

	3285 static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);

	3286 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };

	3287 static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

	3288 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };

	3289 static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);

	3290 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,

	3291 0x2028, 0x202A, 0x10000 };

	3292 static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);

	3293

	3294

	3295 void BoyerMoorePositionInfo::Set(int character) {

	3296 SetInterval(Interval(character, character));

	3297 }

	3298

	3299

	3300 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

	3301 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

	3302 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

	3303 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

	3304 surrogate_ =

	3305 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);

	3306 if (interval.to() - interval.from() >= kMapSize - 1) {

	3307 if (map_count_ != kMapSize) {

	3308 map_count_ = kMapSize;

	3309 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

	3310 }

	3311 return;

	3312 }

	3313 for (int i = interval.from(); i <= interval.to(); i++) {

	3314 int mod_character = (i & kMask);

	3315 if (!map_->at(mod_character)) {

	3316 map_count_++;

	3317 map_->at(mod_character) = true;

	3318 }

	3319 if (map_count_ == kMapSize) return;

	3320 }

	3321 }

	3322

	3323

	3324 void BoyerMoorePositionInfo::SetAll() {

	3325 s_ = w_ = d_ = kLatticeUnknown;

	3326 if (map_count_ != kMapSize) {

	3327 map_count_ = kMapSize;

	3328 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

	3329 }

	3330 }

	3331

	3332

3280 BoyerMooreLookahead::BoyerMooreLookahead(	3333 BoyerMooreLookahead::BoyerMooreLookahead(

3281 int length, int map_length, RegExpCompiler* compiler)	3334 int length, RegExpCompiler* compiler)

3282 : length_(length),	3335 : length_(length),

3283 map_length_(map_length),

3284 compiler_(compiler) {	3336 compiler_(compiler) {

3285 ASSERT(IsPowerOf2(map_length));

3286 if (compiler->ascii()) {	3337 if (compiler->ascii()) {

3287 max_char_ = String::kMaxAsciiCharCode;	3338 max_char_ = String::kMaxAsciiCharCode;

3288 } else {	3339 } else {

3289 max_char_ = String::kMaxUtf16CodeUnit;	3340 max_char_ = String::kMaxUtf16CodeUnit;

3290 }	3341 }

3291 bitmaps_ = new ZoneList<ZoneList<bool>*>(length);	3342 bitmaps_ = new ZoneList<BoyerMoorePositionInfo*>(length);

3292 for (int i = 0; i < length; i++) {	3343 for (int i = 0; i < length; i++) {

3293 bitmaps_->Add(new ZoneList<bool>(map_length));	3344 bitmaps_->Add(new BoyerMoorePositionInfo());

3294 ZoneList<bool>* map = bitmaps_->at(i);

3295 for (int i = 0; i < map_length; i++) {

3296 map->Add(false);

3297 }

3298 }	3345 }

3299 }	3346 }

3300	3347

3301	3348

3302 // Find the longest range of lookahead that has the fewest number of different	3349 // Find the longest range of lookahead that has the fewest number of different

3303 // characters that can occur at a given position. Since we are optimizing two	3350 // characters that can occur at a given position. Since we are optimizing two

3304 // different parameters at once this is a tradeoff.	3351 // different parameters at once this is a tradeoff.

3305 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {	3352 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {

3306 int biggest_points = 0;	3353 int biggest_points = 0;

	3354 // If more than 32 characters out of 128 can occur it is unlikely that we can

	3355 // be lucky enough to step forwards much of the time.

	3356 const int kMaxMax = 32;

3307 for (int max_number_of_chars = 4;	3357 for (int max_number_of_chars = 4;

3308 max_number_of_chars < kTooManyCharacters;	3358 max_number_of_chars < kMaxMax;

3309 max_number_of_chars *= 2) {	3359 max_number_of_chars *= 2) {

3310 biggest_points =	3360 biggest_points =

3311 FindBestInterval(max_number_of_chars, biggest_points, from, to);	3361 FindBestInterval(max_number_of_chars, biggest_points, from, to);

3312 }	3362 }

3313 if (biggest_points == 0) return false;	3363 if (biggest_points == 0) return false;

3314 return true;	3364 return true;

3315 }	3365 }

3316	3366

3317	3367

3318 // Find the highest-points range between 0 and length_ where the character	3368 // Find the highest-points range between 0 and length_ where the character

3319 // information is not too vague. 'Too vague' means that there are more than	3369 // information is not too vague. 'Too vague' means that there are more than

3320 // max_number_of_chars that can occur at this position. Calculates the number	3370 // max_number_of_chars that can occur at this position. Calculates the number

3321 // of points as the product of width-of-the-range and	3371 // of points as the product of width-of-the-range and

3322 // probability-of-finding-one-of-the-characters, where the probability is	3372 // probability-of-finding-one-of-the-characters, where the probability is

3323 // calculated using the frequency distribution of the sample subject string.	3373 // calculated using the frequency distribution of the sample subject string.

3324 int BoyerMooreLookahead::FindBestInterval(	3374 int BoyerMooreLookahead::FindBestInterval(

3325 int max_number_of_chars, int old_biggest_points, int* from, int* to) {	3375 int max_number_of_chars, int old_biggest_points, int* from, int* to) {

3326 int biggest_points = old_biggest_points;	3376 int biggest_points = old_biggest_points;

3327 static const int kSize = RegExpMacroAssembler::kTableSize;	3377 static const int kSize = RegExpMacroAssembler::kTableSize;

3328 for (int i = 0; i < length_; ) {	3378 for (int i = 0; i < length_; ) {

3329 while (i < length_ && Count(i) > max_number_of_chars) i++;	3379 while (i < length_ && Count(i) > max_number_of_chars) i++;

3330 if (i == length_) break;	3380 if (i == length_) break;

3331 int remembered_from = i;	3381 int remembered_from = i;

3332 bool union_map[kSize];	3382 bool union_map[kSize];

3333 for (int j = 0; j < kSize; j++) union_map[j] = false;	3383 for (int j = 0; j < kSize; j++) union_map[j] = false;

3334 while (i < length_ && Count(i) <= max_number_of_chars) {	3384 while (i < length_ && Count(i) <= max_number_of_chars) {

3335 ZoneList<bool>* map = bitmaps_->at(i);	3385 BoyerMoorePositionInfo* map = bitmaps_->at(i);

3336 for (int j = 0; j < kSize; j++) union_map[j] \|= map->at(j);	3386 for (int j = 0; j < kSize; j++) union_map[j] \|= map->at(j);

3337 i++;	3387 i++;

3338 }	3388 }

3339 int frequency = 0;	3389 int frequency = 0;

3340 for (int j = 0; j < kSize; j++) {	3390 for (int j = 0; j < kSize; j++) {

3341 if (union_map[j]) {	3391 if (union_map[j]) {

3342 // Add 1 to the frequency to give a small per-character boost for	3392 // Add 1 to the frequency to give a small per-character boost for

3343 // the cases where our sampling is not good enough and many	3393 // the cases where our sampling is not good enough and many

3344 // characters have a frequency of zero. This means the frequency	3394 // characters have a frequency of zero. This means the frequency

3345 // can theoretically be up to 2*kSize though we treat it mostly as	3395 // can theoretically be up to 2*kSize though we treat it mostly as

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3380	3430

3381 const int kSkipArrayEntry = 0;	3431 const int kSkipArrayEntry = 0;

3382 const int kDontSkipArrayEntry = 1;	3432 const int kDontSkipArrayEntry = 1;

3383	3433

3384 for (int i = 0; i < kSize; i++) {	3434 for (int i = 0; i < kSize; i++) {

3385 boolean_skip_table->set(i, kSkipArrayEntry);	3435 boolean_skip_table->set(i, kSkipArrayEntry);

3386 }	3436 }

3387 int skip = max_lookahead + 1 - min_lookahead;	3437 int skip = max_lookahead + 1 - min_lookahead;

3388	3438

3389 for (int i = max_lookahead; i >= min_lookahead; i--) {	3439 for (int i = max_lookahead; i >= min_lookahead; i--) {

3390 ZoneList<bool>* map = bitmaps_->at(i);	3440 BoyerMoorePositionInfo* map = bitmaps_->at(i);

3391 for (int j = 0; j < map_length_; j++) {	3441 for (int j = 0; j < kSize; j++) {

3392 if (map->at(j)) {	3442 if (map->at(j)) {

3393 boolean_skip_table->set(j, kDontSkipArrayEntry);	3443 boolean_skip_table->set(j, kDontSkipArrayEntry);

3394 }	3444 }

3395 }	3445 }

3396 }	3446 }

3397	3447

3398 return skip;	3448 return skip;

3399 }	3449 }

3400	3450

3401	3451

3402 // See comment above on the implementation of GetSkipTable.	3452 // See comment above on the implementation of GetSkipTable.

3403 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {	3453 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {

	3454 const int kSize = RegExpMacroAssembler::kTableSize;

	3455

3404 int min_lookahead = 0;	3456 int min_lookahead = 0;

3405 int max_lookahead = 0;	3457 int max_lookahead = 0;

3406	3458

3407 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;	3459 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;

3408	3460

3409 bool found_single_character = false;	3461 bool found_single_character = false;

3410 bool abandoned_search_for_single_character = false;

3411 int single_character = 0;	3462 int single_character = 0;

3412 for (int i = max_lookahead; i >= min_lookahead; i--) {	3463 for (int i = max_lookahead; i >= min_lookahead; i--) {

3413 ZoneList<bool>* map = bitmaps_->at(i);	3464 BoyerMoorePositionInfo* map = bitmaps_->at(i);

3414 for (int j = 0; j < map_length_; j++) {	3465 if (map->map_count() > 1 \|\|

	3466 (found_single_character && map->map_count() != 0)) {

	3467 found_single_character = false;

	3468 break;

	3469 }

	3470 for (int j = 0; j < kSize; j++) {

3415 if (map->at(j)) {	3471 if (map->at(j)) {

3416 if (found_single_character) {	3472 found_single_character = true;

3417 found_single_character = false; // Found two.	3473 single_character = j;

3418 abandoned_search_for_single_character = true;	3474 break;

3419 break;

3420 } else {

3421 found_single_character = true;

3422 single_character = j;

3423 }

3424 }	3475 }

3425 }	3476 }

3426 if (abandoned_search_for_single_character) break;

3427 }	3477 }

3428	3478

3429 int lookahead_width = max_lookahead + 1 - min_lookahead;	3479 int lookahead_width = max_lookahead + 1 - min_lookahead;

3430	3480

3431 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {	3481 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {

3432 // The mask-compare can probably handle this better.	3482 // The mask-compare can probably handle this better.

3433 return false;	3483 return false;

3434 }	3484 }

3435	3485

3436 if (found_single_character) {	3486 if (found_single_character) {

3437 Label cont, again;	3487 Label cont, again;

3438 masm->Bind(&again);	3488 masm->Bind(&again);

3439 masm->LoadCurrentCharacter(max_lookahead, &cont, true);	3489 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

3440 if (max_char_ > map_length_) {	3490 if (max_char_ > kSize) {

3441 ASSERT(map_length_ == RegExpMacroAssembler::kTableSize);

3442 masm->CheckCharacterAfterAnd(single_character,	3491 masm->CheckCharacterAfterAnd(single_character,

3443 RegExpMacroAssembler::kTableMask,	3492 RegExpMacroAssembler::kTableMask,

3444 &cont);	3493 &cont);

3445 } else {	3494 } else {

3446 masm->CheckCharacter(single_character, &cont);	3495 masm->CheckCharacter(single_character, &cont);

3447 }	3496 }

3448 masm->AdvanceCurrentPosition(lookahead_width);	3497 masm->AdvanceCurrentPosition(lookahead_width);

3449 masm->GoTo(&again);	3498 masm->GoTo(&again);

3450 masm->Bind(&cont);	3499 masm->Bind(&cont);

3451 return true;	3500 return true;

3452 }	3501 }

3453	3502

3454 Handle<ByteArray> boolean_skip_table =	3503 Handle<ByteArray> boolean_skip_table =

3455 FACTORY->NewByteArray(map_length_, TENURED);	3504 FACTORY->NewByteArray(kSize, TENURED);

3456 int skip_distance = GetSkipTable(	3505 int skip_distance = GetSkipTable(

3457 min_lookahead, max_lookahead, boolean_skip_table);	3506 min_lookahead, max_lookahead, boolean_skip_table);

3458 ASSERT(skip_distance != 0);	3507 ASSERT(skip_distance != 0);

3459	3508

3460 Label cont, again;	3509 Label cont, again;

3461 masm->Bind(&again);	3510 masm->Bind(&again);

3462 masm->LoadCurrentCharacter(max_lookahead, &cont, true);	3511 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

3463 masm->CheckBitInTable(boolean_skip_table, &cont);	3512 masm->CheckBitInTable(boolean_skip_table, &cont);

3464 masm->AdvanceCurrentPosition(skip_distance);	3513 masm->AdvanceCurrentPosition(skip_distance);

3465 masm->GoTo(&again);	3514 masm->GoTo(&again);

(...skipping 158 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3624 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==	3673 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==

3625 this) {	3674 this) {

3626 // At this point we know that we are at a non-greedy loop that will eat	3675 // At this point we know that we are at a non-greedy loop that will eat

3627 // any character one at a time. Any non-anchored regexp has such a	3676 // any character one at a time. Any non-anchored regexp has such a

3628 // loop prepended to it in order to find where it starts. We look for	3677 // loop prepended to it in order to find where it starts. We look for

3629 // a pattern of the form ...abc... where we can look 6 characters ahead	3678 // a pattern of the form ...abc... where we can look 6 characters ahead

3630 // and step forwards 3 if the character is not one of abc. Abc need	3679 // and step forwards 3 if the character is not one of abc. Abc need

3631 // not be atoms, they can be any reasonably limited character class or	3680 // not be atoms, they can be any reasonably limited character class or

3632 // small alternation.	3681 // small alternation.

3633 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.	3682 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.

3634 eats_at_least =	3683 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

3635 Min(kMaxLookaheadForBoyerMoore,	3684 if (lookahead == NULL) {

3636 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));	3685 eats_at_least =

3637 if (eats_at_least >= 1) {	3686 Min(kMaxLookaheadForBoyerMoore,

3638 BoyerMooreLookahead bm(eats_at_least,	3687 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));

3639 RegExpMacroAssembler::kTableSize,	3688 if (eats_at_least >= 1) {

3640 compiler);	3689 BoyerMooreLookahead* bm =

3641 GuardedAlternative alt0 = alternatives_->at(0);	3690 new BoyerMooreLookahead(eats_at_least, compiler);

3642 alt0.node()->FillInBMInfo(0, &bm, not_at_start);	3691 GuardedAlternative alt0 = alternatives_->at(0);

3643 skip_was_emitted = bm.EmitSkipInstructions(macro_assembler);	3692 alt0.node()->FillInBMInfo(0, bm, not_at_start);

	3693 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);

	3694 }

	3695 } else {

	3696 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);

3644 }	3697 }

3645 }	3698 }

3646 }	3699 }

3647 }	3700 }

3648	3701

3649 if (eats_at_least == kEatsAtLeastNotYetInitialized) {	3702 if (eats_at_least == kEatsAtLeastNotYetInitialized) {

3650 // Save some time by looking at most one machine word ahead.	3703 // Save some time by looking at most one machine word ahead.

3651 eats_at_least = EatsAtLeast(compiler->ascii() ? 4 : 2, 0, not_at_start);	3704 eats_at_least = EatsAtLeast(compiler->ascii() ? 4 : 2, 0, not_at_start);

3652 }	3705 }

3653 int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);	3706 int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);

(...skipping 542 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4196 break;	4249 break;

4197 case AssertionNode::AT_BOUNDARY:	4250 case AssertionNode::AT_BOUNDARY:

4198 stream()->Add("label=\"\\b\", shape=septagon");	4251 stream()->Add("label=\"\\b\", shape=septagon");

4199 break;	4252 break;

4200 case AssertionNode::AT_NON_BOUNDARY:	4253 case AssertionNode::AT_NON_BOUNDARY:

4201 stream()->Add("label=\"\\B\", shape=septagon");	4254 stream()->Add("label=\"\\B\", shape=septagon");

4202 break;	4255 break;

4203 case AssertionNode::AFTER_NEWLINE:	4256 case AssertionNode::AFTER_NEWLINE:

4204 stream()->Add("label=\"(?<=\\n)\", shape=septagon");	4257 stream()->Add("label=\"(?<=\\n)\", shape=septagon");

4205 break;	4258 break;

4206 case AssertionNode::AFTER_WORD_CHARACTER:

4207 stream()->Add("label=\"(?<=\\w)\", shape=septagon");

4208 break;

4209 case AssertionNode::AFTER_NONWORD_CHARACTER:

4210 stream()->Add("label=\"(?<=\\W)\", shape=septagon");

4211 break;

4212 }	4259 }

4213 stream()->Add("];\n");	4260 stream()->Add("];\n");

4214 PrintAttributes(that);	4261 PrintAttributes(that);

4215 RegExpNode* successor = that->on_success();	4262 RegExpNode* successor = that->on_success();

4216 stream()->Add(" n%p -> n%p;\n", that, successor);	4263 stream()->Add(" n%p -> n%p;\n", that, successor);

4217 Visit(successor);	4264 Visit(successor);

4218 }	4265 }

4219	4266

4220	4267

4221 void DotPrinter::VisitAction(ActionNode* that) {	4268 void DotPrinter::VisitAction(ActionNode* that) {

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4306 printer.PrintNode(label, node);	4353 printer.PrintNode(label, node);

4307 }	4354 }

4308	4355

4309	4356

4310 #endif // DEBUG	4357 #endif // DEBUG

4311	4358

4312	4359

4313 // -------------------------------------------------------------------	4360 // -------------------------------------------------------------------

4314 // Tree to graph conversion	4361 // Tree to graph conversion

4315	4362

4316 static const uc16 kSpaceRanges[] = { 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0,

4317 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,

4318 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000, 0xFEFF, 0xFEFF };

4319 static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

4320

4321 static const uc16 kWordRanges[] = { '0', '9', 'A', 'Z', '_', '_', 'a', 'z' };

4322 static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);

4323

4324 static const uc16 kDigitRanges[] = { '0', '9' };

4325 static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

4326

4327 static const uc16 kLineTerminatorRanges[] = { 0x000A, 0x000A, 0x000D, 0x000D,

4328 0x2028, 0x2029 };

4329 static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);

4330

4331 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,	4363 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

4332 RegExpNode* on_success) {	4364 RegExpNode* on_success) {

4333 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);	4365 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);

4334 elms->Add(TextElement::Atom(this));	4366 elms->Add(TextElement::Atom(this));

4335 return new TextNode(elms, on_success);	4367 return new TextNode(elms, on_success);

4336 }	4368 }

4337	4369

4338	4370

4339 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,	4371 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

4340 RegExpNode* on_success) {	4372 RegExpNode* on_success) {

4341 return new TextNode(elements(), on_success);	4373 return new TextNode(elements(), on_success);

4342 }	4374 }

4343	4375

	4376

4344 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,	4377 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,

4345 const uc16* special_class,	4378 const int* special_class,

4346 int length) {	4379 int length) {

	4380 length--; // Remove final 0x10000.

	4381 ASSERT(special_class[length] == 0x10000);

4347 ASSERT(ranges->length() != 0);	4382 ASSERT(ranges->length() != 0);

4348 ASSERT(length != 0);	4383 ASSERT(length != 0);

4349 ASSERT(special_class[0] != 0);	4384 ASSERT(special_class[0] != 0);

4350 if (ranges->length() != (length >> 1) + 1) {	4385 if (ranges->length() != (length >> 1) + 1) {

4351 return false;	4386 return false;

4352 }	4387 }

4353 CharacterRange range = ranges->at(0);	4388 CharacterRange range = ranges->at(0);

4354 if (range.from() != 0) {	4389 if (range.from() != 0) {

4355 return false;	4390 return false;

4356 }	4391 }

4357 for (int i = 0; i < length; i += 2) {	4392 for (int i = 0; i < length; i += 2) {

4358 if (special_class[i] != (range.to() + 1)) {	4393 if (special_class[i] != (range.to() + 1)) {

4359 return false;	4394 return false;

4360 }	4395 }

4361 range = ranges->at((i >> 1) + 1);	4396 range = ranges->at((i >> 1) + 1);

4362 if (special_class[i+1] != range.from() - 1) {	4397 if (special_class[i+1] != range.from()) {

4363 return false;	4398 return false;

4364 }	4399 }

4365 }	4400 }

4366 if (range.to() != 0xffff) {	4401 if (range.to() != 0xffff) {

4367 return false;	4402 return false;

4368 }	4403 }

4369 return true;	4404 return true;

4370 }	4405 }

4371	4406

4372	4407

4373 static bool CompareRanges(ZoneList<CharacterRange>* ranges,	4408 static bool CompareRanges(ZoneList<CharacterRange>* ranges,

4374 const uc16* special_class,	4409 const int* special_class,

4375 int length) {	4410 int length) {

	4411 length--; // Remove final 0x10000.

	4412 ASSERT(special_class[length] == 0x10000);

4376 if (ranges->length() * 2 != length) {	4413 if (ranges->length() * 2 != length) {

4377 return false;	4414 return false;

4378 }	4415 }

4379 for (int i = 0; i < length; i += 2) {	4416 for (int i = 0; i < length; i += 2) {

4380 CharacterRange range = ranges->at(i >> 1);	4417 CharacterRange range = ranges->at(i >> 1);

4381 if (range.from() != special_class[i] \|\| range.to() != special_class[i+1]) {	4418 if (range.from() != special_class[i] \|\|

	4419 range.to() != special_class[i + 1] - 1) {

4382 return false;	4420 return false;

4383 }	4421 }

4384 }	4422 }

4385 return true;	4423 return true;

4386 }	4424 }

4387	4425

4388	4426

4389 bool RegExpCharacterClass::is_standard() {	4427 bool RegExpCharacterClass::is_standard() {

4390 // TODO(lrn): Remove need for this function, by not throwing away information	4428 // TODO(lrn): Remove need for this function, by not throwing away information

4391 // along the way.	4429 // along the way.

(...skipping 380 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4772 RegExpNode* on_success) {	4810 RegExpNode* on_success) {

4773 ZoneList<RegExpTree> children = nodes();	4811 ZoneList<RegExpTree> children = nodes();

4774 RegExpNode* current = on_success;	4812 RegExpNode* current = on_success;

4775 for (int i = children->length() - 1; i >= 0; i--) {	4813 for (int i = children->length() - 1; i >= 0; i--) {

4776 current = children->at(i)->ToNode(compiler, current);	4814 current = children->at(i)->ToNode(compiler, current);

4777 }	4815 }

4778 return current;	4816 return current;

4779 }	4817 }

4780	4818

4781	4819

4782 static void AddClass(const uc16* elmv,	4820 static void AddClass(const int* elmv,

4783 int elmc,	4821 int elmc,

4784 ZoneList<CharacterRange>* ranges) {	4822 ZoneList<CharacterRange>* ranges) {

	4823 elmc--;

	4824 ASSERT(elmv[elmc] == 0x10000);

4785 for (int i = 0; i < elmc; i += 2) {	4825 for (int i = 0; i < elmc; i += 2) {

4786 ASSERT(elmv[i] <= elmv[i + 1]);	4826 ASSERT(elmv[i] < elmv[i + 1]);

4787 ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));	4827 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));

4788 }	4828 }

4789 }	4829 }

4790	4830

4791	4831

4792 static void AddClassNegated(const uc16 *elmv,	4832 static void AddClassNegated(const int *elmv,

4793 int elmc,	4833 int elmc,

4794 ZoneList<CharacterRange>* ranges) {	4834 ZoneList<CharacterRange>* ranges) {

	4835 elmc--;

	4836 ASSERT(elmv[elmc] == 0x10000);

4795 ASSERT(elmv[0] != 0x0000);	4837 ASSERT(elmv[0] != 0x0000);

4796 ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);	4838 ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit);

4797 uc16 last = 0x0000;	4839 uc16 last = 0x0000;

4798 for (int i = 0; i < elmc; i += 2) {	4840 for (int i = 0; i < elmc; i += 2) {

4799 ASSERT(last <= elmv[i] - 1);	4841 ASSERT(last <= elmv[i] - 1);

4800 ASSERT(elmv[i] <= elmv[i + 1]);	4842 ASSERT(elmv[i] < elmv[i + 1]);

4801 ranges->Add(CharacterRange(last, elmv[i] - 1));	4843 ranges->Add(CharacterRange(last, elmv[i] - 1));

4802 last = elmv[i + 1] + 1;	4844 last = elmv[i + 1];

4803 }	4845 }

4804 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit));	4846 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit));

4805 }	4847 }

4806	4848

4807	4849

4808 void CharacterRange::AddClassEscape(uc16 type,	4850 void CharacterRange::AddClassEscape(uc16 type,

4809 ZoneList<CharacterRange>* ranges) {	4851 ZoneList<CharacterRange>* ranges) {

4810 switch (type) {	4852 switch (type) {

4811 case 's':	4853 case 's':

4812 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);	4854 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);

(...skipping 30 matching lines...) Expand all Loading...
4843 AddClass(kLineTerminatorRanges,	4885 AddClass(kLineTerminatorRanges,

4844 kLineTerminatorRangeCount,	4886 kLineTerminatorRangeCount,

4845 ranges);	4887 ranges);

4846 break;	4888 break;

4847 default:	4889 default:

4848 UNREACHABLE();	4890 UNREACHABLE();

4849 }	4891 }

4850 }	4892 }

4851	4893

4852	4894

4853 Vector<const uc16> CharacterRange::GetWordBounds() {	4895 Vector<const int> CharacterRange::GetWordBounds() {

4854 return Vector<const uc16>(kWordRanges, kWordRangeCount);	4896 return Vector<const int>(kWordRanges, kWordRangeCount - 1);

4855 }	4897 }

4856	4898

4857	4899

4858 class CharacterRangeSplitter {	4900 class CharacterRangeSplitter {

4859 public:	4901 public:

4860 CharacterRangeSplitter(ZoneList<CharacterRange>** included,	4902 CharacterRangeSplitter(ZoneList<CharacterRange>** included,

4861 ZoneList<CharacterRange>** excluded)	4903 ZoneList<CharacterRange>** excluded)

4862 : included_(included),	4904 : included_(included),

4863 excluded_(excluded) { }	4905 excluded_(excluded) { }

4864 void Call(uc16 from, DispatchTable::Entry entry);	4906 void Call(uc16 from, DispatchTable::Entry entry);

(...skipping 11 matching lines...) Expand all Loading...
4876 if (!entry.out_set()->Get(kInBase)) return;	4918 if (!entry.out_set()->Get(kInBase)) return;

4877 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)	4919 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)

4878 ? included_	4920 ? included_

4879 : excluded_;	4921 : excluded_;

4880 if (target == NULL) target = new ZoneList<CharacterRange>(2);	4922 if (target == NULL) target = new ZoneList<CharacterRange>(2);

4881 (*target)->Add(CharacterRange(entry.from(), entry.to()));	4923 (*target)->Add(CharacterRange(entry.from(), entry.to()));

4882 }	4924 }

4883	4925

4884	4926

4885 void CharacterRange::Split(ZoneList<CharacterRange>* base,	4927 void CharacterRange::Split(ZoneList<CharacterRange>* base,

4886 Vector<const uc16> overlay,	4928 Vector<const int> overlay,

4887 ZoneList<CharacterRange>** included,	4929 ZoneList<CharacterRange>** included,

4888 ZoneList<CharacterRange>** excluded) {	4930 ZoneList<CharacterRange>** excluded) {

4889 ASSERT_EQ(NULL, *included);	4931 ASSERT_EQ(NULL, *included);

4890 ASSERT_EQ(NULL, *excluded);	4932 ASSERT_EQ(NULL, *excluded);

4891 DispatchTable table;	4933 DispatchTable table;

4892 for (int i = 0; i < base->length(); i++)	4934 for (int i = 0; i < base->length(); i++)

4893 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);	4935 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);

4894 for (int i = 0; i < overlay.length(); i += 2) {	4936 for (int i = 0; i < overlay.length(); i += 2) {

4895 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),	4937 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),

4896 CharacterRangeSplitter::kInOverlay);	4938 CharacterRangeSplitter::kInOverlay);

4897 }	4939 }

4898 CharacterRangeSplitter callback(included, excluded);	4940 CharacterRangeSplitter callback(included, excluded);

4899 table.ForEach(&callback);	4941 table.ForEach(&callback);

4900 }	4942 }

4901	4943

4902	4944

4903 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	4945 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

4904 bool is_ascii) {	4946 bool is_ascii) {

4905 Isolate* isolate = Isolate::Current();	4947 Isolate* isolate = Isolate::Current();

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4971 if (n <= 1) return true;	5013 if (n <= 1) return true;

4972 int max = ranges->at(0).to();	5014 int max = ranges->at(0).to();

4973 for (int i = 1; i < n; i++) {	5015 for (int i = 1; i < n; i++) {

4974 CharacterRange next_range = ranges->at(i);	5016 CharacterRange next_range = ranges->at(i);

4975 if (next_range.from() <= max + 1) return false;	5017 if (next_range.from() <= max + 1) return false;

4976 max = next_range.to();	5018 max = next_range.to();

4977 }	5019 }

4978 return true;	5020 return true;

4979 }	5021 }

4980	5022

4981 SetRelation CharacterRange::WordCharacterRelation(

4982 ZoneList<CharacterRange>* range) {

4983 ASSERT(IsCanonical(range));

4984 int i = 0; // Word character range index.

4985 int j = 0; // Argument range index.

4986 ASSERT_NE(0, kWordRangeCount);

4987 SetRelation result;

4988 if (range->length() == 0) {

4989 result.SetElementsInSecondSet();

4990 return result;

4991 }

4992 CharacterRange argument_range = range->at(0);

4993 CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]);

4994 while (i < kWordRangeCount && j < range->length()) {

4995 // Check the two ranges for the five cases:

4996 // - no overlap.

4997 // - partial overlap (there are elements in both ranges that isn't

4998 // in the other, and there are also elements that are in both).

4999 // - argument range entirely inside word range.

5000 // - word range entirely inside argument range.

5001 // - ranges are completely equal.

5002

5003 // First check for no overlap. The earlier range is not in the other set.

5004 if (argument_range.from() > word_range.to()) {

5005 // Ranges are disjoint. The earlier word range contains elements that

5006 // cannot be in the argument set.

5007 result.SetElementsInSecondSet();

5008 } else if (word_range.from() > argument_range.to()) {

5009 // Ranges are disjoint. The earlier argument range contains elements that

5010 // cannot be in the word set.

5011 result.SetElementsInFirstSet();

5012 } else if (word_range.from() <= argument_range.from() &&

5013 word_range.to() >= argument_range.from()) {

5014 result.SetElementsInBothSets();

5015 // argument range completely inside word range.

5016 if (word_range.from() < argument_range.from() \|\|

5017 word_range.to() > argument_range.from()) {

5018 result.SetElementsInSecondSet();

5019 }

5020 } else if (word_range.from() >= argument_range.from() &&

5021 word_range.to() <= argument_range.from()) {

5022 result.SetElementsInBothSets();

5023 result.SetElementsInFirstSet();

5024 } else {

5025 // There is overlap, and neither is a subrange of the other

5026 result.SetElementsInFirstSet();

5027 result.SetElementsInSecondSet();

5028 result.SetElementsInBothSets();

5029 }

5030 if (result.NonTrivialIntersection()) {

5031 // The result is as (im)precise as we can possibly make it.

5032 return result;

5033 }

5034 // Progress the range(s) with minimal to-character.

5035 uc16 word_to = word_range.to();

5036 uc16 argument_to = argument_range.to();

5037 if (argument_to <= word_to) {

5038 j++;

5039 if (j < range->length()) {

5040 argument_range = range->at(j);

5041 }

5042 }

5043 if (word_to <= argument_to) {

5044 i += 2;

5045 if (i < kWordRangeCount) {

5046 word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]);

5047 }

5048 }

5049 }

5050 // Check if anything wasn't compared in the loop.

5051 if (i < kWordRangeCount) {

5052 // word range contains something not in argument range.

5053 result.SetElementsInSecondSet();

5054 } else if (j < range->length()) {

5055 // Argument range contains something not in word range.

5056 result.SetElementsInFirstSet();

5057 }

5058

5059 return result;

5060 }

5061

5062	5023

5063 ZoneList<CharacterRange>* CharacterSet::ranges() {	5024 ZoneList<CharacterRange>* CharacterSet::ranges() {

5064 if (ranges_ == NULL) {	5025 if (ranges_ == NULL) {

5065 ranges_ = new ZoneList<CharacterRange>(2);	5026 ranges_ = new ZoneList<CharacterRange>(2);

5066 CharacterRange::AddClassEscape(standard_set_type_, ranges_);	5027 CharacterRange::AddClassEscape(standard_set_type_, ranges_);

5067 }	5028 }

5068 return ranges_;	5029 return ranges_;

5069 }	5030 }

5070	5031

5071	5032

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5184 num_canonical,	5145 num_canonical,

5185 character_ranges->at(read));	5146 character_ranges->at(read));

5186 read++;	5147 read++;

5187 } while (read < n);	5148 } while (read < n);

5188 character_ranges->Rewind(num_canonical);	5149 character_ranges->Rewind(num_canonical);

5189	5150

5190 ASSERT(CharacterRange::IsCanonical(character_ranges));	5151 ASSERT(CharacterRange::IsCanonical(character_ranges));

5191 }	5152 }

5192	5153

5193	5154

5194 // Utility function for CharacterRange::Merge. Adds a range at the end of

5195 // a canonicalized range list, if necessary merging the range with the last

5196 // range of the list.

5197 static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) {

5198 if (set == NULL) return;

5199 ASSERT(set->length() == 0 \|\| set->at(set->length() - 1).to() < range.from());

5200 int n = set->length();

5201 if (n > 0) {

5202 CharacterRange lastRange = set->at(n - 1);

5203 if (lastRange.to() == range.from() - 1) {

5204 set->at(n - 1) = CharacterRange(lastRange.from(), range.to());

5205 return;

5206 }

5207 }

5208 set->Add(range);

5209 }

5210

5211

5212 static void AddRangeToSelectedSet(int selector,

5213 ZoneList<CharacterRange>* first_set,

5214 ZoneList<CharacterRange>* second_set,

5215 ZoneList<CharacterRange>* intersection_set,

5216 CharacterRange range) {

5217 switch (selector) {

5218 case kInsideFirst:

5219 AddRangeToSet(first_set, range);

5220 break;

5221 case kInsideSecond:

5222 AddRangeToSet(second_set, range);

5223 break;

5224 case kInsideBoth:

5225 AddRangeToSet(intersection_set, range);

5226 break;

5227 }

5228 }

5229

5230

5231

5232 void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,

5233 ZoneList<CharacterRange>* second_set,

5234 ZoneList<CharacterRange>* first_set_only_out,

5235 ZoneList<CharacterRange>* second_set_only_out,

5236 ZoneList<CharacterRange>* both_sets_out) {

5237 // Inputs are canonicalized.

5238 ASSERT(CharacterRange::IsCanonical(first_set));

5239 ASSERT(CharacterRange::IsCanonical(second_set));

5240 // Outputs are empty, if applicable.

5241 ASSERT(first_set_only_out == NULL \|\| first_set_only_out->length() == 0);

5242 ASSERT(second_set_only_out == NULL \|\| second_set_only_out->length() == 0);

5243 ASSERT(both_sets_out == NULL \|\| both_sets_out->length() == 0);

5244

5245 // Merge sets by iterating through the lists in order of lowest "from" value,

5246 // and putting intervals into one of three sets.

5247

5248 if (first_set->length() == 0) {

5249 second_set_only_out->AddAll(*second_set);

5250 return;

5251 }

5252 if (second_set->length() == 0) {

5253 first_set_only_out->AddAll(*first_set);

5254 return;

5255 }

5256 // Indices into input lists.

5257 int i1 = 0;

5258 int i2 = 0;

5259 // Cache length of input lists.

5260 int n1 = first_set->length();

5261 int n2 = second_set->length();

5262 // Current range. May be invalid if state is kInsideNone.

5263 int from = 0;

5264 int to = -1;

5265 // Where current range comes from.

5266 int state = kInsideNone;

5267

5268 while (i1 < n1 \|\| i2 < n2) {

5269 CharacterRange next_range;

5270 int range_source;

5271 if (i2 == n2 \|\|

5272 (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {

5273 // Next smallest element is in first set.

5274 next_range = first_set->at(i1++);

5275 range_source = kInsideFirst;

5276 } else {

5277 // Next smallest element is in second set.

5278 next_range = second_set->at(i2++);

5279 range_source = kInsideSecond;

5280 }

5281 if (to < next_range.from()) {

5282 // Ranges disjoint: \|current\| \|next\|

5283 AddRangeToSelectedSet(state,

5284 first_set_only_out,

5285 second_set_only_out,

5286 both_sets_out,

5287 CharacterRange(from, to));

5288 from = next_range.from();

5289 to = next_range.to();

5290 state = range_source;

5291 } else {

5292 if (from < next_range.from()) {

5293 AddRangeToSelectedSet(state,

5294 first_set_only_out,

5295 second_set_only_out,

5296 both_sets_out,

5297 CharacterRange(from, next_range.from()-1));

5298 }

5299 if (to < next_range.to()) {

5300 // Ranges overlap: \|current\|

5301 // \|next\|

5302 AddRangeToSelectedSet(state \| range_source,

5303 first_set_only_out,

5304 second_set_only_out,

5305 both_sets_out,

5306 CharacterRange(next_range.from(), to));

5307 from = to + 1;

5308 to = next_range.to();

5309 state = range_source;

5310 } else {

5311 // Range included: \|current\| , possibly ending at same character.

5312 // \|next\|

5313 AddRangeToSelectedSet(

5314 state \| range_source,

5315 first_set_only_out,

5316 second_set_only_out,

5317 both_sets_out,

5318 CharacterRange(next_range.from(), next_range.to()));

5319 from = next_range.to() + 1;

5320 // If ranges end at same character, both ranges are consumed completely.

5321 if (next_range.to() == to) state = kInsideNone;

5322 }

5323 }

5324 }

5325 AddRangeToSelectedSet(state,

5326 first_set_only_out,

5327 second_set_only_out,

5328 both_sets_out,

5329 CharacterRange(from, to));

5330 }

5331

5332

5333 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,	5155 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,

5334 ZoneList<CharacterRange>* negated_ranges) {	5156 ZoneList<CharacterRange>* negated_ranges) {

5335 ASSERT(CharacterRange::IsCanonical(ranges));	5157 ASSERT(CharacterRange::IsCanonical(ranges));

5336 ASSERT_EQ(0, negated_ranges->length());	5158 ASSERT_EQ(0, negated_ranges->length());

5337 int range_count = ranges->length();	5159 int range_count = ranges->length();

5338 uc16 from = 0;	5160 uc16 from = 0;

5339 int i = 0;	5161 int i = 0;

5340 if (range_count > 0 && ranges->at(0).from() == 0) {	5162 if (range_count > 0 && ranges->at(0).from() == 0) {

5341 from = ranges->at(0).to();	5163 from = ranges->at(0).to();

5342 i = 1;	5164 i = 1;

(...skipping 292 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5635 }	5457 }

5636	5458

5637	5459

5638 void Analysis::VisitBackReference(BackReferenceNode* that) {	5460 void Analysis::VisitBackReference(BackReferenceNode* that) {

5639 EnsureAnalyzed(that->on_success());	5461 EnsureAnalyzed(that->on_success());

5640 }	5462 }

5641	5463

5642	5464

5643 void Analysis::VisitAssertion(AssertionNode* that) {	5465 void Analysis::VisitAssertion(AssertionNode* that) {

5644 EnsureAnalyzed(that->on_success());	5466 EnsureAnalyzed(that->on_success());

5645 AssertionNode::AssertionNodeType type = that->type();

5646 if (type == AssertionNode::AT_BOUNDARY \|\|

5647 type == AssertionNode::AT_NON_BOUNDARY) {

5648 // Check if the following character is known to be a word character

5649 // or known to not be a word character.

5650 ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet();

5651

5652 CharacterRange::Canonicalize(following_chars);

5653

5654 SetRelation word_relation =

5655 CharacterRange::WordCharacterRelation(following_chars);

5656 if (word_relation.Disjoint()) {

5657 // Includes the case where following_chars is empty (e.g., end-of-input).

5658 // Following character is definitely not a word character.

5659 type = (type == AssertionNode::AT_BOUNDARY) ?

5660 AssertionNode::AFTER_WORD_CHARACTER :

5661 AssertionNode::AFTER_NONWORD_CHARACTER;

5662 that->set_type(type);

5663 } else if (word_relation.ContainedIn()) {

5664 // Following character is definitely a word character.

5665 type = (type == AssertionNode::AT_BOUNDARY) ?

5666 AssertionNode::AFTER_NONWORD_CHARACTER :

5667 AssertionNode::AFTER_WORD_CHARACTER;

5668 that->set_type(type);

5669 }

5670 }

5671 }	5467 }

5672	5468

5673	5469

5674 ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() {	5470 void BackReferenceNode::FillInBMInfo(

5675 if (first_character_set_ == NULL) {	5471 int offset, BoyerMooreLookahead* bm, bool not_at_start) {

5676 if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) {	5472 // Working out the set of characters that a backreference can match is too

5677 // If we can't find an exact solution within the budget, we	5473 // hard, so we just say that any character can match.

5678 // set the value to the set of every character, i.e., all characters	5474 bm->SetRest(offset);

5679 // are possible.	5475 SaveBMInfo(bm, not_at_start, offset);

5680 ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1);

5681 all_set->Add(CharacterRange::Everything());

5682 first_character_set_ = all_set;

5683 }

5684 }

5685 return first_character_set_;

5686 }	5476 }

5687	5477

5688	5478

5689 int RegExpNode::ComputeFirstCharacterSet(int budget) {	5479 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==

5690 // Default behavior is to not be able to determine the first character.	5480 RegExpMacroAssembler::kTableSize);

5691 return kComputeFirstCharacterSetFail;

5692 }

5693

5694

5695 int LoopChoiceNode::ComputeFirstCharacterSet(int budget) {

5696 budget--;

5697 if (budget >= 0) {

5698 // Find loop min-iteration. It's the value of the guarded choice node

5699 // with a GEQ guard, if any.

5700 int min_repetition = 0;

5701

5702 for (int i = 0; i <= 1; i++) {

5703 GuardedAlternative alternative = alternatives()->at(i);

5704 ZoneList<Guard> guards = alternative.guards();

5705 if (guards != NULL && guards->length() > 0) {

5706 Guard* guard = guards->at(0);

5707 if (guard->op() == Guard::GEQ) {

5708 min_repetition = guard->value();

5709 break;

5710 }

5711 }

5712 }

5713

5714 budget = loop_node()->ComputeFirstCharacterSet(budget);

5715 if (budget >= 0) {

5716 ZoneList<CharacterRange>* character_set =

5717 loop_node()->first_character_set();

5718 if (body_can_be_zero_length() \|\| min_repetition == 0) {

5719 budget = continue_node()->ComputeFirstCharacterSet(budget);

5720 if (budget < 0) return budget;

5721 ZoneList<CharacterRange>* body_set =

5722 continue_node()->first_character_set();

5723 ZoneList<CharacterRange>* union_set =

5724 new ZoneList<CharacterRange>(Max(character_set->length(),

5725 body_set->length()));

5726 CharacterRange::Merge(character_set,

5727 body_set,

5728 union_set,

5729 union_set,

5730 union_set);

5731 character_set = union_set;

5732 }

5733 set_first_character_set(character_set);

5734 }

5735 }

5736 return budget;

5737 }

5738

5739

5740 int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) {

5741 budget--;

5742 if (budget >= 0) {

5743 GuardedAlternative successor = this->alternatives()->at(1);

5744 RegExpNode* successor_node = successor.node();

5745 budget = successor_node->ComputeFirstCharacterSet(budget);

5746 if (budget >= 0) {

5747 set_first_character_set(successor_node->first_character_set());

5748 }

5749 }

5750 return budget;

5751 }

5752

5753

5754 // The first character set of an EndNode is unknowable. Just use the

5755 // default implementation that fails and returns all characters as possible.

5756

5757

5758 int AssertionNode::ComputeFirstCharacterSet(int budget) {

5759 budget -= 1;

5760 if (budget >= 0) {

5761 switch (type_) {

5762 case AT_END: {

5763 set_first_character_set(new ZoneList<CharacterRange>(0));

5764 break;

5765 }

5766 case AT_START:

5767 case AT_BOUNDARY:

5768 case AT_NON_BOUNDARY:

5769 case AFTER_NEWLINE:

5770 case AFTER_NONWORD_CHARACTER:

5771 case AFTER_WORD_CHARACTER: {

5772 ASSERT_NOT_NULL(on_success());

5773 budget = on_success()->ComputeFirstCharacterSet(budget);

5774 if (budget >= 0) {

5775 set_first_character_set(on_success()->first_character_set());

5776 }

5777 break;

5778 }

5779 }

5780 }

5781 return budget;

5782 }

5783

5784

5785 int ActionNode::ComputeFirstCharacterSet(int budget) {

5786 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail;

5787 budget--;

5788 if (budget >= 0) {

5789 ASSERT_NOT_NULL(on_success());

5790 budget = on_success()->ComputeFirstCharacterSet(budget);

5791 if (budget >= 0) {

5792 set_first_character_set(on_success()->first_character_set());

5793 }

5794 }

5795 return budget;

5796 }

5797

5798

5799 int BackReferenceNode::ComputeFirstCharacterSet(int budget) {

5800 // We don't know anything about the first character of a backreference

5801 // at this point.

5802 // The potential first characters are the first characters of the capture,

5803 // and the first characters of the on_success node, depending on whether the

5804 // capture can be empty and whether it is known to be participating or known

5805 // not to be.

5806 return kComputeFirstCharacterSetFail;

5807 }

5808	5481

5809	5482

5810 void ChoiceNode::FillInBMInfo(	5483 void ChoiceNode::FillInBMInfo(

5811 int offset, BoyerMooreLookahead* bm, bool not_at_start) {	5484 int offset, BoyerMooreLookahead* bm, bool not_at_start) {

5812 ZoneList<GuardedAlternative>* alts = alternatives();	5485 ZoneList<GuardedAlternative>* alts = alternatives();

5813 for (int i = 0; i < alts->length(); i++) {	5486 for (int i = 0; i < alts->length(); i++) {

5814 GuardedAlternative& alt = alts->at(i);	5487 GuardedAlternative& alt = alts->at(i);

5815 if (alt.guards() != NULL && alt.guards()->length() != 0) {	5488 if (alt.guards() != NULL && alt.guards()->length() != 0) {

5816 bm->SetRest(offset); // Give up trying to fill in info.	5489 bm->SetRest(offset); // Give up trying to fill in info.

	5490 SaveBMInfo(bm, not_at_start, offset);

5817 return;	5491 return;

5818 }	5492 }

5819 alt.node()->FillInBMInfo(offset, bm, not_at_start);	5493 alt.node()->FillInBMInfo(offset, bm, not_at_start);

5820 }	5494 }

	5495 SaveBMInfo(bm, not_at_start, offset);

5821 }	5496 }

5822	5497

5823	5498

5824 void TextNode::FillInBMInfo(	5499 void TextNode::FillInBMInfo(

5825 int offset, BoyerMooreLookahead* bm, bool not_at_start) {	5500 int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) {

5826 if (offset >= bm->length()) return;	5501 if (initial_offset >= bm->length()) return;

	5502 int offset = initial_offset;

5827 int max_char = bm->max_char();	5503 int max_char = bm->max_char();

5828 for (int i = 0; i < elements()->length(); i++) {	5504 for (int i = 0; i < elements()->length(); i++) {

5829 if (offset >= bm->length()) return;	5505 if (offset >= bm->length()) {

	5506 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	5507 return;

	5508 }

5830 TextElement text = elements()->at(i);	5509 TextElement text = elements()->at(i);

5831 if (text.type == TextElement::ATOM) {	5510 if (text.type == TextElement::ATOM) {

5832 RegExpAtom* atom = text.data.u_atom;	5511 RegExpAtom* atom = text.data.u_atom;

5833 for (int j = 0; j < atom->length(); j++, offset++) {	5512 for (int j = 0; j < atom->length(); j++, offset++) {

5834 if (offset >= bm->length()) return;	5513 if (offset >= bm->length()) {

	5514 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	5515 return;

	5516 }

5835 uc16 character = atom->data()[j];	5517 uc16 character = atom->data()[j];

5836 if (bm->compiler()->ignore_case()) {	5518 if (bm->compiler()->ignore_case()) {

5837 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5519 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5838 int length = GetCaseIndependentLetters(	5520 int length = GetCaseIndependentLetters(

5839 ISOLATE,	5521 ISOLATE,

5840 character,	5522 character,

5841 bm->max_char() == String::kMaxAsciiCharCode,	5523 bm->max_char() == String::kMaxAsciiCharCode,

5842 chars);	5524 chars);

5843 for (int j = 0; j < length; j++) {	5525 for (int j = 0; j < length; j++) {

5844 bm->Set(offset, chars[j]);	5526 bm->Set(offset, chars[j]);

5845 }	5527 }

5846 } else {	5528 } else {

5847 if (character <= max_char) bm->Set(offset, character);	5529 if (character <= max_char) bm->Set(offset, character);

5848 }	5530 }

5849 }	5531 }

5850 } else {	5532 } else {

5851 ASSERT(text.type == TextElement::CHAR_CLASS);	5533 ASSERT(text.type == TextElement::CHAR_CLASS);

5852 RegExpCharacterClass* char_class = text.data.u_char_class;	5534 RegExpCharacterClass* char_class = text.data.u_char_class;

5853 ZoneList<CharacterRange>* ranges = char_class->ranges();	5535 ZoneList<CharacterRange>* ranges = char_class->ranges();

5854 if (char_class->is_negated()) {	5536 if (char_class->is_negated()) {

5855 bm->SetAll(offset);	5537 bm->SetAll(offset);

5856 } else {	5538 } else {

5857 for (int k = 0; k < ranges->length(); k++) {	5539 for (int k = 0; k < ranges->length(); k++) {

5858 CharacterRange& range = ranges->at(k);	5540 CharacterRange& range = ranges->at(k);

5859 if (range.from() > max_char) continue;	5541 if (range.from() > max_char) continue;

5860 int to = Min(max_char, static_cast<int>(range.to()));	5542 int to = Min(max_char, static_cast<int>(range.to()));

5861 if (to - range.from() >= BoyerMooreLookahead::kTooManyCharacters) {	5543 bm->SetInterval(offset, Interval(range.from(), to));

5862 bm->SetAll(offset);

5863 break;

5864 }

5865 for (int m = range.from(); m <= to; m++) {

5866 bm->Set(offset, m);

5867 }

5868 }	5544 }

5869 }	5545 }

5870 offset++;	5546 offset++;

5871 }	5547 }

5872 }	5548 }

5873 if (offset >= bm->length()) return;	5549 if (offset >= bm->length()) {

	5550 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	5551 return;

	5552 }

5874 on_success()->FillInBMInfo(offset,	5553 on_success()->FillInBMInfo(offset,

5875 bm,	5554 bm,

5876 true); // Not at start after a text node.	5555 true); // Not at start after a text node.

	5556 if (initial_offset == 0) set_bm_info(not_at_start, bm);

5877 }	5557 }

5878	5558

5879	5559

5880 int TextNode::ComputeFirstCharacterSet(int budget) {

5881 budget--;

5882 if (budget >= 0) {

5883 ASSERT_NE(0, elements()->length());

5884 TextElement text = elements()->at(0);

5885 if (text.type == TextElement::ATOM) {

5886 RegExpAtom* atom = text.data.u_atom;

5887 ASSERT_NE(0, atom->length());

5888 uc16 first_char = atom->data()[0];

5889 ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1);

5890 range->Add(CharacterRange(first_char, first_char));

5891 set_first_character_set(range);

5892 } else {

5893 ASSERT(text.type == TextElement::CHAR_CLASS);

5894 RegExpCharacterClass* char_class = text.data.u_char_class;

5895 ZoneList<CharacterRange>* ranges = char_class->ranges();

5896 // TODO(lrn): Canonicalize ranges when they are created

5897 // instead of waiting until now.

5898 CharacterRange::Canonicalize(ranges);

5899 if (char_class->is_negated()) {

5900 int length = ranges->length();

5901 int new_length = length + 1;

5902 if (length > 0) {

5903 if (ranges->at(0).from() == 0) new_length--;

5904 if (ranges->at(length - 1).to() == String::kMaxUtf16CodeUnit) {

5905 new_length--;

5906 }

5907 }

5908 ZoneList<CharacterRange>* negated_ranges =

5909 new ZoneList<CharacterRange>(new_length);

5910 CharacterRange::Negate(ranges, negated_ranges);

5911 set_first_character_set(negated_ranges);

5912 } else {

5913 set_first_character_set(ranges);

5914 }

5915 }

5916 }

5917 return budget;

5918 }

5919

5920

5921

5922 // -------------------------------------------------------------------	5560 // -------------------------------------------------------------------

5923 // Dispatch table construction	5561 // Dispatch table construction

5924	5562

5925	5563

5926 void DispatchTableConstructor::VisitEnd(EndNode* that) {	5564 void DispatchTableConstructor::VisitEnd(EndNode* that) {

5927 AddRange(CharacterRange::Everything());	5565 AddRange(CharacterRange::Everything());

5928 }	5566 }

5929	5567

5930	5568

5931 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {	5569 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {

(...skipping 201 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6133 }	5771 }

6134	5772

6135 return compiler.Assemble(&macro_assembler,	5773 return compiler.Assemble(&macro_assembler,

6136 node,	5774 node,

6137 data->capture_count,	5775 data->capture_count,

6138 pattern);	5776 pattern);

6139 }	5777 }

6140	5778

6141	5779

6142 }} // namespace v8::internal	5780 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/macros.py » ('j') | no next file with comments »