OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
373 elements->set(0, re->Pattern()); | 373 elements->set(0, re->Pattern()); |
374 elements->set(1, *error_message); | 374 elements->set(1, *error_message); |
375 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); | 375 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
376 Handle<Object> regexp_err = | 376 Handle<Object> regexp_err = |
377 factory->NewSyntaxError("malformed_regexp", array); | 377 factory->NewSyntaxError("malformed_regexp", array); |
378 isolate->Throw(*regexp_err); | 378 isolate->Throw(*regexp_err); |
379 return false; | 379 return false; |
380 } | 380 } |
381 | 381 |
382 | 382 |
383 int kUninitializedRegExpNodePlaceHolder; | |
384 | |
385 | |
383 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, | 386 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, |
384 Handle<String> sample_subject, | 387 Handle<String> sample_subject, |
385 bool is_ascii) { | 388 bool is_ascii) { |
386 // Compile the RegExp. | 389 // Compile the RegExp. |
387 Isolate* isolate = re->GetIsolate(); | 390 Isolate* isolate = re->GetIsolate(); |
388 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); | 391 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); |
389 PostponeInterruptsScope postpone(isolate); | 392 PostponeInterruptsScope postpone(isolate); |
390 // If we had a compilation error the last time this is saved at the | 393 // If we had a compilation error the last time this is saved at the |
391 // saved code index. | 394 // saved code index. |
392 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); | 395 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); |
(...skipping 2026 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2419 char_mask = String::kMaxUtf16CodeUnit; | 2422 char_mask = String::kMaxUtf16CodeUnit; |
2420 } | 2423 } |
2421 for (int k = 0; k < elms_->length(); k++) { | 2424 for (int k = 0; k < elms_->length(); k++) { |
2422 TextElement elm = elms_->at(k); | 2425 TextElement elm = elms_->at(k); |
2423 if (elm.type == TextElement::ATOM) { | 2426 if (elm.type == TextElement::ATOM) { |
2424 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2427 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2425 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2428 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2426 QuickCheckDetails::Position* pos = | 2429 QuickCheckDetails::Position* pos = |
2427 details->positions(characters_filled_in); | 2430 details->positions(characters_filled_in); |
2428 uc16 c = quarks[i]; | 2431 uc16 c = quarks[i]; |
2429 if (c > char_mask) { | 2432 // We should already have filtered out nodes that have non-ASCII |
2430 // If we expect a non-ASCII character from an ASCII string, | 2433 // characters if we are matching against an ASCII string. |
2431 // there is no way we can match. Not even case independent | 2434 ASSERT(c <= char_mask); |
2432 // matching can turn an ASCII character into non-ASCII or | |
2433 // vice versa. | |
2434 details->set_cannot_match(); | |
2435 pos->determines_perfectly = false; | |
2436 return; | |
2437 } | |
2438 if (compiler->ignore_case()) { | 2435 if (compiler->ignore_case()) { |
2439 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2436 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
2440 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), | 2437 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), |
2441 chars); | 2438 chars); |
2442 ASSERT(length != 0); // Can only happen if c > char_mask (see above). | 2439 ASSERT(length != 0); // Can only happen if c > char_mask (see above). |
2443 if (length == 1) { | 2440 if (length == 1) { |
2444 // This letter has no case equivalents, so it's nice and simple | 2441 // This letter has no case equivalents, so it's nice and simple |
2445 // and the mask-compare will determine definitely whether we have | 2442 // and the mask-compare will determine definitely whether we have |
2446 // a match at this character position. | 2443 // a match at this character position. |
2447 pos->mask = char_mask; | 2444 pos->mask = char_mask; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2489 // A quick check uses multi-character mask and compare. There is no | 2486 // A quick check uses multi-character mask and compare. There is no |
2490 // useful way to incorporate a negative char class into this scheme | 2487 // useful way to incorporate a negative char class into this scheme |
2491 // so we just conservatively create a mask and value that will always | 2488 // so we just conservatively create a mask and value that will always |
2492 // succeed. | 2489 // succeed. |
2493 pos->mask = 0; | 2490 pos->mask = 0; |
2494 pos->value = 0; | 2491 pos->value = 0; |
2495 } else { | 2492 } else { |
2496 int first_range = 0; | 2493 int first_range = 0; |
2497 while (ranges->at(first_range).from() > char_mask) { | 2494 while (ranges->at(first_range).from() > char_mask) { |
2498 first_range++; | 2495 first_range++; |
2499 if (first_range == ranges->length()) { | 2496 // We should already have filtered out nodes that cannot match |
2500 details->set_cannot_match(); | 2497 // so the first range should be a valid range. |
2501 pos->determines_perfectly = false; | 2498 ASSERT(first_range != ranges->length()); |
2502 return; | |
2503 } | |
2504 } | 2499 } |
2505 CharacterRange range = ranges->at(first_range); | 2500 CharacterRange range = ranges->at(first_range); |
2506 uc16 from = range.from(); | 2501 uc16 from = range.from(); |
2507 uc16 to = range.to(); | 2502 uc16 to = range.to(); |
2508 if (to > char_mask) { | 2503 if (to > char_mask) { |
2509 to = char_mask; | 2504 to = char_mask; |
2510 } | 2505 } |
2511 uint32_t differing_bits = (from ^ to); | 2506 uint32_t differing_bits = (from ^ to); |
2512 // A mask and compare is only perfect if the differing bits form a | 2507 // A mask and compare is only perfect if the differing bits form a |
2513 // number like 00011111 with one single block of trailing 1s. | 2508 // number like 00011111 with one single block of trailing 1s. |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2622 info->visited = true; | 2617 info->visited = true; |
2623 } | 2618 } |
2624 ~VisitMarker() { | 2619 ~VisitMarker() { |
2625 info_->visited = false; | 2620 info_->visited = false; |
2626 } | 2621 } |
2627 private: | 2622 private: |
2628 NodeInfo* info_; | 2623 NodeInfo* info_; |
2629 }; | 2624 }; |
2630 | 2625 |
2631 | 2626 |
2627 RegExpNode* SeqRegExpNode::FilterASCII(int depth) { | |
2628 if (replacement_ != Uninitialized()) return replacement_; | |
2629 if (depth < 0) return this; | |
2630 ASSERT(!info()->visited); | |
2631 VisitMarker marker(info()); | |
2632 return FilterSuccessor(depth - 1); | |
2633 } | |
2634 | |
2635 | |
2636 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) { | |
2637 RegExpNode* next = on_success_->FilterASCII(depth - 1); | |
2638 if (next == NULL) { | |
2639 replacement_ = NULL; | |
2640 return NULL; | |
2641 } | |
2642 on_success_ = next; | |
2643 replacement_ = this; | |
2644 return this; | |
2645 } | |
2646 | |
2647 | |
2648 RegExpNode* TextNode::FilterASCII(int depth) { | |
2649 if (replacement_ != Uninitialized()) return replacement_; | |
2650 if (depth < 0) return this; | |
2651 ASSERT(!info()->visited); | |
2652 VisitMarker marker(info()); | |
2653 int element_count = elms_->length(); | |
2654 for (int i = 0; i < element_count; i++) { | |
2655 TextElement elm = elms_->at(i); | |
2656 if (elm.type == TextElement::ATOM) { | |
2657 Vector<const uc16> quarks = elm.data.u_atom->data(); | |
2658 for (int j = 0; j < quarks.length(); j++) { | |
2659 // We don't need special handling for case independence | |
2660 // because of the rule that case independence cannot make | |
2661 // a non-ASCII character match an ASCII character. | |
2662 if (quarks[j] > String::kMaxAsciiCharCode) { | |
2663 replacement_ = NULL; | |
2664 return NULL; | |
2665 } | |
2666 } | |
2667 } else { | |
2668 ASSERT(elm.type == TextElement::CHAR_CLASS); | |
2669 RegExpCharacterClass* cc = elm.data.u_char_class; | |
2670 ZoneList<CharacterRange>* ranges = cc->ranges(); | |
2671 if (!CharacterRange::IsCanonical(ranges)) { | |
2672 CharacterRange::Canonicalize(ranges); | |
2673 } | |
2674 // Now they are in order so we only need to look at the first. | |
2675 int range_count = ranges->length(); | |
2676 if (cc->is_negated()) { | |
2677 if (range_count != 0 && | |
2678 ranges->at(0).from() == 0 && | |
2679 ranges->at(0).to() >= String::kMaxAsciiCharCode) { | |
2680 replacement_ = NULL; | |
2681 return NULL; | |
2682 } | |
2683 } else { | |
2684 if (range_count == 0 || | |
2685 ranges->at(0).from() > String::kMaxAsciiCharCode) { | |
2686 replacement_ = NULL; | |
2687 return NULL; | |
2688 } | |
2689 } | |
2690 } | |
2691 } | |
2692 return FilterSuccessor(depth - 1); | |
2693 } | |
2694 | |
2695 | |
2696 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { | |
2697 if (replacement_ != Uninitialized()) return replacement_; | |
2698 if (depth < 0) return this; | |
2699 if (info()->visited) return this; | |
2700 VisitMarker marker(info()); | |
2701 | |
2702 RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); | |
2703 if (continue_replacement == NULL) { | |
2704 // If we can't continue after the loop then there is no sense in doing the | |
2705 // loop. | |
2706 replacement_ = NULL; | |
2707 return NULL; | |
2708 } | |
2709 | |
2710 return ChoiceNode::FilterASCII(depth - 1); | |
2711 } | |
2712 | |
2713 | |
2714 RegExpNode* ChoiceNode::FilterASCII(int depth) { | |
2715 if (replacement_ != Uninitialized()) return replacement_; | |
2716 if (depth < 0) return this; | |
2717 if (info()->visited) return this; | |
2718 VisitMarker marker(info()); | |
2719 int choice_count = alternatives_->length(); | |
2720 int surviving = 0; | |
2721 RegExpNode* survivor = NULL; | |
2722 for (int i = 0; i < choice_count; i++) { | |
ulan
2012/04/25 12:55:28
Instead of duplicating the same loop twice, can we
Erik Corry
2012/04/26 08:23:11
I can't see a way to do in-place filtering, since
| |
2723 GuardedAlternative alternative = alternatives_->at(i); | |
2724 ZoneList<Guard*>* guards = alternative.guards(); | |
2725 int guard_count = (guards == NULL) ? 0 : guards->length(); | |
2726 if (guard_count == 0) { | |
2727 RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1); | |
2728 if (replacement != NULL && replacement != this) { | |
2729 surviving++; | |
2730 alternatives_->at(i).set_node(replacement); | |
2731 survivor = replacement; | |
2732 } | |
2733 } else { | |
2734 surviving++; | |
2735 } | |
2736 } | |
2737 if (surviving == 0) { | |
2738 replacement_ = NULL; | |
2739 return NULL; | |
2740 } | |
2741 if (surviving == 1 && survivor != NULL) { | |
2742 replacement_ = survivor; | |
2743 return survivor; | |
2744 } | |
2745 replacement_ = this; | |
2746 if (surviving == choice_count) { | |
2747 return this; | |
2748 } | |
2749 // Only some of the nodes survived the filtering. We need to rebuild the | |
2750 // alternatives list. | |
2751 ZoneList<GuardedAlternative>* new_alternatives = | |
2752 new ZoneList<GuardedAlternative>(surviving); | |
2753 for (int i = 0; i < choice_count; i++) { | |
2754 GuardedAlternative alternative = alternatives_->at(i); | |
2755 ZoneList<Guard*>* guards = alternative.guards(); | |
2756 int guard_count = (guards == NULL) ? 0 : guards->length(); | |
2757 if (guard_count == 0) { | |
2758 RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1); | |
2759 if (replacement != NULL && replacement != this) { | |
2760 GuardedAlternative new_alternative(replacement); | |
2761 new_alternatives->Add(new_alternative); | |
2762 } | |
2763 } else { | |
2764 new_alternatives->Add(alternative); | |
2765 } | |
2766 } | |
2767 alternatives_ = new_alternatives; | |
2768 return this; | |
2769 } | |
2770 | |
2771 | |
2772 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) { | |
2773 if (replacement_ != Uninitialized()) return replacement_; | |
2774 if (depth < 0) return this; | |
2775 if (info()->visited) return this; | |
2776 VisitMarker marker(info()); | |
2777 // Alternative 0 is the negative lookahead, alternative 1 is what comes | |
2778 // afterwards. | |
2779 RegExpNode* node = alternatives_->at(1).node(); | |
2780 RegExpNode* replacement = node->FilterASCII(depth - 1); | |
2781 if (node == NULL) { | |
ulan
2012/04/25 12:55:28
Did you mean replacement == NULL?
Erik Corry
2012/04/26 08:23:11
Oops, yes.
| |
2782 replacement_ = NULL; | |
2783 return NULL; | |
2784 } | |
2785 alternatives_->at(1).set_node(replacement); | |
2786 | |
2787 RegExpNode* neg_node = alternatives_->at(0).node(); | |
2788 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1); | |
2789 // If the negative lookahead is always going to fail then | |
2790 // we don't need to check it. | |
2791 if (neg_replacement == NULL) { | |
2792 replacement_ = replacement; | |
2793 return replacement; | |
2794 } | |
2795 alternatives_->at(0).set_node(neg_replacement); | |
2796 replacement_ = this; | |
2797 return this; | |
2798 } | |
2799 | |
2800 | |
2632 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2801 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2633 RegExpCompiler* compiler, | 2802 RegExpCompiler* compiler, |
2634 int characters_filled_in, | 2803 int characters_filled_in, |
2635 bool not_at_start) { | 2804 bool not_at_start) { |
2636 if (body_can_be_zero_length_ || info()->visited) return; | 2805 if (body_can_be_zero_length_ || info()->visited) return; |
2637 VisitMarker marker(info()); | 2806 VisitMarker marker(info()); |
2638 return ChoiceNode::GetQuickCheckDetails(details, | 2807 return ChoiceNode::GetQuickCheckDetails(details, |
2639 compiler, | 2808 compiler, |
2640 characters_filled_in, | 2809 characters_filled_in, |
2641 not_at_start); | 2810 not_at_start); |
(...skipping 3041 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5683 // at the start of input. | 5852 // at the start of input. |
5684 ChoiceNode* first_step_node = new ChoiceNode(2); | 5853 ChoiceNode* first_step_node = new ChoiceNode(2); |
5685 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 5854 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5686 first_step_node->AddAlternative(GuardedAlternative( | 5855 first_step_node->AddAlternative(GuardedAlternative( |
5687 new TextNode(new RegExpCharacterClass('*'), loop_node))); | 5856 new TextNode(new RegExpCharacterClass('*'), loop_node))); |
5688 node = first_step_node; | 5857 node = first_step_node; |
5689 } else { | 5858 } else { |
5690 node = loop_node; | 5859 node = loop_node; |
5691 } | 5860 } |
5692 } | 5861 } |
5862 if (is_ascii) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); | |
5863 | |
5864 if (node == NULL) node = new EndNode(EndNode::BACKTRACK); | |
5693 data->node = node; | 5865 data->node = node; |
5694 Analysis analysis(ignore_case, is_ascii); | 5866 Analysis analysis(ignore_case, is_ascii); |
5695 analysis.EnsureAnalyzed(node); | 5867 analysis.EnsureAnalyzed(node); |
5696 if (analysis.has_failed()) { | 5868 if (analysis.has_failed()) { |
5697 const char* error_message = analysis.error_message(); | 5869 const char* error_message = analysis.error_message(); |
5698 return CompilationResult(error_message); | 5870 return CompilationResult(error_message); |
5699 } | 5871 } |
5700 | 5872 |
5701 // Create the correct assembler for the architecture. | 5873 // Create the correct assembler for the architecture. |
5702 #ifndef V8_INTERPRETED_REGEXP | 5874 #ifndef V8_INTERPRETED_REGEXP |
(...skipping 29 matching lines...) Expand all Loading... | |
5732 } | 5904 } |
5733 | 5905 |
5734 return compiler.Assemble(¯o_assembler, | 5906 return compiler.Assemble(¯o_assembler, |
5735 node, | 5907 node, |
5736 data->capture_count, | 5908 data->capture_count, |
5737 pattern); | 5909 pattern); |
5738 } | 5910 } |
5739 | 5911 |
5740 | 5912 |
5741 }} // namespace v8::internal | 5913 }} // namespace v8::internal |
OLD | NEW |