OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2419 char_mask = String::kMaxUtf16CodeUnit; | 2419 char_mask = String::kMaxUtf16CodeUnit; |
2420 } | 2420 } |
2421 for (int k = 0; k < elms_->length(); k++) { | 2421 for (int k = 0; k < elms_->length(); k++) { |
2422 TextElement elm = elms_->at(k); | 2422 TextElement elm = elms_->at(k); |
2423 if (elm.type == TextElement::ATOM) { | 2423 if (elm.type == TextElement::ATOM) { |
2424 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2424 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2425 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2425 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2426 QuickCheckDetails::Position* pos = | 2426 QuickCheckDetails::Position* pos = |
2427 details->positions(characters_filled_in); | 2427 details->positions(characters_filled_in); |
2428 uc16 c = quarks[i]; | 2428 uc16 c = quarks[i]; |
2429 if (c > char_mask) { | 2429 // We should already have filtered out nodes that have non-ASCII |
2430 // If we expect a non-ASCII character from an ASCII string, | 2430 // characters if we are matching against an ASCII string. |
2431 // there is no way we can match. Not even case independent | 2431 ASSERT(c <= char_mask); |
2432 // matching can turn an ASCII character into non-ASCII or | |
2433 // vice versa. | |
2434 details->set_cannot_match(); | |
2435 pos->determines_perfectly = false; | |
2436 return; | |
2437 } | |
2438 if (compiler->ignore_case()) { | 2432 if (compiler->ignore_case()) { |
2439 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2433 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
2440 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), | 2434 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), |
2441 chars); | 2435 chars); |
2442 ASSERT(length != 0); // Can only happen if c > char_mask (see above). | 2436 ASSERT(length != 0); // Can only happen if c > char_mask (see above). |
2443 if (length == 1) { | 2437 if (length == 1) { |
2444 // This letter has no case equivalents, so it's nice and simple | 2438 // This letter has no case equivalents, so it's nice and simple |
2445 // and the mask-compare will determine definitely whether we have | 2439 // and the mask-compare will determine definitely whether we have |
2446 // a match at this character position. | 2440 // a match at this character position. |
2447 pos->mask = char_mask; | 2441 pos->mask = char_mask; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2489 // A quick check uses multi-character mask and compare. There is no | 2483 // A quick check uses multi-character mask and compare. There is no |
2490 // useful way to incorporate a negative char class into this scheme | 2484 // useful way to incorporate a negative char class into this scheme |
2491 // so we just conservatively create a mask and value that will always | 2485 // so we just conservatively create a mask and value that will always |
2492 // succeed. | 2486 // succeed. |
2493 pos->mask = 0; | 2487 pos->mask = 0; |
2494 pos->value = 0; | 2488 pos->value = 0; |
2495 } else { | 2489 } else { |
2496 int first_range = 0; | 2490 int first_range = 0; |
2497 while (ranges->at(first_range).from() > char_mask) { | 2491 while (ranges->at(first_range).from() > char_mask) { |
2498 first_range++; | 2492 first_range++; |
2499 if (first_range == ranges->length()) { | 2493 // We should already have filtered out nodes that cannot match |
2500 details->set_cannot_match(); | 2494 // so the first range should be a valid range. |
2501 pos->determines_perfectly = false; | 2495 ASSERT(first_range != ranges->length()); |
2502 return; | |
2503 } | |
2504 } | 2496 } |
2505 CharacterRange range = ranges->at(first_range); | 2497 CharacterRange range = ranges->at(first_range); |
2506 uc16 from = range.from(); | 2498 uc16 from = range.from(); |
2507 uc16 to = range.to(); | 2499 uc16 to = range.to(); |
2508 if (to > char_mask) { | 2500 if (to > char_mask) { |
2509 to = char_mask; | 2501 to = char_mask; |
2510 } | 2502 } |
2511 uint32_t differing_bits = (from ^ to); | 2503 uint32_t differing_bits = (from ^ to); |
2512 // A mask and compare is only perfect if the differing bits form a | 2504 // A mask and compare is only perfect if the differing bits form a |
2513 // number like 00011111 with one single block of trailing 1s. | 2505 // number like 00011111 with one single block of trailing 1s. |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2622 info->visited = true; | 2614 info->visited = true; |
2623 } | 2615 } |
2624 ~VisitMarker() { | 2616 ~VisitMarker() { |
2625 info_->visited = false; | 2617 info_->visited = false; |
2626 } | 2618 } |
2627 private: | 2619 private: |
2628 NodeInfo* info_; | 2620 NodeInfo* info_; |
2629 }; | 2621 }; |
2630 | 2622 |
2631 | 2623 |
| 2624 RegExpNode* SeqRegExpNode::FilterASCII(int depth) { |
| 2625 if (info()->replacement_calculated) return replacement(); |
| 2626 if (depth < 0) return this; |
| 2627 ASSERT(!info()->visited); |
| 2628 VisitMarker marker(info()); |
| 2629 return FilterSuccessor(depth - 1); |
| 2630 } |
| 2631 |
| 2632 |
| 2633 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) { |
| 2634 RegExpNode* next = on_success_->FilterASCII(depth - 1); |
| 2635 if (next == NULL) return set_replacement(NULL); |
| 2636 on_success_ = next; |
| 2637 return set_replacement(this); |
| 2638 } |
| 2639 |
| 2640 |
| 2641 RegExpNode* TextNode::FilterASCII(int depth) { |
| 2642 if (info()->replacement_calculated) return replacement(); |
| 2643 if (depth < 0) return this; |
| 2644 ASSERT(!info()->visited); |
| 2645 VisitMarker marker(info()); |
| 2646 int element_count = elms_->length(); |
| 2647 for (int i = 0; i < element_count; i++) { |
| 2648 TextElement elm = elms_->at(i); |
| 2649 if (elm.type == TextElement::ATOM) { |
| 2650 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2651 for (int j = 0; j < quarks.length(); j++) { |
| 2652 // We don't need special handling for case independence |
| 2653 // because of the rule that case independence cannot make |
| 2654 // a non-ASCII character match an ASCII character. |
| 2655 if (quarks[j] > String::kMaxAsciiCharCode) { |
| 2656 return set_replacement(NULL); |
| 2657 } |
| 2658 } |
| 2659 } else { |
| 2660 ASSERT(elm.type == TextElement::CHAR_CLASS); |
| 2661 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 2662 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 2663 if (!CharacterRange::IsCanonical(ranges)) { |
| 2664 CharacterRange::Canonicalize(ranges); |
| 2665 } |
| 2666 // Now they are in order so we only need to look at the first. |
| 2667 int range_count = ranges->length(); |
| 2668 if (cc->is_negated()) { |
| 2669 if (range_count != 0 && |
| 2670 ranges->at(0).from() == 0 && |
| 2671 ranges->at(0).to() >= String::kMaxAsciiCharCode) { |
| 2672 return set_replacement(NULL); |
| 2673 } |
| 2674 } else { |
| 2675 if (range_count == 0 || |
| 2676 ranges->at(0).from() > String::kMaxAsciiCharCode) { |
| 2677 return set_replacement(NULL); |
| 2678 } |
| 2679 } |
| 2680 } |
| 2681 } |
| 2682 return FilterSuccessor(depth - 1); |
| 2683 } |
| 2684 |
| 2685 |
| 2686 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { |
| 2687 if (info()->replacement_calculated) return replacement(); |
| 2688 if (depth < 0) return this; |
| 2689 if (info()->visited) return this; |
| 2690 VisitMarker marker(info()); |
| 2691 |
| 2692 RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); |
| 2693 // If we can't continue after the loop then there is no sense in doing the |
| 2694 // loop. |
| 2695 if (continue_replacement == NULL) return set_replacement(NULL); |
| 2696 |
| 2697 return ChoiceNode::FilterASCII(depth - 1); |
| 2698 } |
| 2699 |
| 2700 |
| 2701 RegExpNode* ChoiceNode::FilterASCII(int depth) { |
| 2702 if (info()->replacement_calculated) return replacement(); |
| 2703 if (depth < 0) return this; |
| 2704 if (info()->visited) return this; |
| 2705 VisitMarker marker(info()); |
| 2706 int choice_count = alternatives_->length(); |
| 2707 int surviving = 0; |
| 2708 RegExpNode* survivor = NULL; |
| 2709 for (int i = 0; i < choice_count; i++) { |
| 2710 GuardedAlternative alternative = alternatives_->at(i); |
| 2711 RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1); |
| 2712 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. |
| 2713 alternatives_->at(i).set_node(replacement); |
| 2714 if (replacement != NULL) { |
| 2715 surviving++; |
| 2716 survivor = replacement; |
| 2717 } |
| 2718 } |
| 2719 if (surviving < 2) return set_replacement(survivor); |
| 2720 |
| 2721 set_replacement(this); |
| 2722 if (surviving == choice_count) { |
| 2723 return this; |
| 2724 } |
| 2725 // Only some of the nodes survived the filtering. We need to rebuild the |
| 2726 // alternatives list. |
| 2727 ZoneList<GuardedAlternative>* new_alternatives = |
| 2728 new ZoneList<GuardedAlternative>(surviving); |
| 2729 for (int i = 0; i < choice_count; i++) { |
| 2730 GuardedAlternative alternative = alternatives_->at(i); |
| 2731 if (alternative.node() != NULL) { |
| 2732 new_alternatives->Add(alternative); |
| 2733 } |
| 2734 } |
| 2735 alternatives_ = new_alternatives; |
| 2736 return this; |
| 2737 } |
| 2738 |
| 2739 |
| 2740 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) { |
| 2741 if (info()->replacement_calculated) return replacement(); |
| 2742 if (depth < 0) return this; |
| 2743 if (info()->visited) return this; |
| 2744 VisitMarker marker(info()); |
| 2745 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 2746 // afterwards. |
| 2747 RegExpNode* node = alternatives_->at(1).node(); |
| 2748 RegExpNode* replacement = node->FilterASCII(depth - 1); |
| 2749 if (replacement == NULL) return set_replacement(NULL); |
| 2750 alternatives_->at(1).set_node(replacement); |
| 2751 |
| 2752 RegExpNode* neg_node = alternatives_->at(0).node(); |
| 2753 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1); |
| 2754 // If the negative lookahead is always going to fail then |
| 2755 // we don't need to check it. |
| 2756 if (neg_replacement == NULL) return set_replacement(replacement); |
| 2757 alternatives_->at(0).set_node(neg_replacement); |
| 2758 return set_replacement(this); |
| 2759 } |
| 2760 |
| 2761 |
2632 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2762 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2633 RegExpCompiler* compiler, | 2763 RegExpCompiler* compiler, |
2634 int characters_filled_in, | 2764 int characters_filled_in, |
2635 bool not_at_start) { | 2765 bool not_at_start) { |
2636 if (body_can_be_zero_length_ || info()->visited) return; | 2766 if (body_can_be_zero_length_ || info()->visited) return; |
2637 VisitMarker marker(info()); | 2767 VisitMarker marker(info()); |
2638 return ChoiceNode::GetQuickCheckDetails(details, | 2768 return ChoiceNode::GetQuickCheckDetails(details, |
2639 compiler, | 2769 compiler, |
2640 characters_filled_in, | 2770 characters_filled_in, |
2641 not_at_start); | 2771 not_at_start); |
(...skipping 3041 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5683 // at the start of input. | 5813 // at the start of input. |
5684 ChoiceNode* first_step_node = new ChoiceNode(2); | 5814 ChoiceNode* first_step_node = new ChoiceNode(2); |
5685 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 5815 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5686 first_step_node->AddAlternative(GuardedAlternative( | 5816 first_step_node->AddAlternative(GuardedAlternative( |
5687 new TextNode(new RegExpCharacterClass('*'), loop_node))); | 5817 new TextNode(new RegExpCharacterClass('*'), loop_node))); |
5688 node = first_step_node; | 5818 node = first_step_node; |
5689 } else { | 5819 } else { |
5690 node = loop_node; | 5820 node = loop_node; |
5691 } | 5821 } |
5692 } | 5822 } |
| 5823 if (is_ascii) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); |
| 5824 |
| 5825 if (node == NULL) node = new EndNode(EndNode::BACKTRACK); |
5693 data->node = node; | 5826 data->node = node; |
5694 Analysis analysis(ignore_case, is_ascii); | 5827 Analysis analysis(ignore_case, is_ascii); |
5695 analysis.EnsureAnalyzed(node); | 5828 analysis.EnsureAnalyzed(node); |
5696 if (analysis.has_failed()) { | 5829 if (analysis.has_failed()) { |
5697 const char* error_message = analysis.error_message(); | 5830 const char* error_message = analysis.error_message(); |
5698 return CompilationResult(error_message); | 5831 return CompilationResult(error_message); |
5699 } | 5832 } |
5700 | 5833 |
5701 // Create the correct assembler for the architecture. | 5834 // Create the correct assembler for the architecture. |
5702 #ifndef V8_INTERPRETED_REGEXP | 5835 #ifndef V8_INTERPRETED_REGEXP |
(...skipping 29 matching lines...) Expand all Loading... |
5732 } | 5865 } |
5733 | 5866 |
5734 return compiler.Assemble(¯o_assembler, | 5867 return compiler.Assemble(¯o_assembler, |
5735 node, | 5868 node, |
5736 data->capture_count, | 5869 data->capture_count, |
5737 pattern); | 5870 pattern); |
5738 } | 5871 } |
5739 | 5872 |
5740 | 5873 |
5741 }} // namespace v8::internal | 5874 }} // namespace v8::internal |
OLD | NEW |