OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 1663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1674 bool ascii_subject, | 1674 bool ascii_subject, |
1675 unibrow::uchar* letters) { | 1675 unibrow::uchar* letters) { |
1676 int length = | 1676 int length = |
1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); | 1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
1678 // Unibrow returns 0 or 1 for characters where case independence is | 1678 // Unibrow returns 0 or 1 for characters where case independence is |
1679 // trivial. | 1679 // trivial. |
1680 if (length == 0) { | 1680 if (length == 0) { |
1681 letters[0] = character; | 1681 letters[0] = character; |
1682 length = 1; | 1682 length = 1; |
1683 } | 1683 } |
1684 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1684 if (!ascii_subject || character <= String::kMaxOneByteCharCode) { |
1685 return length; | 1685 return length; |
1686 } | 1686 } |
1687 // The standard requires that non-ASCII characters cannot have ASCII | 1687 // The standard requires that non-ASCII characters cannot have ASCII |
1688 // character codes in their equivalence class. | 1688 // character codes in their equivalence class. |
1689 return 0; | 1689 return 0; |
1690 } | 1690 } |
1691 | 1691 |
1692 | 1692 |
1693 static inline bool EmitSimpleCharacter(Isolate* isolate, | 1693 static inline bool EmitSimpleCharacter(Isolate* isolate, |
1694 RegExpCompiler* compiler, | 1694 RegExpCompiler* compiler, |
(...skipping 30 matching lines...) Expand all Loading... |
1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); | 1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
1727 if (length < 1) { | 1727 if (length < 1) { |
1728 // This can't match. Must be an ASCII subject and a non-ASCII character. | 1728 // This can't match. Must be an ASCII subject and a non-ASCII character. |
1729 // We do not need to do anything since the ASCII pass already handled this. | 1729 // We do not need to do anything since the ASCII pass already handled this. |
1730 return false; // Bounds not checked. | 1730 return false; // Bounds not checked. |
1731 } | 1731 } |
1732 bool checked = false; | 1732 bool checked = false; |
1733 // We handle the length > 1 case in a later pass. | 1733 // We handle the length > 1 case in a later pass. |
1734 if (length == 1) { | 1734 if (length == 1) { |
1735 if (ascii && c > String::kMaxAsciiCharCodeU) { | 1735 if (ascii && c > String::kMaxOneByteCharCodeU) { |
1736 // Can't match - see above. | 1736 // Can't match - see above. |
1737 return false; // Bounds not checked. | 1737 return false; // Bounds not checked. |
1738 } | 1738 } |
1739 if (!preloaded) { | 1739 if (!preloaded) { |
1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1741 checked = check; | 1741 checked = check; |
1742 } | 1742 } |
1743 macro_assembler->CheckNotCharacter(c, on_failure); | 1743 macro_assembler->CheckNotCharacter(c, on_failure); |
1744 } | 1744 } |
1745 return checked; | 1745 return checked; |
1746 } | 1746 } |
1747 | 1747 |
1748 | 1748 |
1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
1750 bool ascii, | 1750 bool ascii, |
1751 uc16 c1, | 1751 uc16 c1, |
1752 uc16 c2, | 1752 uc16 c2, |
1753 Label* on_failure) { | 1753 Label* on_failure) { |
1754 uc16 char_mask; | 1754 uc16 char_mask; |
1755 if (ascii) { | 1755 if (ascii) { |
1756 char_mask = String::kMaxAsciiCharCode; | 1756 char_mask = String::kMaxOneByteCharCode; |
1757 } else { | 1757 } else { |
1758 char_mask = String::kMaxUtf16CodeUnit; | 1758 char_mask = String::kMaxUtf16CodeUnit; |
1759 } | 1759 } |
1760 uc16 exor = c1 ^ c2; | 1760 uc16 exor = c1 ^ c2; |
1761 // Check whether exor has only one bit set. | 1761 // Check whether exor has only one bit set. |
1762 if (((exor - 1) & exor) == 0) { | 1762 if (((exor - 1) & exor) == 0) { |
1763 // If c1 and c2 differ only by one bit. | 1763 // If c1 and c2 differ only by one bit. |
1764 // Ecma262UnCanonicalize always gives the highest number last. | 1764 // Ecma262UnCanonicalize always gives the highest number last. |
1765 ASSERT(c2 > c1); | 1765 ASSERT(c2 > c1); |
1766 uc16 mask = char_mask ^ exor; | 1766 uc16 mask = char_mask ^ exor; |
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2000 // encoding space can be quickly tested with a table lookup, so we don't | 2000 // encoding space can be quickly tested with a table lookup, so we don't |
2001 // wish to do binary chop search at a smaller granularity than that. A | 2001 // wish to do binary chop search at a smaller granularity than that. A |
2002 // 128-character space can take up a lot of space in the ranges array if, | 2002 // 128-character space can take up a lot of space in the ranges array if, |
2003 // for example, we only want to match every second character (eg. the lower | 2003 // for example, we only want to match every second character (eg. the lower |
2004 // case characters on some Unicode pages). | 2004 // case characters on some Unicode pages). |
2005 int binary_chop_index = (end_index + start_index) / 2; | 2005 int binary_chop_index = (end_index + start_index) / 2; |
2006 // The first test ensures that we get to the code that handles the ASCII | 2006 // The first test ensures that we get to the code that handles the ASCII |
2007 // range with a single not-taken branch, speeding up this important | 2007 // range with a single not-taken branch, speeding up this important |
2008 // character range (even non-ASCII charset-based text has spaces and | 2008 // character range (even non-ASCII charset-based text has spaces and |
2009 // punctuation). | 2009 // punctuation). |
2010 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case. | 2010 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case. |
2011 end_index - start_index > (*new_start_index - start_index) * 2 && | 2011 end_index - start_index > (*new_start_index - start_index) * 2 && |
2012 last - first > kSize * 2 && | 2012 last - first > kSize * 2 && |
2013 binary_chop_index > *new_start_index && | 2013 binary_chop_index > *new_start_index && |
2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { | 2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { |
2015 int scan_forward_for_section_border = binary_chop_index;; | 2015 int scan_forward_for_section_border = binary_chop_index;; |
2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; | 2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; |
2017 | 2017 |
2018 while (scan_forward_for_section_border < end_index) { | 2018 while (scan_forward_for_section_border < end_index) { |
2019 if (ranges->at(scan_forward_for_section_border) > new_border) { | 2019 if (ranges->at(scan_forward_for_section_border) > new_border) { |
2020 *new_start_index = scan_forward_for_section_border; | 2020 *new_start_index = scan_forward_for_section_border; |
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2204 bool check_offset, | 2204 bool check_offset, |
2205 bool preloaded, | 2205 bool preloaded, |
2206 Zone* zone) { | 2206 Zone* zone) { |
2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); | 2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); |
2208 if (!CharacterRange::IsCanonical(ranges)) { | 2208 if (!CharacterRange::IsCanonical(ranges)) { |
2209 CharacterRange::Canonicalize(ranges); | 2209 CharacterRange::Canonicalize(ranges); |
2210 } | 2210 } |
2211 | 2211 |
2212 int max_char; | 2212 int max_char; |
2213 if (ascii) { | 2213 if (ascii) { |
2214 max_char = String::kMaxAsciiCharCode; | 2214 max_char = String::kMaxOneByteCharCode; |
2215 } else { | 2215 } else { |
2216 max_char = String::kMaxUtf16CodeUnit; | 2216 max_char = String::kMaxUtf16CodeUnit; |
2217 } | 2217 } |
2218 | 2218 |
2219 int range_count = ranges->length(); | 2219 int range_count = ranges->length(); |
2220 | 2220 |
2221 int last_valid_range = range_count - 1; | 2221 int last_valid_range = range_count - 1; |
2222 while (last_valid_range >= 0) { | 2222 while (last_valid_range >= 0) { |
2223 CharacterRange& range = ranges->at(last_valid_range); | 2223 CharacterRange& range = ranges->at(last_valid_range); |
2224 if (range.from() <= max_char) { | 2224 if (range.from() <= max_char) { |
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2506 v |= v >> 8; | 2506 v |= v >> 8; |
2507 v |= v >> 16; | 2507 v |= v >> 16; |
2508 return v; | 2508 return v; |
2509 } | 2509 } |
2510 | 2510 |
2511 | 2511 |
// Combines the per-position masks and values held in positions_ into the
// packed mask_ and value_ words. Each of the characters_ positions contributes
// (mask & char_mask) and (value & char_mask), shifted left by 8 bits per
// position when asc is true and by 16 bits per position otherwise.
// char_mask is String::kMaxOneByteCharCode when asc is true (the OLD column
// used String::kMaxAsciiCharCode) and String::kMaxUtf16CodeUnit otherwise.
// Returns true if at least one position has a mask bit set within
// String::kMaxOneByteCharCode.
2512 bool QuickCheckDetails::Rationalize(bool asc) { | 2512 bool QuickCheckDetails::Rationalize(bool asc) { |
2513 bool found_useful_op = false; | 2513 bool found_useful_op = false; |
2514 uint32_t char_mask; | 2514 uint32_t char_mask; |
2515 if (asc) { | 2515 if (asc) { |
2516 char_mask = String::kMaxAsciiCharCode; | 2516 char_mask = String::kMaxOneByteCharCode; |
2517 } else { | 2517 } else { |
2518 char_mask = String::kMaxUtf16CodeUnit; | 2518 char_mask = String::kMaxUtf16CodeUnit; |
2519 } | 2519 } |
2520 mask_ = 0; | 2520 mask_ = 0; |
2521 value_ = 0; | 2521 value_ = 0; |
2522 int char_shift = 0; | 2522 int char_shift = 0; |
2523 for (int i = 0; i < characters_; i++) { | 2523 for (int i = 0; i < characters_; i++) { |
2524 Position* pos = &positions_[i]; | 2524 Position* pos = &positions_[i]; |
2525 if ((pos->mask & String::kMaxAsciiCharCode) != 0) { | 2525 if ((pos->mask & String::kMaxOneByteCharCode) != 0) { |
2526 found_useful_op = true; | 2526 found_useful_op = true; |
2527 } | 2527 } |
2528 mask_ |= (pos->mask & char_mask) << char_shift; | 2528 mask_ |= (pos->mask & char_mask) << char_shift; |
2529 value_ |= (pos->value & char_mask) << char_shift; | 2529 value_ |= (pos->value & char_mask) << char_shift; |
2530 char_shift += asc ? 8 : 16; | 2530 char_shift += asc ? 8 : 16; |
2531 } | 2531 } |
2532 return found_useful_op; | 2532 return found_useful_op; |
2533 } | 2533 } |
2534 | 2534 |
2535 | 2535 |
(...skipping 22 matching lines...) Expand all Loading... |
2558 } | 2558 } |
2559 | 2559 |
2560 | 2560 |
2561 bool need_mask = true; | 2561 bool need_mask = true; |
2562 | 2562 |
2563 if (details->characters() == 1) { | 2563 if (details->characters() == 1) { |
2564 // If number of characters preloaded is 1 then we used a byte or 16 bit | 2564 // If number of characters preloaded is 1 then we used a byte or 16 bit |
2565 // load so the value is already masked down. | 2565 // load so the value is already masked down. |
2566 uint32_t char_mask; | 2566 uint32_t char_mask; |
2567 if (compiler->ascii()) { | 2567 if (compiler->ascii()) { |
2568 char_mask = String::kMaxAsciiCharCode; | 2568 char_mask = String::kMaxOneByteCharCode; |
2569 } else { | 2569 } else { |
2570 char_mask = String::kMaxUtf16CodeUnit; | 2570 char_mask = String::kMaxUtf16CodeUnit; |
2571 } | 2571 } |
2572 if ((mask & char_mask) == char_mask) need_mask = false; | 2572 if ((mask & char_mask) == char_mask) need_mask = false; |
2573 mask &= char_mask; | 2573 mask &= char_mask; |
2574 } else { | 2574 } else { |
2575 // For 2-character preloads in ASCII mode or 1-character preloads in | 2575 // For 2-character preloads in ASCII mode or 1-character preloads in |
2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. | 2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. |
// Decides whether the AND with `mask` can be skipped for multi-character
// preloads. Review fix in the NEW column: (mask & 0x7f7f) can never equal
// 0xffff, so comparing against 0xffff left need_mask permanently true on the
// non-Latin-1 two-character path. The comparison is against 0x7f7f, matching
// the OLD column.
2577 if (details->characters() == 2 && compiler->ascii()) { | 2577 if (details->characters() == 2 && compiler->ascii()) { |
2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false; | 2578 #ifndef ENABLE_LATIN_1 |
| 2579 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false; |
| 2580 #else |
| 2581 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2582 #endif |
2579 } else if (details->characters() == 1 && !compiler->ascii()) { | 2583 } else if (details->characters() == 1 && !compiler->ascii()) { |
2580 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2584 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2581 } else { | 2585 } else { |
2582 if (mask == 0xffffffff) need_mask = false; | 2586 if (mask == 0xffffffff) need_mask = false; |
2583 } | 2587 } |
2584 } | 2588 } |
2585 | 2589 |
2586 if (fall_through_on_failure) { | 2590 if (fall_through_on_failure) { |
2587 if (need_mask) { | 2591 if (need_mask) { |
2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2592 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
(...skipping 21 matching lines...) Expand all Loading... |
2610 // generating a quick check. | 2614 // generating a quick check. |
2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2615 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2612 RegExpCompiler* compiler, | 2616 RegExpCompiler* compiler, |
2613 int characters_filled_in, | 2617 int characters_filled_in, |
2614 bool not_at_start) { | 2618 bool not_at_start) { |
2615 Isolate* isolate = Isolate::Current(); | 2619 Isolate* isolate = Isolate::Current(); |
2616 ASSERT(characters_filled_in < details->characters()); | 2620 ASSERT(characters_filled_in < details->characters()); |
2617 int characters = details->characters(); | 2621 int characters = details->characters(); |
2618 int char_mask; | 2622 int char_mask; |
2619 if (compiler->ascii()) { | 2623 if (compiler->ascii()) { |
2620 char_mask = String::kMaxAsciiCharCode; | 2624 char_mask = String::kMaxOneByteCharCode; |
2621 } else { | 2625 } else { |
2622 char_mask = String::kMaxUtf16CodeUnit; | 2626 char_mask = String::kMaxUtf16CodeUnit; |
2623 } | 2627 } |
2624 for (int k = 0; k < elms_->length(); k++) { | 2628 for (int k = 0; k < elms_->length(); k++) { |
2625 TextElement elm = elms_->at(k); | 2629 TextElement elm = elms_->at(k); |
2626 if (elm.type == TextElement::ATOM) { | 2630 if (elm.type == TextElement::ATOM) { |
2627 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2631 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2628 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2632 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2629 QuickCheckDetails::Position* pos = | 2633 QuickCheckDetails::Position* pos = |
2630 details->positions(characters_filled_in); | 2634 details->positions(characters_filled_in); |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2827 info->visited = true; | 2831 info->visited = true; |
2828 } | 2832 } |
2829 ~VisitMarker() { | 2833 ~VisitMarker() { |
2830 info_->visited = false; | 2834 info_->visited = false; |
2831 } | 2835 } |
2832 private: | 2836 private: |
2833 NodeInfo* info_; | 2837 NodeInfo* info_; |
2834 }; | 2838 }; |
2835 | 2839 |
2836 | 2840 |
// Returns the cached replacement() when info()->replacement_calculated is
// set, and returns this node unchanged once depth is negative. Otherwise it
// asserts the node is not already being visited, holds a VisitMarker on
// info() for the duration of the call, and delegates to FilterSuccessor with
// depth - 1. The NEW column adds ignore_case and forwards it unchanged.
2837 RegExpNode* SeqRegExpNode::FilterASCII(int depth) { | 2841 RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) { |
2838 if (info()->replacement_calculated) return replacement(); | 2842 if (info()->replacement_calculated) return replacement(); |
2839 if (depth < 0) return this; | 2843 if (depth < 0) return this; |
2840 ASSERT(!info()->visited); | 2844 ASSERT(!info()->visited); |
2841 VisitMarker marker(info()); | 2845 VisitMarker marker(info()); |
2842 return FilterSuccessor(depth - 1); | 2846 return FilterSuccessor(depth - 1, ignore_case); |
2843 } | 2847 } |
2844 | 2848 |
2845 | 2849 |
// Filters the successor node on_success_ with depth - 1. If the successor
// filters to NULL, NULL is recorded via set_replacement and that result is
// returned. Otherwise on_success_ is updated to the filtered successor and
// this node is recorded as its own replacement. The NEW column forwards
// ignore_case to the successor.
2846 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) { | 2850 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
2847 RegExpNode* next = on_success_->FilterASCII(depth - 1); | 2851 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); |
2848 if (next == NULL) return set_replacement(NULL); | 2852 if (next == NULL) return set_replacement(NULL); |
2849 on_success_ = next; | 2853 on_success_ = next; |
2850 return set_replacement(this); | 2854 return set_replacement(this); |
2851 } | 2855 } |
2852 | 2856 |
2853 | 2857 |
// Filters this text node: returns set_replacement(NULL) when one of its
// elements rules the node out, otherwise continues with FilterSuccessor.
// Returns the cached replacement() if one was already calculated, and returns
// this node unchanged once depth is negative.
//
// ATOM elements (NEW column):
//   - Without ENABLE_LATIN_1, any code unit above String::kMaxOneByteCharCode
//     rules the node out.
//   - With ENABLE_LATIN_1, such a code unit rules the node out unless
//     ignore_case is set and unibrow::ToLowercase or unibrow::ToUppercase
//     reports either more than one output character or a single character
//     no greater than String::kMaxOneByteCharCodeU.
// CHAR_CLASS elements:
//   - With ENABLE_LATIN_1 and ignore_case set, the element is skipped.
//   - Otherwise the ranges are canonicalized if needed and only the first
//     range is inspected. A negated class is ruled out when that range starts
//     at 0 and reaches String::kMaxOneByteCharCode; a non-negated class is
//     ruled out when it is empty or its first range starts above
//     String::kMaxOneByteCharCode.
2854 RegExpNode* TextNode::FilterASCII(int depth) { | 2858 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
2855 if (info()->replacement_calculated) return replacement(); | 2859 if (info()->replacement_calculated) return replacement(); |
2856 if (depth < 0) return this; | 2860 if (depth < 0) return this; |
2857 ASSERT(!info()->visited); | 2861 ASSERT(!info()->visited); |
2858 VisitMarker marker(info()); | 2862 VisitMarker marker(info()); |
2859 int element_count = elms_->length(); | 2863 int element_count = elms_->length(); |
2860 for (int i = 0; i < element_count; i++) { | 2864 for (int i = 0; i < element_count; i++) { |
2861 TextElement elm = elms_->at(i); | 2865 TextElement elm = elms_->at(i); |
2862 if (elm.type == TextElement::ATOM) { | 2866 if (elm.type == TextElement::ATOM) { |
2863 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2867 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2864 for (int j = 0; j < quarks.length(); j++) { | 2868 for (int j = 0; j < quarks.length(); j++) { |
2865 // We don't need special handling for case independence | 2869 #ifndef ENABLE_LATIN_1 |
2866 // because of the rule that case independence cannot make | 2870 if (quarks[j] > String::kMaxOneByteCharCode) { |
2867 // a non-ASCII character match an ASCII character. | |
2868 if (quarks[j] > String::kMaxAsciiCharCode) { | |
2869 return set_replacement(NULL); | 2871 return set_replacement(NULL); |
2870 } | 2872 } |
| 2873 #else |
| 2874 if (quarks[j] <= String::kMaxOneByteCharCode) continue; |
| 2875 if (!ignore_case) return set_replacement(NULL); |
| 2876 // Here, we need to check for characters whose upper and lower cases |
| 2877 // are outside the Latin-1 range. |
| 2878 // TODO(dcarney): Replace this code with a simple |
| 2879 // table lookup in unibrow::Latin-1. |
| 2880 // TODO(dcarney): Test cases!. |
// NOTE(review): `result` below is a single unibrow::uchar, yet the
// `chars > 1` checks imply Convert can produce several characters. Confirm
// that Convert never writes past result[0], and that it accepts NULL as its
// last argument - neither is visible from here.
| 2881 unibrow::uchar result; |
| 2882 int chars; |
| 2883 chars = unibrow::ToLowercase::Convert(quarks[j], 0, &result, NULL); |
| 2884 if (chars > 1 || |
| 2885 (chars == 1 && result <= String::kMaxOneByteCharCodeU)) { |
| 2886 continue; |
| 2887 } |
| 2888 chars = unibrow::ToUppercase::Convert(quarks[j], 0, &result, NULL); |
| 2889 if (chars > 1 || |
| 2890 (chars == 1 && result <= String::kMaxOneByteCharCodeU)) { |
| 2891 continue; |
| 2892 } |
| 2893 // This character is definitely not in the Latin-1 range. |
| 2894 return set_replacement(NULL); |
| 2895 #endif |
2871 } | 2896 } |
2872 } else { | 2897 } else { |
2873 ASSERT(elm.type == TextElement::CHAR_CLASS); | 2898 ASSERT(elm.type == TextElement::CHAR_CLASS); |
| 2899 #ifdef ENABLE_LATIN_1 |
| 2900 // TODO(dcarney): Can this be improved? |
| 2901 if (ignore_case) continue; |
| 2902 #endif |
2874 RegExpCharacterClass* cc = elm.data.u_char_class; | 2903 RegExpCharacterClass* cc = elm.data.u_char_class; |
2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2904 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
2876 if (!CharacterRange::IsCanonical(ranges)) { | 2905 if (!CharacterRange::IsCanonical(ranges)) { |
2877 CharacterRange::Canonicalize(ranges); | 2906 CharacterRange::Canonicalize(ranges); |
2878 } | 2907 } |
2879 // Now they are in order so we only need to look at the first. | 2908 // Now they are in order so we only need to look at the first. |
2880 int range_count = ranges->length(); | 2909 int range_count = ranges->length(); |
2881 if (cc->is_negated()) { | 2910 if (cc->is_negated()) { |
2882 if (range_count != 0 && | 2911 if (range_count != 0 && |
2883 ranges->at(0).from() == 0 && | 2912 ranges->at(0).from() == 0 && |
2884 ranges->at(0).to() >= String::kMaxAsciiCharCode) { | 2913 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
2885 return set_replacement(NULL); | 2914 return set_replacement(NULL); |
2886 } | 2915 } |
2887 } else { | 2916 } else { |
2888 if (range_count == 0 || | 2917 if (range_count == 0 || |
2889 ranges->at(0).from() > String::kMaxAsciiCharCode) { | 2918 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
2890 return set_replacement(NULL); | 2919 return set_replacement(NULL); |
2891 } | 2920 } |
2892 } | 2921 } |
2893 } | 2922 } |
2894 } | 2923 } |
2895 return FilterSuccessor(depth - 1); | 2924 return FilterSuccessor(depth - 1, ignore_case); |
2896 } | 2925 } |
2897 | 2926 |
2898 | 2927 |
// Returns the cached replacement() if already calculated, and returns this
// node unchanged when depth is negative or the node is currently being
// visited. Otherwise it first filters continue_node_ inside a scope that
// holds a VisitMarker on this node; if that yields NULL the whole loop node
// is replaced by NULL. Once the marker scope has ended it defers to
// ChoiceNode::FilterASCII. The NEW column forwards ignore_case to both calls.
2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { | 2928 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { |
2900 if (info()->replacement_calculated) return replacement(); | 2929 if (info()->replacement_calculated) return replacement(); |
2901 if (depth < 0) return this; | 2930 if (depth < 0) return this; |
2902 if (info()->visited) return this; | 2931 if (info()->visited) return this; |
2903 { | 2932 { |
2904 VisitMarker marker(info()); | 2933 VisitMarker marker(info()); |
2905 | 2934 |
2906 RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); | 2935 RegExpNode* continue_replacement = |
| 2936 continue_node_->FilterASCII(depth - 1, ignore_case); |
2907 // If we can't continue after the loop then there is no sense in doing the | 2937 // If we can't continue after the loop then there is no sense in doing the |
2908 // loop. | 2938 // loop. |
2909 if (continue_replacement == NULL) return set_replacement(NULL); | 2939 if (continue_replacement == NULL) return set_replacement(NULL); |
2910 } | 2940 } |
2911 | 2941 |
2912 return ChoiceNode::FilterASCII(depth - 1); | 2942 return ChoiceNode::FilterASCII(depth - 1, ignore_case); |
2913 } | 2943 } |
2914 | 2944 |
2915 | 2945 |
// Filters every alternative of this choice node and drops the ones that
// filter to NULL. Returns the cached replacement() if already calculated, and
// returns this node unchanged when depth is negative or the node is currently
// being visited.
//   - If any alternative has a non-empty guard list, the node is recorded as
//     its own replacement and returned without filtering anything.
//   - If fewer than two alternatives survive, the replacement is the single
//     survivor, or NULL when none survive.
//   - If all alternatives survive, the node is returned with the filtered
//     nodes installed.
//   - Otherwise alternatives_ is rebuilt from the survivors. The rebuild loop
//     calls FilterASCII on each alternative again to find out which survived.
2916 RegExpNode* ChoiceNode::FilterASCII(int depth) { | 2946 RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) { |
2917 if (info()->replacement_calculated) return replacement(); | 2947 if (info()->replacement_calculated) return replacement(); |
2918 if (depth < 0) return this; | 2948 if (depth < 0) return this; |
2919 if (info()->visited) return this; | 2949 if (info()->visited) return this; |
2920 VisitMarker marker(info()); | 2950 VisitMarker marker(info()); |
2921 int choice_count = alternatives_->length(); | 2951 int choice_count = alternatives_->length(); |
2922 | 2952 |
2923 for (int i = 0; i < choice_count; i++) { | 2953 for (int i = 0; i < choice_count; i++) { |
2924 GuardedAlternative alternative = alternatives_->at(i); | 2954 GuardedAlternative alternative = alternatives_->at(i); |
2925 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2955 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
2926 set_replacement(this); | 2956 set_replacement(this); |
2927 return this; | 2957 return this; |
2928 } | 2958 } |
2929 } | 2959 } |
2930 | 2960 |
2931 int surviving = 0; | 2961 int surviving = 0; |
2932 RegExpNode* survivor = NULL; | 2962 RegExpNode* survivor = NULL; |
2933 for (int i = 0; i < choice_count; i++) { | 2963 for (int i = 0; i < choice_count; i++) { |
2934 GuardedAlternative alternative = alternatives_->at(i); | 2964 GuardedAlternative alternative = alternatives_->at(i); |
2935 RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1); | 2965 RegExpNode* replacement = |
| 2966 alternative.node()->FilterASCII(depth - 1, ignore_case); |
2936 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. | 2967 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. |
2937 if (replacement != NULL) { | 2968 if (replacement != NULL) { |
2938 alternatives_->at(i).set_node(replacement); | 2969 alternatives_->at(i).set_node(replacement); |
2939 surviving++; | 2970 surviving++; |
2940 survivor = replacement; | 2971 survivor = replacement; |
2941 } | 2972 } |
2942 } | 2973 } |
2943 if (surviving < 2) return set_replacement(survivor); | 2974 if (surviving < 2) return set_replacement(survivor); |
2944 | 2975 |
2945 set_replacement(this); | 2976 set_replacement(this); |
2946 if (surviving == choice_count) { | 2977 if (surviving == choice_count) { |
2947 return this; | 2978 return this; |
2948 } | 2979 } |
2949 // Only some of the nodes survived the filtering. We need to rebuild the | 2980 // Only some of the nodes survived the filtering. We need to rebuild the |
2950 // alternatives list. | 2981 // alternatives list. |
2951 ZoneList<GuardedAlternative>* new_alternatives = | 2982 ZoneList<GuardedAlternative>* new_alternatives = |
2952 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); | 2983 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
2953 for (int i = 0; i < choice_count; i++) { | 2984 for (int i = 0; i < choice_count; i++) { |
2954 RegExpNode* replacement = | 2985 RegExpNode* replacement = |
2955 alternatives_->at(i).node()->FilterASCII(depth - 1); | 2986 alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case); |
2956 if (replacement != NULL) { | 2987 if (replacement != NULL) { |
2957 alternatives_->at(i).set_node(replacement); | 2988 alternatives_->at(i).set_node(replacement); |
2958 new_alternatives->Add(alternatives_->at(i), zone()); | 2989 new_alternatives->Add(alternatives_->at(i), zone()); |
2959 } | 2990 } |
2960 } | 2991 } |
2961 alternatives_ = new_alternatives; | 2992 alternatives_ = new_alternatives; |
2962 return this; | 2993 return this; |
2963 } | 2994 } |
2964 | 2995 |
2965 | 2996 |
// Filters the two alternatives of a negative-lookahead choice. Returns the
// cached replacement() if already calculated, and returns this node unchanged
// when depth is negative or the node is currently being visited.
// Alternative 1 (what follows the lookahead) is filtered first; if it filters
// to NULL, the node's replacement is NULL. Alternative 0 (the lookahead
// itself) is filtered next; if it filters to NULL, the replacement is the
// filtered alternative 1. Otherwise both filtered nodes are installed and the
// node is recorded as its own replacement.
2966 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) { | 2997 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth, |
| 2998 bool ignore_case) { |
2967 if (info()->replacement_calculated) return replacement(); | 2999 if (info()->replacement_calculated) return replacement(); |
2968 if (depth < 0) return this; | 3000 if (depth < 0) return this; |
2969 if (info()->visited) return this; | 3001 if (info()->visited) return this; |
2970 VisitMarker marker(info()); | 3002 VisitMarker marker(info()); |
2971 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 3003 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
2972 // afterwards. | 3004 // afterwards. |
2973 RegExpNode* node = alternatives_->at(1).node(); | 3005 RegExpNode* node = alternatives_->at(1).node(); |
2974 RegExpNode* replacement = node->FilterASCII(depth - 1); | 3006 RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case); |
2975 if (replacement == NULL) return set_replacement(NULL); | 3007 if (replacement == NULL) return set_replacement(NULL); |
2976 alternatives_->at(1).set_node(replacement); | 3008 alternatives_->at(1).set_node(replacement); |
2977 | 3009 |
2978 RegExpNode* neg_node = alternatives_->at(0).node(); | 3010 RegExpNode* neg_node = alternatives_->at(0).node(); |
2979 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1); | 3011 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case); |
2980 // If the negative lookahead is always going to fail then | 3012 // If the negative lookahead is always going to fail then |
2981 // we don't need to check it. | 3013 // we don't need to check it. |
2982 if (neg_replacement == NULL) return set_replacement(replacement); | 3014 if (neg_replacement == NULL) return set_replacement(replacement); |
2983 alternatives_->at(0).set_node(neg_replacement); | 3015 alternatives_->at(0).set_node(neg_replacement); |
2984 return set_replacement(this); | 3016 return set_replacement(this); |
2985 } | 3017 } |
2986 | 3018 |
2987 | 3019 |
2988 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 3020 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2989 RegExpCompiler* compiler, | 3021 RegExpCompiler* compiler, |
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3292 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3324 int cp_offset = trace->cp_offset() + elm.cp_offset; |
3293 if (elm.type == TextElement::ATOM) { | 3325 if (elm.type == TextElement::ATOM) { |
3294 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3326 Vector<const uc16> quarks = elm.data.u_atom->data(); |
3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3327 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
3296 if (first_element_checked && i == 0 && j == 0) continue; | 3328 if (first_element_checked && i == 0 && j == 0) continue; |
3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3329 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; |
3298 EmitCharacterFunction* emit_function = NULL; | 3330 EmitCharacterFunction* emit_function = NULL; |
3299 switch (pass) { | 3331 switch (pass) { |
3300 case NON_ASCII_MATCH: | 3332 case NON_ASCII_MATCH: |
3301 ASSERT(ascii); | 3333 ASSERT(ascii); |
3302 if (quarks[j] > String::kMaxAsciiCharCode) { | 3334 if (quarks[j] > String::kMaxOneByteCharCode) { |
3303 assembler->GoTo(backtrack); | 3335 assembler->GoTo(backtrack); |
3304 return; | 3336 return; |
3305 } | 3337 } |
3306 break; | 3338 break; |
3307 case NON_LETTER_CHARACTER_MATCH: | 3339 case NON_LETTER_CHARACTER_MATCH: |
3308 emit_function = &EmitAtomNonLetter; | 3340 emit_function = &EmitAtomNonLetter; |
3309 break; | 3341 break; |
3310 case SIMPLE_CHARACTER_MATCH: | 3342 case SIMPLE_CHARACTER_MATCH: |
3311 emit_function = &EmitSimpleCharacter; | 3343 emit_function = &EmitSimpleCharacter; |
3312 break; | 3344 break; |
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3523 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
3492 if (!CharacterRange::IsCanonical(ranges)) { | 3524 if (!CharacterRange::IsCanonical(ranges)) { |
3493 CharacterRange::Canonicalize(ranges); | 3525 CharacterRange::Canonicalize(ranges); |
3494 } | 3526 } |
3495 if (node->is_negated()) { | 3527 if (node->is_negated()) { |
3496 return ranges->length() == 0 ? on_success() : NULL; | 3528 return ranges->length() == 0 ? on_success() : NULL; |
3497 } | 3529 } |
3498 if (ranges->length() != 1) return NULL; | 3530 if (ranges->length() != 1) return NULL; |
3499 uint32_t max_char; | 3531 uint32_t max_char; |
3500 if (compiler->ascii()) { | 3532 if (compiler->ascii()) { |
3501 max_char = String::kMaxAsciiCharCode; | 3533 max_char = String::kMaxOneByteCharCode; |
3502 } else { | 3534 } else { |
3503 max_char = String::kMaxUtf16CodeUnit; | 3535 max_char = String::kMaxUtf16CodeUnit; |
3504 } | 3536 } |
3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; | 3537 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; |
3506 } | 3538 } |
3507 | 3539 |
3508 | 3540 |
3509 // Finds the fixed match length of a sequence of nodes that goes from | 3541 // Finds the fixed match length of a sequence of nodes that goes from |
3510 // this alternative and back to this choice node. If there are variable | 3542 // this alternative and back to this choice node. If there are variable |
3511 // length nodes or other complications in the way then return a sentinel | 3543 // length nodes or other complications in the way then return a sentinel |
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 3723 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
3692 } | 3724 } |
3693 } | 3725 } |
3694 | 3726 |
3695 | 3727 |
// Constructor: stores `length` and `compiler`, chooses the upper character
// bound kept in max_char_, and fills bitmaps_ with `length` freshly
// zone-allocated BoyerMoorePositionInfo entries.
3696 BoyerMooreLookahead::BoyerMooreLookahead( | 3728 BoyerMooreLookahead::BoyerMooreLookahead( |
3697 int length, RegExpCompiler* compiler, Zone* zone) | 3729 int length, RegExpCompiler* compiler, Zone* zone) |
3698 : length_(length), | 3730 : length_(length), |
3699 compiler_(compiler) { | 3731 compiler_(compiler) {
// When compiler->ascii() is true the bound is the narrow-string maximum
// (String::kMaxAsciiCharCode in the OLD column, String::kMaxOneByteCharCode
// in the NEW column); otherwise it is String::kMaxUtf16CodeUnit.
3700 if (compiler->ascii()) { | 3732 if (compiler->ascii()) { |
3701 max_char_ = String::kMaxAsciiCharCode; | 3733 max_char_ = String::kMaxOneByteCharCode; |
3702 } else { | 3734 } else { |
3703 max_char_ = String::kMaxUtf16CodeUnit; | 3735 max_char_ = String::kMaxUtf16CodeUnit; |
3704 } | 3736 } |
// The list is created with `length` passed to its constructor, then exactly
// `length` entries are appended, all allocated in `zone`.
3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); | 3737 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); |
3706 for (int i = 0; i < length; i++) { | 3738 for (int i = 0; i < length; i++) { |
3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); | 3739 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); |
3708 } | 3740 } |
3709 } | 3741 } |
3710 | 3742 |
3711 | 3743 |
(...skipping 1618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5330 } | 5362 } |
5331 | 5363 |
5332 | 5364 |
5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 5365 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
5334 bool is_ascii, | 5366 bool is_ascii, |
5335 Zone* zone) { | 5367 Zone* zone) { |
5336 Isolate* isolate = Isolate::Current(); | 5368 Isolate* isolate = Isolate::Current(); |
5337 uc16 bottom = from(); | 5369 uc16 bottom = from(); |
5338 uc16 top = to(); | 5370 uc16 top = to(); |
5339 if (is_ascii) { | 5371 if (is_ascii) { |
5340 if (bottom > String::kMaxAsciiCharCode) return; | 5372 if (bottom > String::kMaxOneByteCharCode) return; |
5341 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 5373 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; |
5342 } | 5374 } |
5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5375 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5344 if (top == bottom) { | 5376 if (top == bottom) { |
5345 // If this is a singleton we just expand the one character. | 5377 // If this is a singleton we just expand the one character. |
5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); | 5378 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
5347 for (int i = 0; i < length; i++) { | 5379 for (int i = 0; i < length; i++) { |
5348 uc32 chr = chars[i]; | 5380 uc32 chr = chars[i]; |
5349 if (chr != bottom) { | 5381 if (chr != bottom) { |
5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); | 5382 ranges->Add(CharacterRange::Singleton(chars[i]), zone); |
5351 } | 5383 } |
(...skipping 526 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5878 if (offset >= bm->length()) { | 5910 if (offset >= bm->length()) { |
5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5911 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5880 return; | 5912 return; |
5881 } | 5913 } |
5882 uc16 character = atom->data()[j]; | 5914 uc16 character = atom->data()[j]; |
5883 if (bm->compiler()->ignore_case()) { | 5915 if (bm->compiler()->ignore_case()) { |
5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5916 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5885 int length = GetCaseIndependentLetters( | 5917 int length = GetCaseIndependentLetters( |
5886 ISOLATE, | 5918 ISOLATE, |
5887 character, | 5919 character, |
5888 bm->max_char() == String::kMaxAsciiCharCode, | 5920 bm->max_char() == String::kMaxOneByteCharCode, |
5889 chars); | 5921 chars); |
5890 for (int j = 0; j < length; j++) { | 5922 for (int j = 0; j < length; j++) { |
5891 bm->Set(offset, chars[j]); | 5923 bm->Set(offset, chars[j]); |
5892 } | 5924 } |
5893 } else { | 5925 } else { |
5894 if (character <= max_char) bm->Set(offset, character); | 5926 if (character <= max_char) bm->Set(offset, character); |
5895 } | 5927 } |
5896 } | 5928 } |
5897 } else { | 5929 } else { |
5898 ASSERT(text.type == TextElement::CHAR_CLASS); | 5930 ASSERT(text.type == TextElement::CHAR_CLASS); |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6092 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 6124 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
6093 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 6125 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
6094 first_step_node->AddAlternative(GuardedAlternative( | 6126 first_step_node->AddAlternative(GuardedAlternative( |
6095 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); | 6127 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); |
6096 node = first_step_node; | 6128 node = first_step_node; |
6097 } else { | 6129 } else { |
6098 node = loop_node; | 6130 node = loop_node; |
6099 } | 6131 } |
6100 } | 6132 } |
6101 if (is_ascii) { | 6133 if (is_ascii) { |
6102 node = node->FilterASCII(RegExpCompiler::kMaxRecursion); | 6134 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); |
6103 // Do it again to propagate the new nodes to places where they were not | 6135 // Do it again to propagate the new nodes to places where they were not |
6104 // put because they had not been calculated yet. | 6136 // put because they had not been calculated yet. |
6105 if (node != NULL) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); | 6137 if (node != NULL) { |
| 6138 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6139 } |
6106 } | 6140 } |
6107 | 6141 |
6108 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6142 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
6109 data->node = node; | 6143 data->node = node; |
6110 Analysis analysis(ignore_case, is_ascii); | 6144 Analysis analysis(ignore_case, is_ascii); |
6111 analysis.EnsureAnalyzed(node); | 6145 analysis.EnsureAnalyzed(node); |
6112 if (analysis.has_failed()) { | 6146 if (analysis.has_failed()) { |
6113 const char* error_message = analysis.error_message(); | 6147 const char* error_message = analysis.error_message(); |
6114 return CompilationResult(error_message); | 6148 return CompilationResult(error_message); |
6115 } | 6149 } |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6159 } | 6193 } |
6160 | 6194 |
6161 return compiler.Assemble(&macro_assembler, | 6195 return compiler.Assemble(&macro_assembler, |
6162 node, | 6196 node, |
6163 data->capture_count, | 6197 data->capture_count, |
6164 pattern); | 6198 pattern); |
6165 } | 6199 } |
6166 | 6200 |
6167 | 6201 |
6168 }} // namespace v8::internal | 6202 }} // namespace v8::internal |
OLD | NEW |