 Chromium Code Reviews
 Chromium Code Reviews Issue 9854020:
  RegExp: Add support for table-based character class  (Closed) 
  Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
    
  
    Issue 9854020:
  RegExp: Add support for table-based character class  (Closed) 
  Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/| OLD | NEW | 
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright | 
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. | 
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above | 
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following | 
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided | 
| (...skipping 1516 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1527 macro_assembler->Bind(&ok); | 1527 macro_assembler->Bind(&ok); | 
| 1528 break; | 1528 break; | 
| 1529 default: | 1529 default: | 
| 1530 UNREACHABLE(); | 1530 UNREACHABLE(); | 
| 1531 break; | 1531 break; | 
| 1532 } | 1532 } | 
| 1533 return true; | 1533 return true; | 
| 1534 } | 1534 } | 
| 1535 | 1535 | 
| 1536 | 1536 | 
| 1537 static void EmitBoundaryTest(RegExpMacroAssembler* masm, | |
| 1538 int border, | |
| 1539 Label* fall_through, | |
| 1540 Label* above_or_equal, | |
| 1541 Label* below) { | |
| 1542 if (below != fall_through) { | |
| 1543 masm->CheckCharacterLT(border, below); | |
| 1544 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); | |
| 1545 } else { | |
| 1546 masm->CheckCharacterGT(border - 1, above_or_equal); | |
| 1547 } | |
| 1548 } | |
| 1549 | |
| 1550 | |
| 1551 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, | |
| 1552 int first, | |
| 1553 int last, | |
| 1554 Label* fall_through, | |
| 1555 Label* in_range, | |
| 1556 Label* out_of_range) { | |
| 1557 if (in_range == fall_through) { | |
| 1558 if (first == last) { | |
| 1559 masm->CheckNotCharacter(first, out_of_range); | |
| 1560 } else { | |
| 1561 masm->CheckCharacterNotInRange(first, last, out_of_range); | |
| 1562 } | |
| 1563 } else { | |
| 1564 if (first == last) { | |
| 1565 masm->CheckCharacter(first, in_range); | |
| 1566 } else { | |
| 1567 masm->CheckCharacterInRange(first, last, in_range); | |
| 1568 } | |
| 1569 if (out_of_range != fall_through) masm->GoTo(out_of_range); | |
| 1570 } | |
| 1571 } | |
| 1572 | |
| 1573 | |
| 1574 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | |
| 1575 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | |
| 1576 static void EmitUseLookupTable( | |
| 1577 RegExpMacroAssembler* masm, | |
| 1578 ZoneList<int>* ranges, | |
| 1579 int start_index, | |
| 1580 int end_index, | |
| 1581 int min_char, | |
| 1582 Label* fall_through, | |
| 1583 Label* even_label, | |
| 1584 Label* odd_label) { | |
| 1585 static const int kSize = RegExpMacroAssembler::kTableSize; | |
| 1586 static const int kMask = RegExpMacroAssembler::kTableMask; | |
| 1587 | |
| 1588 int base = (min_char & ~kMask); | |
| 1589 USE(base); | |
| 1590 | |
| 1591 // Assert that everything is on one kTableSize page. | |
| 1592 for (int i = start_index; i <= end_index; i++) { | |
| 1593 ASSERT_EQ(ranges->at(i) & ~kMask, base); | |
| 1594 } | |
| 1595 ASSERT(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); | |
| 1596 | |
| 1597 char templ[kSize]; | |
| 1598 Label* on_bit_set; | |
| 1599 Label* on_bit_clear; | |
| 1600 int bit; | |
| 1601 if (even_label == fall_through) { | |
| 1602 on_bit_set = odd_label; | |
| 1603 on_bit_clear = even_label; | |
| 1604 bit = 1; | |
| 1605 } else { | |
| 1606 on_bit_set = even_label; | |
| 1607 on_bit_clear = odd_label; | |
| 1608 bit = 0; | |
| 1609 } | |
| 1610 for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) { | |
| 1611 templ[i] = bit; | |
| 1612 } | |
| 1613 int j = 0; | |
| 1614 bit ^= 1; | |
| 1615 for (int i = start_index; i < end_index; i++) { | |
| 1616 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) { | |
| 1617 templ[j] = bit; | |
| 1618 } | |
| 1619 bit ^= 1; | |
| 1620 } | |
| 1621 for (int i = j; i < kSize; i++) { | |
| 1622 templ[i] = bit; | |
| 1623 } | |
| 1624 // TODO(erikcorry): Cache these. | |
| 1625 Handle<ByteArray> ba = FACTORY->NewByteArray(kSize, TENURED); | |
| 1626 for (int i = 0; i < kSize; i++) { | |
| 1627 ba->set(i, templ[i]); | |
| 1628 } | |
| 1629 masm->CheckBitInTable(ba, on_bit_set); | |
| 1630 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | |
| 1631 } | |
| 1632 | |
| 1633 | |
| 1634 static void CutOutRange(RegExpMacroAssembler* masm, | |
| 1635 ZoneList<int>* ranges, | |
| 1636 int start_index, | |
| 1637 int end_index, | |
| 1638 int cut_index, | |
| 1639 Label* even_label, | |
| 1640 Label* odd_label) { | |
| 1641 bool odd = (((cut_index - start_index) & 1) == 1); | |
| 1642 Label* in_range_label = odd ? odd_label : even_label; | |
| 1643 Label dummy; | |
| 1644 EmitDoubleBoundaryTest(masm, | |
| 1645 ranges->at(cut_index), | |
| 1646 ranges->at(cut_index + 1) - 1, | |
| 1647 &dummy, | |
| 1648 in_range_label, | |
| 1649 &dummy); | |
| 1650 ASSERT(!dummy.is_linked()); | |
| 1651 // Cut out the single range by rewriting the array. This creates a new | |
| 1652 // range that is a merger of the two ranges on either side of the one we | |
| 1653 // are cutting out. The oddity of the labels is preserved. | |
| 1654 for (int j = cut_index; j > start_index; j--) { | |
| 1655 ranges->at(j) = ranges->at(j - 1); | |
| 1656 } | |
| 1657 for (int j = cut_index + 1; j < end_index; j++) { | |
| 1658 ranges->at(j) = ranges->at(j + 1); | |
| 1659 } | |
| 1660 } | |
| 1661 | |
| 1662 | |
| 1663 // Unicode case. Split the search space into kSize spaces that are handled | |
| 1664 // with recursion. | |
| 1665 static void SplitSearchSpace(ZoneList<int>* ranges, | |
| 1666 int start_index, | |
| 1667 int end_index, | |
| 1668 int* new_start_index, | |
| 1669 int* new_end_index, | |
| 1670 int* border) { | |
| 1671 static const int kSize = RegExpMacroAssembler::kTableSize; | |
| 1672 static const int kMask = RegExpMacroAssembler::kTableMask; | |
| 1673 | |
| 1674 int first = ranges->at(start_index); | |
| 1675 int last = ranges->at(end_index) - 1; | |
| 1676 | |
| 1677 *new_start_index = start_index; | |
| 1678 *border = (ranges->at(start_index) & ~kMask) + kSize; | |
| 1679 while (*new_start_index < end_index) { | |
| 1680 if (ranges->at(*new_start_index) > *border) break; | |
| 1681 (*new_start_index)++; | |
| 1682 } | |
| 1683 // new_start_index is the index of the first edge that is beyond the | |
| 1684 // current kSize space. | |
| 1685 | |
| 1686 // For very large search spaces we do a binary chop search of the non-ASCII | |
| 1687 // space instead of just going to the end of the current kSize space. The | |
| 1688 // heuristics are complicated a little by the fact that any 128-character | |
| 1689 // encoding space can be quickly tested with a table lookup, so we don't | |
| 1690 // wish to do binary chop search at a smaller granularity than that. A | |
| 1691 // 128-character space can take up a lot of space in the ranges array if, | |
| 1692 // for example, we only want to match every second character (eg. the lower | |
| 1693 // case characters on some Unicode pages). | |
| 1694 int binary_chop_index = (end_index + start_index) / 2; | |
| 1695 // The first test ensures that we get to the code that handles the ASCII | |
| 1696 // range with a single not-taken branch, speeding up this important | |
| 1697 // character range (even non-ASCII charset-based text has spaces and | |
| 1698 // punctuation). | |
| 1699 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case. | |
| 1700 end_index - start_index > (*new_start_index - start_index) * 2 && | |
| 1701 last - first > kSize * 2 && | |
| 1702 binary_chop_index > *new_start_index && | |
| 1703 ranges->at(binary_chop_index) >= first + 2 * kSize) { | |
| 1704 int scan_forward_for_section_border = binary_chop_index;; | |
| 1705 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; | |
| 1706 | |
| 1707 while (scan_forward_for_section_border < end_index) { | |
| 1708 if (ranges->at(scan_forward_for_section_border) > new_border) { | |
| 1709 *new_start_index = scan_forward_for_section_border; | |
| 1710 *border = new_border; | |
| 1711 break; | |
| 1712 } | |
| 1713 scan_forward_for_section_border++; | |
| 1714 } | |
| 1715 } | |
| 1716 | |
| 1717 ASSERT(*new_start_index > start_index); | |
| 1718 *new_end_index = *new_start_index - 1; | |
| 1719 if (ranges->at(*new_end_index) == *border) { | |
| 1720 (*new_end_index)--; | |
| 1721 } | |
| 1722 if (*border >= ranges->at(end_index)) { | |
| 1723 *border = ranges->at(end_index); | |
| 1724 *new_start_index = end_index; // Won't be used. | |
| 1725 *new_end_index = end_index - 1; | |
| 1726 } | |
| 1727 } | |
| 1728 | |
| 1729 | |
| 1730 // Gets a series of segment boundaries representing a character class. If the | |
| 1731 // character is in the range between an even and an odd boundary (counting from | |
| 1732 // start_index) then go to even_label, otherwise go to odd_label. We already | |
| 1733 // know that the character is in the range of min_char to max_char inclusive. | |
| 1734 // Either label can be NULL indicating backtracking. Either label can also be | |
| 1735 // equal to the fall_through label. | |
| 1736 static void GenerateBranches(RegExpMacroAssembler* masm, | |
| 1737 ZoneList<int>* ranges, | |
| 1738 int start_index, | |
| 1739 int end_index, | |
| 1740 uc16 min_char, | |
| 1741 uc16 max_char, | |
| 1742 Label* fall_through, | |
| 1743 Label* even_label, | |
| 1744 Label* odd_label) { | |
| 1745 int first = ranges->at(start_index); | |
| 1746 int last = ranges->at(end_index) - 1; | |
| 1747 | |
| 1748 // Just need to test if the character is before or on-or-after | |
| 1749 // a particular character. | |
| 1750 if (start_index == end_index) { | |
| 1751 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | |
| 1752 return; | |
| 1753 } | |
| 1754 | |
| 1755 // Another almost trivial case: There is one interval in the middle that is | |
| 1756 // different from the end intervals. | |
| 1757 if (start_index + 1 == end_index) { | |
| 1758 EmitDoubleBoundaryTest( | |
| 1759 masm, first, last, fall_through, even_label, odd_label); | |
| 1760 return; | |
| 1761 } | |
| 1762 | |
| 1763 // It's not worth using table lookup if there are very few intervals in the | |
| 1764 // character class. | |
| 1765 if (end_index - start_index <= 6) { | |
| 1766 // It is faster to test for individual characters, so we look for those | |
| 1767 // first, then try arbitrary ranges in the second round. | |
| 1768 static int kNoCutIndex = -1; | |
| 1769 int cut = kNoCutIndex; | |
| 1770 for (int i = start_index; i < end_index; i++) { | |
| 1771 if (ranges->at(i) == ranges->at(i + 1) - 1) { | |
| 1772 cut = i; | |
| 1773 break; | |
| 1774 } | |
| 1775 } | |
| 1776 if (cut == kNoCutIndex) cut = start_index; | |
| 1777 CutOutRange( | |
| 1778 masm, ranges, start_index, end_index, cut, even_label, odd_label); | |
| 1779 ASSERT_GE(end_index - start_index, 2); | |
| 1780 GenerateBranches(masm, | |
| 1781 ranges, | |
| 1782 start_index + 1, | |
| 1783 end_index - 1, | |
| 1784 min_char, | |
| 1785 max_char, | |
| 1786 fall_through, | |
| 1787 even_label, | |
| 1788 odd_label); | |
| 1789 return; | |
| 1790 } | |
| 1791 | |
| 1792 // If there are a lot of intervals in the regexp, then we will use tables to | |
| 1793 // determine whether the character is inside or outside the character class. | |
| 1794 static const int kBits = RegExpMacroAssembler::kTableSizeBits; | |
| 1795 | |
| 1796 if ((max_char >> kBits) == (min_char >> kBits)) { | |
| 1797 EmitUseLookupTable(masm, | |
| 1798 ranges, | |
| 1799 start_index, | |
| 1800 end_index, | |
| 1801 min_char, | |
| 1802 fall_through, | |
| 1803 even_label, | |
| 1804 odd_label); | |
| 1805 return; | |
| 1806 } | |
| 1807 | |
| 1808 if ((min_char >> kBits) != (first >> kBits)) { | |
| 1809 masm->CheckCharacterLT(first, odd_label); | |
| 1810 GenerateBranches(masm, | |
| 1811 ranges, | |
| 1812 start_index + 1, | |
| 1813 end_index, | |
| 1814 first, | |
| 1815 max_char, | |
| 1816 fall_through, | |
| 1817 odd_label, | |
| 1818 even_label); | |
| 1819 return; | |
| 1820 } | |
| 1821 | |
| 1822 int new_start_index = 0; | |
| 1823 int new_end_index = 0; | |
| 1824 int border = 0; | |
| 1825 | |
| 1826 SplitSearchSpace(ranges, | |
| 1827 start_index, | |
| 1828 end_index, | |
| 1829 &new_start_index, | |
| 1830 &new_end_index, | |
| 1831 &border); | |
| 1832 | |
| 1833 Label handle_rest; | |
| 1834 Label* above = &handle_rest; | |
| 1835 if (border == last + 1) { | |
| 1836 // We didn't find any section that started after the limit, so everything | |
| 1837 // above the border is one of the terminal labels. | |
| 1838 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; | |
| 1839 ASSERT(new_end_index == end_index - 1); | |
| 1840 } | |
| 1841 | |
| 1842 ASSERT_LT(start_index, new_start_index); | |
| 1843 ASSERT_LT(new_end_index, end_index); | |
| 
ulan
2012/03/29 16:19:17
Also: start_index <= new_end_index && new_start_in
 
Erik Corry
2012/03/30 07:46:28
Done.
 | |
| 1844 ASSERT(new_end_index + 1 == new_start_index || | |
| 1845 (new_end_index + 2 == new_start_index && | |
| 1846 border == ranges->at(new_end_index + 1))); | |
| 1847 ASSERT_LT(min_char, border - 1); | |
| 1848 ASSERT_LT(border, max_char); | |
| 1849 ASSERT_LT(ranges->at(new_end_index), border); | |
| 1850 ASSERT(border < ranges->at(new_start_index) || | |
| 1851 (border == ranges->at(new_start_index) && | |
| 1852 new_start_index == end_index && | |
| 1853 new_end_index == end_index - 1 && | |
| 1854 border == last + 1)); | |
| 1855 ASSERT(new_start_index == 0 || border >= ranges->at(new_start_index - 1)); | |
| 1856 | |
| 1857 masm->CheckCharacterGT(border - 1, above); | |
| 1858 Label dummy; | |
| 1859 GenerateBranches(masm, | |
| 1860 ranges, | |
| 1861 start_index, | |
| 1862 new_end_index, | |
| 1863 min_char, | |
| 1864 border - 1, | |
| 1865 &dummy, | |
| 1866 even_label, | |
| 1867 odd_label); | |
| 1868 if (handle_rest.is_linked()) { | |
| 1869 masm->Bind(&handle_rest); | |
| 1870 bool flip = (new_start_index & 1) != (start_index & 1); | |
| 1871 GenerateBranches(masm, | |
| 1872 ranges, | |
| 1873 new_start_index, | |
| 1874 end_index, | |
| 1875 border, | |
| 1876 max_char, | |
| 1877 &dummy, | |
| 1878 flip ? odd_label : even_label, | |
| 1879 flip ? even_label : odd_label); | |
| 1880 } | |
| 1881 } | |
| 1882 | |
| 1883 | |
| 1537 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1884 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 
| 1538 RegExpCharacterClass* cc, | 1885 RegExpCharacterClass* cc, | 
| 1539 bool ascii, | 1886 bool ascii, | 
| 1540 Label* on_failure, | 1887 Label* on_failure, | 
| 1541 int cp_offset, | 1888 int cp_offset, | 
| 1542 bool check_offset, | 1889 bool check_offset, | 
| 1543 bool preloaded) { | 1890 bool preloaded) { | 
| 1544 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1891 ZoneList<CharacterRange>* ranges = cc->ranges(); | 
| 1892 if (!CharacterRange::IsCanonical(ranges)) { | |
| 1893 CharacterRange::Canonicalize(ranges); | |
| 1894 } | |
| 1895 | |
| 1545 int max_char; | 1896 int max_char; | 
| 1546 if (ascii) { | 1897 if (ascii) { | 
| 1547 max_char = String::kMaxAsciiCharCode; | 1898 max_char = String::kMaxAsciiCharCode; | 
| 1548 } else { | 1899 } else { | 
| 1549 max_char = String::kMaxUtf16CodeUnit; | 1900 max_char = String::kMaxUtf16CodeUnit; | 
| 1550 } | 1901 } | 
| 1551 | 1902 | 
| 1552 Label success; | |
| 1553 | |
| 1554 Label* char_is_in_class = | |
| 1555 cc->is_negated() ? on_failure : &success; | |
| 1556 | |
| 1557 int range_count = ranges->length(); | 1903 int range_count = ranges->length(); | 
| 1558 | 1904 | 
| 1559 int last_valid_range = range_count - 1; | 1905 int last_valid_range = range_count - 1; | 
| 1560 while (last_valid_range >= 0) { | 1906 while (last_valid_range >= 0) { | 
| 1561 CharacterRange& range = ranges->at(last_valid_range); | 1907 CharacterRange& range = ranges->at(last_valid_range); | 
| 1562 if (range.from() <= max_char) { | 1908 if (range.from() <= max_char) { | 
| 1563 break; | 1909 break; | 
| 1564 } | 1910 } | 
| 1565 last_valid_range--; | 1911 last_valid_range--; | 
| 1566 } | 1912 } | 
| 1567 | 1913 | 
| 1568 if (last_valid_range < 0) { | 1914 if (last_valid_range < 0) { | 
| 1569 if (!cc->is_negated()) { | 1915 if (!cc->is_negated()) { | 
| 1570 // TODO(plesner): We can remove this when the node level does our | |
| 1571 // ASCII optimizations for us. | |
| 1572 macro_assembler->GoTo(on_failure); | 1916 macro_assembler->GoTo(on_failure); | 
| 1573 } | 1917 } | 
| 1574 if (check_offset) { | 1918 if (check_offset) { | 
| 1575 macro_assembler->CheckPosition(cp_offset, on_failure); | 1919 macro_assembler->CheckPosition(cp_offset, on_failure); | 
| 1576 } | 1920 } | 
| 1577 return; | 1921 return; | 
| 1578 } | 1922 } | 
| 1579 | 1923 | 
| 1580 if (last_valid_range == 0 && | 1924 if (last_valid_range == 0 && | 
| 1925 ranges->at(0).IsEverything(max_char)) { | |
| 1926 if (cc->is_negated()) { | |
| 1927 macro_assembler->GoTo(on_failure); | |
| 1928 } else { | |
| 1929 // This is a common case hit by non-anchored expressions. | |
| 1930 if (check_offset) { | |
| 1931 macro_assembler->CheckPosition(cp_offset, on_failure); | |
| 1932 } | |
| 1933 } | |
| 1934 return; | |
| 1935 } | |
| 1936 if (last_valid_range == 0 && | |
| 1581 !cc->is_negated() && | 1937 !cc->is_negated() && | 
| 1582 ranges->at(0).IsEverything(max_char)) { | 1938 ranges->at(0).IsEverything(max_char)) { | 
| 1583 // This is a common case hit by non-anchored expressions. | 1939 // This is a common case hit by non-anchored expressions. | 
| 1584 if (check_offset) { | 1940 if (check_offset) { | 
| 1585 macro_assembler->CheckPosition(cp_offset, on_failure); | 1941 macro_assembler->CheckPosition(cp_offset, on_failure); | 
| 1586 } | 1942 } | 
| 1587 return; | 1943 return; | 
| 1588 } | 1944 } | 
| 1589 | 1945 | 
| 1590 if (!preloaded) { | 1946 if (!preloaded) { | 
| 1591 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1947 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 
| 1592 } | 1948 } | 
| 1593 | 1949 | 
| 1594 if (cc->is_standard() && | 1950 if (cc->is_standard() && | 
| 1595 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1951 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 
| 1596 on_failure)) { | 1952 on_failure)) { | 
| 1597 return; | 1953 return; | 
| 1598 } | 1954 } | 
| 1599 | 1955 | 
| 1600 for (int i = 0; i < last_valid_range; i++) { | 1956 | 
| 1957 // A new list with ascending entries. Each entry is a code unit | |
| 1958 // where there is a boundary between code units that are part of | |
| 1959 // the class and code units that are not. Normally we insert an | |
| 1960 // entry at zero which goes to the failure label, but if there | |
| 1961 // was already one there we fall through for success on that entry. | |
| 1962 // Subsequent entries have alternating meaning (success/failure). | |
| 1963 ZoneList<int>* range_boundaries = new ZoneList<int>(last_valid_range); | |
| 1964 | |
| 1965 bool zeroth_entry_is_failure = !cc->is_negated(); | |
| 1966 | |
| 1967 for (int i = 0; i <= last_valid_range; i++) { | |
| 1601 CharacterRange& range = ranges->at(i); | 1968 CharacterRange& range = ranges->at(i); | 
| 1602 Label next_range; | 1969 if (range.from() == 0) { | 
| 
ulan
2012/03/29 16:19:17
I think we don't need to check for range[i] == 0.
 
Erik Corry
2012/03/30 07:46:28
There is an explicit assumption in GenerateBranche
 | |
| 1603 uc16 from = range.from(); | 1970 ASSERT_EQ(i, 0); | 
| 1604 uc16 to = range.to(); | 1971 zeroth_entry_is_failure = !zeroth_entry_is_failure; | 
| 1605 if (from > max_char) { | 1972 } else { | 
| 1606 continue; | 1973 range_boundaries->Add(range.from()); | 
| 1607 } | 1974 } | 
| 1608 if (to > max_char) to = max_char; | 1975 range_boundaries->Add(range.to() + 1); | 
| 1609 if (to == from) { | 1976 } | 
| 1610 macro_assembler->CheckCharacter(to, char_is_in_class); | 1977 int end_index = range_boundaries->length() - 1; | 
| 1611 } else { | 1978 if (range_boundaries->at(end_index) > max_char) { | 
| 1612 if (from != 0) { | 1979 end_index--; | 
| 1613 macro_assembler->CheckCharacterLT(from, &next_range); | |
| 1614 } | |
| 1615 if (to != max_char) { | |
| 1616 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); | |
| 1617 } else { | |
| 1618 macro_assembler->GoTo(char_is_in_class); | |
| 1619 } | |
| 1620 } | |
| 1621 macro_assembler->Bind(&next_range); | |
| 1622 } | 1980 } | 
| 1623 | 1981 | 
| 1624 CharacterRange& range = ranges->at(last_valid_range); | 1982 Label fall_through; | 
| 1625 uc16 from = range.from(); | 1983 GenerateBranches(macro_assembler, | 
| 1626 uc16 to = range.to(); | 1984 range_boundaries, | 
| 1627 | 1985 0, // start_index. | 
| 1628 if (to > max_char) to = max_char; | 1986 end_index, | 
| 1629 ASSERT(to >= from); | 1987 0, // min_char. | 
| 1630 | 1988 max_char, | 
| 1631 if (to == from) { | 1989 &fall_through, | 
| 1632 if (cc->is_negated()) { | 1990 zeroth_entry_is_failure ? &fall_through : on_failure, | 
| 1633 macro_assembler->CheckCharacter(to, on_failure); | 1991 zeroth_entry_is_failure ? on_failure : &fall_through); | 
| 1634 } else { | 1992 macro_assembler->Bind(&fall_through); | 
| 1635 macro_assembler->CheckNotCharacter(to, on_failure); | |
| 1636 } | |
| 1637 } else { | |
| 1638 if (from != 0) { | |
| 1639 if (cc->is_negated()) { | |
| 1640 macro_assembler->CheckCharacterLT(from, &success); | |
| 1641 } else { | |
| 1642 macro_assembler->CheckCharacterLT(from, on_failure); | |
| 1643 } | |
| 1644 } | |
| 1645 if (to != String::kMaxUtf16CodeUnit) { | |
| 1646 if (cc->is_negated()) { | |
| 1647 macro_assembler->CheckCharacterLT(to + 1, on_failure); | |
| 1648 } else { | |
| 1649 macro_assembler->CheckCharacterGT(to, on_failure); | |
| 1650 } | |
| 1651 } else { | |
| 1652 if (cc->is_negated()) { | |
| 1653 macro_assembler->GoTo(on_failure); | |
| 1654 } | |
| 1655 } | |
| 1656 } | |
| 1657 macro_assembler->Bind(&success); | |
| 1658 } | 1993 } | 
| 1659 | 1994 | 
| 1660 | 1995 | 
| 1661 RegExpNode::~RegExpNode() { | 1996 RegExpNode::~RegExpNode() { | 
| 1662 } | 1997 } | 
| 1663 | 1998 | 
| 1664 | 1999 | 
| 1665 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 2000 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 
| 1666 Trace* trace) { | 2001 Trace* trace) { | 
| 1667 // If we are generating a greedy loop then don't stop and don't reuse code. | 2002 // If we are generating a greedy loop then don't stop and don't reuse code. | 
| (...skipping 3666 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5334 } | 5669 } | 
| 5335 | 5670 | 
| 5336 return compiler.Assemble(¯o_assembler, | 5671 return compiler.Assemble(¯o_assembler, | 
| 5337 node, | 5672 node, | 
| 5338 data->capture_count, | 5673 data->capture_count, | 
| 5339 pattern); | 5674 pattern); | 
| 5340 } | 5675 } | 
| 5341 | 5676 | 
| 5342 | 5677 | 
| 5343 }} // namespace v8::internal | 5678 }} // namespace v8::internal | 
| OLD | NEW |