| Index: src/jsregexp.cc
|
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc
|
| index f6e2e7f905052dfff5d8e38c26ca40abaf4d801c..a33df6f929d2ae50bbcf337de3ec65dc8f827dba 100644
|
| --- a/src/jsregexp.cc
|
| +++ b/src/jsregexp.cc
|
| @@ -2855,6 +2855,29 @@ RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
|
| }
|
|
|
|
|
| +// True if |range| holds 0x39c or 0x3bc (Greek mu, cf. 0xb5) or 0x178 (cf. 0xff): non-Latin-1 chars with Latin-1 case equivalents.
|
| +static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
|
| +#ifdef ENABLE_LATIN_1
|
| + // TODO(dcarney): this could be a lot more efficient (three separate Contains() probes per range).
|
| + return range.Contains(0x39c) ||
|
| + range.Contains(0x3bc) || range.Contains(0x178);
|
| +#else
|
| + return false;
|
| +#endif
|
| +}
|
| +
|
| +
|
| +#ifdef ENABLE_LATIN_1
|
| +static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
|
| + for (int i = 0; i < ranges->length(); i++) {
|
| + // Linear scan; stops at the first matching range. TODO(dcarney): this could be a lot more efficient.
|
| + if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
|
| + }
|
| + return false;
|
| +}
|
| +#endif
|
| +
|
| +
|
| RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
|
| if (info()->replacement_calculated) return replacement();
|
| if (depth < 0) return this;
|
| @@ -2871,21 +2894,21 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
|
| return set_replacement(NULL);
|
| }
|
| #else
|
| - if (quarks[j] <= String::kMaxOneByteCharCode) continue;
|
| + uint16_t c = quarks[j];
|
| + if (c <= String::kMaxOneByteCharCode) continue;
|
| if (!ignore_case) return set_replacement(NULL);
|
| // Here, we need to check for characters whose upper and lower cases
|
| // are outside the Latin-1 range.
|
| - if (!unibrow::Latin1::NonLatin1CanBeConvertedToLatin1(quarks[j])) {
|
| - return set_replacement(NULL);
|
| - }
|
| + uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
|
| + // The character has no Latin-1 case equivalent (conversion yielded 0).
|
| + if (converted == 0) return set_replacement(NULL);
|
| + // Convert quark to Latin-1 in place.
|
| + uint16_t* copy = const_cast<uint16_t*>(quarks.start());
|
| + copy[j] = converted;
|
| #endif
|
| }
|
| } else {
|
| ASSERT(elm.type == TextElement::CHAR_CLASS);
|
| -#ifdef ENABLE_LATIN_1
|
| - // TODO(dcarney): Can this be improved?
|
| - if (ignore_case) continue;
|
| -#endif
|
| RegExpCharacterClass* cc = elm.data.u_char_class;
|
| ZoneList<CharacterRange>* ranges = cc->ranges(zone());
|
| if (!CharacterRange::IsCanonical(ranges)) {
|
| @@ -2897,11 +2920,19 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
|
| if (range_count != 0 &&
|
| ranges->at(0).from() == 0 &&
|
| ranges->at(0).to() >= String::kMaxOneByteCharCode) {
|
| +#ifdef ENABLE_LATIN_1
|
| + // This will be handled in a later filter.
|
| + if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
|
| +#endif
|
| return set_replacement(NULL);
|
| }
|
| } else {
|
| if (range_count == 0 ||
|
| ranges->at(0).from() > String::kMaxOneByteCharCode) {
|
| +#ifdef ENABLE_LATIN_1
|
| + // This will be handled in a later filter.
|
| + if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
|
| +#endif
|
| return set_replacement(NULL);
|
| }
|
| }
|
| @@ -5354,7 +5385,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
| Isolate* isolate = Isolate::Current();
|
| uc16 bottom = from();
|
| uc16 top = to();
|
| - if (is_ascii) {
|
| + if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
|
| if (bottom > String::kMaxOneByteCharCode) return;
|
| if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
|
| }
|
|
|