Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 11428) |
+++ src/jsregexp.cc (working copy) |
@@ -2426,15 +2426,9 @@ |
QuickCheckDetails::Position* pos = |
details->positions(characters_filled_in); |
uc16 c = quarks[i]; |
- if (c > char_mask) { |
- // If we expect a non-ASCII character from an ASCII string, |
- // there is no way we can match. Not even case independent |
- // matching can turn an ASCII character into non-ASCII or |
- // vice versa. |
- details->set_cannot_match(); |
- pos->determines_perfectly = false; |
- return; |
- } |
+ // We should already have filtered out nodes that have non-ASCII |
+ // characters if we are matching against an ASCII string. |
+ ASSERT(c <= char_mask); |
if (compiler->ignore_case()) { |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), |
@@ -2496,11 +2490,9 @@ |
int first_range = 0; |
while (ranges->at(first_range).from() > char_mask) { |
first_range++; |
- if (first_range == ranges->length()) { |
- details->set_cannot_match(); |
- pos->determines_perfectly = false; |
- return; |
- } |
+ // We should already have filtered out nodes that cannot match |
+ // so the first range should be a valid range. |
+ ASSERT(first_range != ranges->length()); |
} |
CharacterRange range = ranges->at(first_range); |
uc16 from = range.from(); |
@@ -2629,6 +2621,144 @@ |
}; |
+RegExpNode* SeqRegExpNode::FilterASCII(int depth) { |
+ if (info()->replacement_calculated) return replacement(); |
+ if (depth < 0) return this; |
+ ASSERT(!info()->visited); |
+ VisitMarker marker(info()); |
+ return FilterSuccessor(depth - 1); |
+} |
+ |
+ |
+RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) { |
+ RegExpNode* next = on_success_->FilterASCII(depth - 1); |
+ if (next == NULL) return set_replacement(NULL); |
+ on_success_ = next; |
+ return set_replacement(this); |
+} |
+ |
+ |
+RegExpNode* TextNode::FilterASCII(int depth) { |
+ if (info()->replacement_calculated) return replacement(); |
+ if (depth < 0) return this; |
+ ASSERT(!info()->visited); |
+ VisitMarker marker(info()); |
+ int element_count = elms_->length(); |
+ for (int i = 0; i < element_count; i++) { |
+ TextElement elm = elms_->at(i); |
+ if (elm.type == TextElement::ATOM) { |
+ Vector<const uc16> quarks = elm.data.u_atom->data(); |
+ for (int j = 0; j < quarks.length(); j++) { |
+ // We don't need special handling for case independence |
+ // because of the rule that case independence cannot make |
+ // a non-ASCII character match an ASCII character. |
+ if (quarks[j] > String::kMaxAsciiCharCode) { |
+ return set_replacement(NULL); |
+ } |
+ } |
+ } else { |
+ ASSERT(elm.type == TextElement::CHAR_CLASS); |
+ RegExpCharacterClass* cc = elm.data.u_char_class; |
+ ZoneList<CharacterRange>* ranges = cc->ranges(); |
+ if (!CharacterRange::IsCanonical(ranges)) { |
+ CharacterRange::Canonicalize(ranges); |
+ } |
+ // Now they are in order so we only need to look at the first. |
+ int range_count = ranges->length(); |
+ if (cc->is_negated()) { |
+ if (range_count != 0 && |
+ ranges->at(0).from() == 0 && |
+ ranges->at(0).to() >= String::kMaxAsciiCharCode) { |
+ return set_replacement(NULL); |
+ } |
+ } else { |
+ if (range_count == 0 || |
+ ranges->at(0).from() > String::kMaxAsciiCharCode) { |
+ return set_replacement(NULL); |
+ } |
+ } |
+ } |
+ } |
+ return FilterSuccessor(depth - 1); |
+} |
+ |
+ |
+RegExpNode* LoopChoiceNode::FilterASCII(int depth) { |
+ if (info()->replacement_calculated) return replacement(); |
+ if (depth < 0) return this; |
+ if (info()->visited) return this; |
+ VisitMarker marker(info()); |
+ |
+ RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); |
+ // If we can't continue after the loop then there is no sense in doing the |
+ // loop. |
+ if (continue_replacement == NULL) return set_replacement(NULL); |
+ |
+ return ChoiceNode::FilterASCII(depth - 1); |
+} |
+ |
+ |
+RegExpNode* ChoiceNode::FilterASCII(int depth) { |
+ if (info()->replacement_calculated) return replacement(); |
+ if (depth < 0) return this; |
+ if (info()->visited) return this; |
+ VisitMarker marker(info()); |
+ int choice_count = alternatives_->length(); |
+ int surviving = 0; |
+ RegExpNode* survivor = NULL; |
+ for (int i = 0; i < choice_count; i++) { |
+ GuardedAlternative alternative = alternatives_->at(i); |
+ RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1); |
+ ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. |
+ alternatives_->at(i).set_node(replacement); |
+ if (replacement != NULL) { |
+ surviving++; |
+ survivor = replacement; |
+ } |
+ } |
+ if (surviving < 2) return set_replacement(survivor); |
+ |
+ set_replacement(this); |
+ if (surviving == choice_count) { |
+ return this; |
+ } |
+ // Only some of the nodes survived the filtering. We need to rebuild the |
+ // alternatives list. |
+ ZoneList<GuardedAlternative>* new_alternatives = |
+ new ZoneList<GuardedAlternative>(surviving); |
+ for (int i = 0; i < choice_count; i++) { |
+ GuardedAlternative alternative = alternatives_->at(i); |
+ if (alternative.node() != NULL) { |
+ new_alternatives->Add(alternative); |
+ } |
+ } |
+ alternatives_ = new_alternatives; |
+ return this; |
+} |
+ |
+ |
+RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) { |
+ if (info()->replacement_calculated) return replacement(); |
+ if (depth < 0) return this; |
+ if (info()->visited) return this; |
+ VisitMarker marker(info()); |
+ // Alternative 0 is the negative lookahead, alternative 1 is what comes |
+ // afterwards. |
+ RegExpNode* node = alternatives_->at(1).node(); |
+ RegExpNode* replacement = node->FilterASCII(depth - 1); |
+ if (replacement == NULL) return set_replacement(NULL); |
+ alternatives_->at(1).set_node(replacement); |
+ |
+ RegExpNode* neg_node = alternatives_->at(0).node(); |
+ RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1); |
+ // If the negative lookahead is always going to fail then |
+ // we don't need to check it. |
+ if (neg_replacement == NULL) return set_replacement(replacement); |
+ alternatives_->at(0).set_node(neg_replacement); |
+ return set_replacement(this); |
+} |
+ |
+ |
void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
RegExpCompiler* compiler, |
int characters_filled_in, |
@@ -5690,6 +5820,9 @@ |
node = loop_node; |
} |
} |
+ if (is_ascii) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); |
+ |
+ if (node == NULL) node = new EndNode(EndNode::BACKTRACK); |
data->node = node; |
Analysis analysis(ignore_case, is_ascii); |
analysis.EnsureAnalyzed(node); |