| Index: src/jsregexp.cc
|
| ===================================================================
|
| --- src/jsregexp.cc (revision 11428)
|
| +++ src/jsregexp.cc (working copy)
|
| @@ -2426,15 +2426,9 @@
|
| QuickCheckDetails::Position* pos =
|
| details->positions(characters_filled_in);
|
| uc16 c = quarks[i];
|
| - if (c > char_mask) {
|
| - // If we expect a non-ASCII character from an ASCII string,
|
| - // there is no way we can match. Not even case independent
|
| - // matching can turn an ASCII character into non-ASCII or
|
| - // vice versa.
|
| - details->set_cannot_match();
|
| - pos->determines_perfectly = false;
|
| - return;
|
| - }
|
| + // We should already have filtered out nodes that have non-ASCII
|
| + // characters if we are matching against an ASCII string.
|
| + ASSERT(c <= char_mask);
|
| if (compiler->ignore_case()) {
|
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
|
| @@ -2496,11 +2490,9 @@
|
| int first_range = 0;
|
| while (ranges->at(first_range).from() > char_mask) {
|
| first_range++;
|
| - if (first_range == ranges->length()) {
|
| - details->set_cannot_match();
|
| - pos->determines_perfectly = false;
|
| - return;
|
| - }
|
| + // We should already have filtered out nodes that cannot match
|
| + // so the first range should be a valid range.
|
| + ASSERT(first_range != ranges->length());
|
| }
|
| CharacterRange range = ranges->at(first_range);
|
| uc16 from = range.from();
|
| @@ -2629,6 +2621,144 @@
|
| };
|
|
|
|
|
| +RegExpNode* SeqRegExpNode::FilterASCII(int depth) {
|
| + if (info()->replacement_calculated) return replacement();
|
| + if (depth < 0) return this;
|
| + ASSERT(!info()->visited);
|
| + VisitMarker marker(info());
|
| + return FilterSuccessor(depth - 1);
|
| +}
|
| +
|
| +
|
| +RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
|
| + RegExpNode* next = on_success_->FilterASCII(depth - 1);
|
| + if (next == NULL) return set_replacement(NULL);
|
| + on_success_ = next;
|
| + return set_replacement(this);
|
| +}
|
| +
|
| +
|
| +RegExpNode* TextNode::FilterASCII(int depth) {
|
| + if (info()->replacement_calculated) return replacement();
|
| + if (depth < 0) return this;
|
| + ASSERT(!info()->visited);
|
| + VisitMarker marker(info());
|
| + int element_count = elms_->length();
|
| + for (int i = 0; i < element_count; i++) {
|
| + TextElement elm = elms_->at(i);
|
| + if (elm.type == TextElement::ATOM) {
|
| + Vector<const uc16> quarks = elm.data.u_atom->data();
|
| + for (int j = 0; j < quarks.length(); j++) {
|
| + // We don't need special handling for case independence
|
| + // because of the rule that case independence cannot make
|
| + // a non-ASCII character match an ASCII character.
|
| + if (quarks[j] > String::kMaxAsciiCharCode) {
|
| + return set_replacement(NULL);
|
| + }
|
| + }
|
| + } else {
|
| + ASSERT(elm.type == TextElement::CHAR_CLASS);
|
| + RegExpCharacterClass* cc = elm.data.u_char_class;
|
| + ZoneList<CharacterRange>* ranges = cc->ranges();
|
| + if (!CharacterRange::IsCanonical(ranges)) {
|
| + CharacterRange::Canonicalize(ranges);
|
| + }
|
| + // Now they are in order so we only need to look at the first.
|
| + int range_count = ranges->length();
|
| + if (cc->is_negated()) {
|
| + if (range_count != 0 &&
|
| + ranges->at(0).from() == 0 &&
|
| + ranges->at(0).to() >= String::kMaxAsciiCharCode) {
|
| + return set_replacement(NULL);
|
| + }
|
| + } else {
|
| + if (range_count == 0 ||
|
| + ranges->at(0).from() > String::kMaxAsciiCharCode) {
|
| + return set_replacement(NULL);
|
| + }
|
| + }
|
| + }
|
| + }
|
| + return FilterSuccessor(depth - 1);
|
| +}
|
| +
|
| +
|
| +RegExpNode* LoopChoiceNode::FilterASCII(int depth) {
|
| + if (info()->replacement_calculated) return replacement();
|
| + if (depth < 0) return this;
|
| + if (info()->visited) return this;
|
| + VisitMarker marker(info());
|
| +
|
| + RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
|
| + // If we can't continue after the loop then there is no sense in doing the
|
| + // loop.
|
| + if (continue_replacement == NULL) return set_replacement(NULL);
|
| +
|
| + return ChoiceNode::FilterASCII(depth - 1);
|
| +}
|
| +
|
| +
|
| +RegExpNode* ChoiceNode::FilterASCII(int depth) {
|
| + if (info()->replacement_calculated) return replacement();
|
| + if (depth < 0) return this;
|
| + if (info()->visited) return this;
|
| + VisitMarker marker(info());
|
| + int choice_count = alternatives_->length();
|
| + int surviving = 0;
|
| + RegExpNode* survivor = NULL;
|
| + for (int i = 0; i < choice_count; i++) {
|
| + GuardedAlternative alternative = alternatives_->at(i);
|
| + RegExpNode* replacement = alternative.node()->FilterASCII(depth - 1);
|
| + ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
|
| + alternatives_->at(i).set_node(replacement);
|
| + if (replacement != NULL) {
|
| + surviving++;
|
| + survivor = replacement;
|
| + }
|
| + }
|
| + if (surviving < 2) return set_replacement(survivor);
|
| +
|
| + set_replacement(this);
|
| + if (surviving == choice_count) {
|
| + return this;
|
| + }
|
| + // Only some of the nodes survived the filtering. We need to rebuild the
|
| + // alternatives list.
|
| + ZoneList<GuardedAlternative>* new_alternatives =
|
| + new ZoneList<GuardedAlternative>(surviving);
|
| + for (int i = 0; i < choice_count; i++) {
|
| + GuardedAlternative alternative = alternatives_->at(i);
|
| + if (alternative.node() != NULL) {
|
| + new_alternatives->Add(alternative);
|
| + }
|
| + }
|
| + alternatives_ = new_alternatives;
|
| + return this;
|
| +}
|
| +
|
| +
|
| +RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth) {
|
| + if (info()->replacement_calculated) return replacement();
|
| + if (depth < 0) return this;
|
| + if (info()->visited) return this;
|
| + VisitMarker marker(info());
|
| + // Alternative 0 is the negative lookahead, alternative 1 is what comes
|
| + // afterwards.
|
| + RegExpNode* node = alternatives_->at(1).node();
|
| + RegExpNode* replacement = node->FilterASCII(depth - 1);
|
| + if (replacement == NULL) return set_replacement(NULL);
|
| + alternatives_->at(1).set_node(replacement);
|
| +
|
| + RegExpNode* neg_node = alternatives_->at(0).node();
|
| + RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1);
|
| + // If the negative lookahead is always going to fail then
|
| + // we don't need to check it.
|
| + if (neg_replacement == NULL) return set_replacement(replacement);
|
| + alternatives_->at(0).set_node(neg_replacement);
|
| + return set_replacement(this);
|
| +}
|
| +
|
| +
|
| void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| RegExpCompiler* compiler,
|
| int characters_filled_in,
|
| @@ -5690,6 +5820,9 @@
|
| node = loop_node;
|
| }
|
| }
|
| + if (is_ascii) node = node->FilterASCII(RegExpCompiler::kMaxRecursion);
|
| +
|
| + if (node == NULL) node = new EndNode(EndNode::BACKTRACK);
|
| data->node = node;
|
| Analysis analysis(ignore_case, is_ascii);
|
| analysis.EnsureAnalyzed(node);
|
|
|