Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 11224) |
+++ src/jsregexp.cc (working copy) |
@@ -108,6 +108,38 @@ |
} |
+static ContainedInLattice Combine(ContainedInLattice a, bool inside) { |
ulan
2012/04/16 13:52:22
Passing kLatticeIn/kLatticeOut instead of bool wou
Erik Corry
2012/04/17 07:51:45
Done.
|
+ if (a == kLatticeUnknown) return a; |
+ if (a == kNotYet) return inside ? kLatticeIn : kLatticeOut; |
+ if (a == kLatticeIn) return inside ? kLatticeIn : kLatticeUnknown; |
+ if (a == kLatticeOut) return inside ? kLatticeUnknown : kLatticeOut; |
+ UNREACHABLE(); |
+ return kLatticeUnknown; |
+} |
+ |
+ |
+ContainedInLattice AddRange(ContainedInLattice containment, |
+ const int* ranges, |
+ int ranges_length, |
+ Interval new_range) { |
+ if (containment == kLatticeUnknown) return containment; |
+ bool inside = false; |
+ int last = 0; |
+ for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) { |
+ // Consider the range from last to ranges[i]. |
+ // We haven't got to the new range yet. |
+ if (ranges[i] <= new_range.from()) continue; |
+ // New range is wholly inside last-ranges[i]. |
+ if (last <= new_range.from() && ranges[i] > new_range.to()) { |
ulan
2012/04/16 13:52:22
So ranges[i] is not inclusive, but new_range.to()
Erik Corry
2012/04/17 07:51:45
Done.
|
+ return Combine(containment, inside); |
+ } |
+ // Otherwise it |
ulan
2012/04/16 13:52:22
Trailing space.
Erik Corry
2012/04/17 07:51:45
Done.
|
+ return kLatticeUnknown; |
+ } |
+ return containment; |
+} |
+ |
+ |
// More makes code generation slower, less makes V8 benchmark score lower. |
const int kMaxLookaheadForBoyerMoore = 8; |
// In a 3-character pattern you can maximally step forwards 3 characters |
@@ -2157,6 +2189,7 @@ |
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { |
on_success()->FillInBMInfo(offset, bm, not_at_start); |
} |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
ulan
2012/04/16 13:52:22
As discussed offline, it probably better to move t
Erik Corry
2012/04/17 07:51:45
Done.
|
} |
@@ -2181,6 +2214,7 @@ |
// Match the behaviour of EatsAtLeast on this node. |
if (type() == AT_START && not_at_start) return; |
on_success()->FillInBMInfo(offset, bm, not_at_start); |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
} |
@@ -2617,12 +2651,14 @@ |
void LoopChoiceNode::FillInBMInfo( |
- int offset, BoyerMooreLookahead* bm, bool nas) { |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
if (body_can_be_zero_length_) { |
bm->SetRest(offset); |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
return; |
} |
- ChoiceNode::FillInBMInfo(offset, bm, nas); |
+ ChoiceNode::FillInBMInfo(offset, bm, not_at_start); |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
} |
@@ -2710,110 +2746,83 @@ |
} |
-// Emit the code to handle \b and \B (word-boundary or non-word-boundary) |
-// when we know whether the next character must be a word character or not. |
-static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type, |
- RegExpCompiler* compiler, |
- RegExpNode* on_success, |
- Trace* trace) { |
+// Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
+void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
- Label done; |
- |
- Trace new_trace(*trace); |
- |
- bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER); |
- Label* on_word = expect_word_character ? &done : new_trace.backtrack(); |
- Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done; |
- |
- // Check whether previous character was a word character. |
- switch (trace->at_start()) { |
- case Trace::TRUE: |
- if (expect_word_character) { |
- assembler->GoTo(on_non_word); |
- } |
- break; |
- case Trace::UNKNOWN: |
- ASSERT_EQ(0, trace->cp_offset()); |
- assembler->CheckAtStart(on_non_word); |
- // Fall through. |
- case Trace::FALSE: |
- int prev_char_offset = trace->cp_offset() - 1; |
- assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1); |
- EmitWordCheck(assembler, on_word, on_non_word, expect_word_character); |
- // We may or may not have loaded the previous character. |
- new_trace.InvalidateCurrentCharacter(); |
+ Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
+ bool not_at_start = (trace->at_start() == Trace::FALSE); |
+ BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
+ if (lookahead == NULL) { |
+ int eats_at_least = |
+ Min(kMaxLookaheadForBoyerMoore, |
+ EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); |
+ if (eats_at_least >= 1) { |
+ BoyerMooreLookahead* bm = |
+ new BoyerMooreLookahead(eats_at_least, compiler); |
+ FillInBMInfo(0, bm, not_at_start); |
+ if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; |
+ if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; |
ulan
2012/04/16 13:52:22
Consider writing the two lines above as:
next_is_w
Erik Corry
2012/04/17 07:51:45
Not the same, since it could be neither known to b
|
+ } |
+ } else { |
+ if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; |
+ if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; |
} |
+ bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); |
+ if (next_is_word_character == Trace::UNKNOWN) { |
+ Label before_non_word; |
+ Label before_word; |
+ if (trace->characters_preloaded() != 1) { |
+ assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
+ } |
+ // Fall through on non-word. |
+ EmitWordCheck(assembler, &before_word, &before_non_word, false); |
+ // Next character is not a word character. |
+ assembler->Bind(&before_non_word); |
+ Label ok; |
+ BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
+ assembler->GoTo(&ok); |
- assembler->Bind(&done); |
- |
- on_success->Emit(compiler, &new_trace); |
+ assembler->Bind(&before_word); |
+ BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
+ assembler->Bind(&ok); |
+ } else if (next_is_word_character == Trace::TRUE) { |
+ BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
+ } else { |
+ ASSERT(next_is_word_character == Trace::FALSE); |
+ BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
+ } |
} |
-// Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
-static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type, |
- RegExpCompiler* compiler, |
- RegExpNode* on_success, |
- Trace* trace) { |
+void AssertionNode::BacktrackIfPrevious( |
+ RegExpCompiler* compiler, |
+ Trace* trace, |
+ AssertionNode::IfPrevious backtrack_if_previous) { |
RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
- Label before_non_word; |
- Label before_word; |
- if (trace->characters_preloaded() != 1) { |
- assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
- } |
- // Fall through on non-word. |
- EmitWordCheck(assembler, &before_word, &before_non_word, false); |
- |
- // We will be loading the previous character into the current character |
- // register. |
Trace new_trace(*trace); |
new_trace.InvalidateCurrentCharacter(); |
- Label ok; |
- Label* boundary; |
- Label* not_boundary; |
- if (type == AssertionNode::AT_BOUNDARY) { |
- boundary = &ok; |
- not_boundary = new_trace.backtrack(); |
- } else { |
- not_boundary = &ok; |
- boundary = new_trace.backtrack(); |
- } |
+ Label fall_through, dummy; |
- // Next character is not a word character. |
- assembler->Bind(&before_non_word); |
- if (new_trace.cp_offset() == 0) { |
- // The start of input counts as a non-word character, so the question is |
- // decided if we are at the start. |
- assembler->CheckAtStart(not_boundary); |
- } |
- // We already checked that we are not at the start of input so it must be |
- // OK to load the previous character. |
- assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
- &ok, // Unused dummy label in this call. |
- false); |
- // Fall through on non-word. |
- EmitWordCheck(assembler, boundary, not_boundary, false); |
- assembler->GoTo(not_boundary); |
+ Label* non_word = backtrack_if_previous == kIsNonWord ? |
+ new_trace.backtrack() : |
+ &fall_through; |
+ Label* word = backtrack_if_previous == kIsNonWord ? |
+ &fall_through : |
+ new_trace.backtrack(); |
- // Next character is a word character. |
- assembler->Bind(&before_word); |
if (new_trace.cp_offset() == 0) { |
// The start of input counts as a non-word character, so the question is |
// decided if we are at the start. |
- assembler->CheckAtStart(boundary); |
+ assembler->CheckAtStart(non_word); |
} |
// We already checked that we are not at the start of input so it must be |
// OK to load the previous character. |
- assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
- &ok, // Unused dummy label in this call. |
- false); |
- bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY); |
- EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word); |
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
+ EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
- assembler->Bind(&ok); |
- |
- on_success->Emit(compiler, &new_trace); |
+ assembler->Bind(&fall_through); |
+ on_success()->Emit(compiler, &new_trace); |
} |
@@ -2861,13 +2870,9 @@ |
return; |
case AT_BOUNDARY: |
case AT_NON_BOUNDARY: { |
- EmitBoundaryCheck(type_, compiler, on_success(), trace); |
+ EmitBoundaryCheck(compiler, trace); |
return; |
} |
- case AFTER_WORD_CHARACTER: |
- case AFTER_NONWORD_CHARACTER: { |
- EmitHalfBoundaryCheck(type_, compiler, on_success(), trace); |
- } |
} |
on_success()->Emit(compiler, trace); |
} |
@@ -3277,24 +3282,75 @@ |
}; |
+// The '2' variant is has inclusive from and exclusive to. |
+static const int kSpaceRanges2[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, |
ulan
2012/04/16 13:52:22
Consider adding a static assert that the range cou
Erik Corry
2012/04/17 07:51:45
AddRange jsregexp.cc:118 uses all the elements in
|
+ 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, |
+ 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 }; |
+static const int kSpaceRange2Count = ARRAY_SIZE(kSpaceRanges2); |
+ |
+static const int kWordRanges2[] = { |
+ '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; |
+static const int kWordRange2Count = ARRAY_SIZE(kWordRanges2); |
+static const int kDigitRanges2[] = { '0', '9' + 1, 0x10000 }; |
+static const int kDigitRange2Count = ARRAY_SIZE(kDigitRanges2); |
+static const int kSurrogateRanges2[] = { 0xd800, 0xe000, 0x10000 }; |
+static const int kSurrogateRange2Count = ARRAY_SIZE(kSurrogateRanges2); |
+static const int kLineTerminatorRanges2[] = { 0x000A, 0x000B, 0x000D, 0x000E, |
+ 0x2028, 0x202A, 0x10000 }; |
+static const int kLineTerminatorRange2Count = ARRAY_SIZE(kLineTerminatorRanges2); |
ulan
2012/04/16 13:52:22
Long line above and too many new lines below.
Erik Corry
2012/04/17 07:51:45
Done.
|
+ |
+ |
+ |
+void BoyerMoorePositionInfo::Set(int character) { |
+ SetInterval(Interval(character, character)); |
+} |
+ |
+ |
+void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
+ s_ = AddRange(s_, kSpaceRanges2, kSpaceRange2Count, interval); |
+ w_ = AddRange(w_, kWordRanges2, kWordRange2Count, interval); |
+ d_ = AddRange(d_, kDigitRanges2, kDigitRange2Count, interval); |
+ surrogate_ = |
+ AddRange(surrogate_, kSurrogateRanges2, kSurrogateRange2Count, interval); |
+ if (interval.to() - interval.from() >= kMapSize - 1) { |
+ if (map_count_ != kMapSize) { |
+ map_count_ = kMapSize; |
+ for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
+ } |
+ return; |
+ } |
+ for (int i = interval.from(); i <= interval.to(); i++) { |
+ int mod_character = (i & kMask); |
+ if (!map_->at(mod_character)) { |
+ map_count_++; |
+ map_->at(mod_character) = true; |
+ } |
+ if (map_count_ == kMapSize) return; |
+ } |
+} |
+ |
+ |
+void BoyerMoorePositionInfo::SetAll() { |
+ s_ = w_ = d_ = kLatticeUnknown; |
+ if (map_count_ != kMapSize) { |
+ map_count_ = kMapSize; |
+ for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
+ } |
+} |
+ |
+ |
BoyerMooreLookahead::BoyerMooreLookahead( |
- int length, int map_length, RegExpCompiler* compiler) |
+ int length, RegExpCompiler* compiler) |
: length_(length), |
- map_length_(map_length), |
compiler_(compiler) { |
- ASSERT(IsPowerOf2(map_length)); |
if (compiler->ascii()) { |
max_char_ = String::kMaxAsciiCharCode; |
} else { |
max_char_ = String::kMaxUtf16CodeUnit; |
} |
- bitmaps_ = new ZoneList<ZoneList<bool>*>(length); |
+ bitmaps_ = new ZoneList<BoyerMoorePositionInfo*>(length); |
for (int i = 0; i < length; i++) { |
- bitmaps_->Add(new ZoneList<bool>(map_length)); |
- ZoneList<bool>* map = bitmaps_->at(i); |
- for (int i = 0; i < map_length; i++) { |
- map->Add(false); |
- } |
+ bitmaps_->Add(new BoyerMoorePositionInfo()); |
} |
} |
@@ -3305,7 +3361,7 @@ |
bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { |
int biggest_points = 0; |
for (int max_number_of_chars = 4; |
- max_number_of_chars < kTooManyCharacters; |
+ max_number_of_chars < 32; |
ulan
2012/04/16 13:52:22
Now 32 is more magical :)
Erik Corry
2012/04/17 07:51:45
Done.
|
max_number_of_chars *= 2) { |
biggest_points = |
FindBestInterval(max_number_of_chars, biggest_points, from, to); |
@@ -3332,7 +3388,7 @@ |
bool union_map[kSize]; |
for (int j = 0; j < kSize; j++) union_map[j] = false; |
while (i < length_ && Count(i) <= max_number_of_chars) { |
- ZoneList<bool>* map = bitmaps_->at(i); |
+ BoyerMoorePositionInfo* map = bitmaps_->at(i); |
for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j); |
i++; |
} |
@@ -3387,8 +3443,8 @@ |
int skip = max_lookahead + 1 - min_lookahead; |
for (int i = max_lookahead; i >= min_lookahead; i--) { |
- ZoneList<bool>* map = bitmaps_->at(i); |
- for (int j = 0; j < map_length_; j++) { |
+ BoyerMoorePositionInfo* map = bitmaps_->at(i); |
+ for (int j = 0; j < kSize; j++) { |
if (map->at(j)) { |
boolean_skip_table->set(j, kDontSkipArrayEntry); |
} |
@@ -3401,29 +3457,29 @@ |
// See comment above on the implementation of GetSkipTable. |
bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
+ const int kSize = RegExpMacroAssembler::kTableSize; |
+ |
int min_lookahead = 0; |
int max_lookahead = 0; |
if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false; |
bool found_single_character = false; |
- bool abandoned_search_for_single_character = false; |
int single_character = 0; |
for (int i = max_lookahead; i >= min_lookahead; i--) { |
- ZoneList<bool>* map = bitmaps_->at(i); |
- for (int j = 0; j < map_length_; j++) { |
+ BoyerMoorePositionInfo* map = bitmaps_->at(i); |
+ if (map->map_count() > 1 || |
+ (found_single_character && map->map_count() != 0)) { |
+ found_single_character = false; |
+ break; |
+ } |
+ for (int j = 0; j < kSize; j++) { |
if (map->at(j)) { |
- if (found_single_character) { |
- found_single_character = false; // Found two. |
- abandoned_search_for_single_character = true; |
- break; |
- } else { |
- found_single_character = true; |
- single_character = j; |
- } |
+ found_single_character = true; |
+ single_character = j; |
+ break; |
} |
} |
- if (abandoned_search_for_single_character) break; |
} |
int lookahead_width = max_lookahead + 1 - min_lookahead; |
@@ -3437,8 +3493,7 @@ |
Label cont, again; |
masm->Bind(&again); |
masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
- if (max_char_ > map_length_) { |
- ASSERT(map_length_ == RegExpMacroAssembler::kTableSize); |
+ if (max_char_ > kSize) { |
masm->CheckCharacterAfterAnd(single_character, |
RegExpMacroAssembler::kTableMask, |
&cont); |
@@ -3452,7 +3507,7 @@ |
} |
Handle<ByteArray> boolean_skip_table = |
- FACTORY->NewByteArray(map_length_, TENURED); |
+ FACTORY->NewByteArray(kSize, TENURED); |
int skip_distance = GetSkipTable( |
min_lookahead, max_lookahead, boolean_skip_table); |
ASSERT(skip_distance != 0); |
@@ -3631,16 +3686,20 @@ |
// not be atoms, they can be any reasonably limited character class or |
// small alternation. |
ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. |
- eats_at_least = |
- Min(kMaxLookaheadForBoyerMoore, |
- EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); |
- if (eats_at_least >= 1) { |
- BoyerMooreLookahead bm(eats_at_least, |
- RegExpMacroAssembler::kTableSize, |
- compiler); |
- GuardedAlternative alt0 = alternatives_->at(0); |
- alt0.node()->FillInBMInfo(0, &bm, not_at_start); |
- skip_was_emitted = bm.EmitSkipInstructions(macro_assembler); |
+ BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
+ if (lookahead == NULL) { |
+ eats_at_least = |
+ Min(kMaxLookaheadForBoyerMoore, |
+ EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); |
+ if (eats_at_least >= 1) { |
+ BoyerMooreLookahead* bm = |
+ new BoyerMooreLookahead(eats_at_least, compiler); |
+ GuardedAlternative alt0 = alternatives_->at(0); |
+ alt0.node()->FillInBMInfo(0, bm, not_at_start); |
+ skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); |
+ } |
+ } else { |
+ skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); |
} |
} |
} |
@@ -4203,12 +4262,6 @@ |
case AssertionNode::AFTER_NEWLINE: |
stream()->Add("label=\"(?<=\\n)\", shape=septagon"); |
break; |
- case AssertionNode::AFTER_WORD_CHARACTER: |
- stream()->Add("label=\"(?<=\\w)\", shape=septagon"); |
- break; |
- case AssertionNode::AFTER_NONWORD_CHARACTER: |
- stream()->Add("label=\"(?<=\\W)\", shape=septagon"); |
- break; |
} |
stream()->Add("];\n"); |
PrintAttributes(that); |
@@ -4313,21 +4366,6 @@ |
// ------------------------------------------------------------------- |
// Tree to graph conversion |
-static const uc16 kSpaceRanges[] = { 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, |
- 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, |
- 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000, 0xFEFF, 0xFEFF }; |
-static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
- |
-static const uc16 kWordRanges[] = { '0', '9', 'A', 'Z', '_', '_', 'a', 'z' }; |
-static const int kWordRangeCount = ARRAY_SIZE(kWordRanges); |
- |
-static const uc16 kDigitRanges[] = { '0', '9' }; |
-static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
- |
-static const uc16 kLineTerminatorRanges[] = { 0x000A, 0x000A, 0x000D, 0x000D, |
- 0x2028, 0x2029 }; |
-static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges); |
- |
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
@@ -4341,9 +4379,12 @@ |
return new TextNode(elements(), on_success); |
} |
+ |
static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, |
- const uc16* special_class, |
+ const int* special_class, |
int length) { |
+ length--; // Remove final 0x10000. |
+ ASSERT(special_class[length] == 0x10000); |
ASSERT(ranges->length() != 0); |
ASSERT(length != 0); |
ASSERT(special_class[0] != 0); |
@@ -4359,7 +4400,7 @@ |
return false; |
} |
range = ranges->at((i >> 1) + 1); |
- if (special_class[i+1] != range.from() - 1) { |
+ if (special_class[i+1] != range.from()) { |
return false; |
} |
} |
@@ -4371,14 +4412,17 @@ |
static bool CompareRanges(ZoneList<CharacterRange>* ranges, |
- const uc16* special_class, |
+ const int* special_class, |
int length) { |
+ length--; // Remove final 0x10000. |
+ ASSERT(special_class[length] == 0x10000); |
if (ranges->length() * 2 != length) { |
return false; |
} |
for (int i = 0; i < length; i += 2) { |
CharacterRange range = ranges->at(i >> 1); |
- if (range.from() != special_class[i] || range.to() != special_class[i+1]) { |
+ if (range.from() != special_class[i] || |
+ range.to() != special_class[i + 1] - 1) { |
return false; |
} |
} |
@@ -4395,31 +4439,31 @@ |
if (set_.is_standard()) { |
return true; |
} |
- if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
+ if (CompareRanges(set_.ranges(), kSpaceRanges2, kSpaceRange2Count)) { |
set_.set_standard_set_type('s'); |
return true; |
} |
- if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
+ if (CompareInverseRanges(set_.ranges(), kSpaceRanges2, kSpaceRange2Count)) { |
set_.set_standard_set_type('S'); |
return true; |
} |
if (CompareInverseRanges(set_.ranges(), |
- kLineTerminatorRanges, |
- kLineTerminatorRangeCount)) { |
+ kLineTerminatorRanges2, |
+ kLineTerminatorRange2Count)) { |
set_.set_standard_set_type('.'); |
return true; |
} |
if (CompareRanges(set_.ranges(), |
- kLineTerminatorRanges, |
- kLineTerminatorRangeCount)) { |
+ kLineTerminatorRanges2, |
+ kLineTerminatorRange2Count)) { |
set_.set_standard_set_type('n'); |
return true; |
} |
- if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
+ if (CompareRanges(set_.ranges(), kWordRanges2, kWordRange2Count)) { |
set_.set_standard_set_type('w'); |
return true; |
} |
- if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
+ if (CompareInverseRanges(set_.ranges(), kWordRanges2, kWordRange2Count)) { |
set_.set_standard_set_type('W'); |
return true; |
} |
@@ -4779,27 +4823,31 @@ |
} |
-static void AddClass(const uc16* elmv, |
+static void AddClass(const int* elmv, |
int elmc, |
ZoneList<CharacterRange>* ranges) { |
+ elmc--; |
+ ASSERT(elmv[elmc] == 0x10000); |
for (int i = 0; i < elmc; i += 2) { |
- ASSERT(elmv[i] <= elmv[i + 1]); |
- ranges->Add(CharacterRange(elmv[i], elmv[i + 1])); |
+ ASSERT(elmv[i] < elmv[i + 1]); |
+ ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
} |
} |
-static void AddClassNegated(const uc16 *elmv, |
+static void AddClassNegated(const int *elmv, |
int elmc, |
ZoneList<CharacterRange>* ranges) { |
+ elmc--; |
+ ASSERT(elmv[elmc] == 0x10000); |
ASSERT(elmv[0] != 0x0000); |
ASSERT(elmv[elmc-1] != String::kMaxUtf16CodeUnit); |
uc16 last = 0x0000; |
for (int i = 0; i < elmc; i += 2) { |
ASSERT(last <= elmv[i] - 1); |
- ASSERT(elmv[i] <= elmv[i + 1]); |
+ ASSERT(elmv[i] < elmv[i + 1]); |
ranges->Add(CharacterRange(last, elmv[i] - 1)); |
- last = elmv[i + 1] + 1; |
+ last = elmv[i + 1]; |
} |
ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit)); |
} |
@@ -4809,26 +4857,26 @@ |
ZoneList<CharacterRange>* ranges) { |
switch (type) { |
case 's': |
- AddClass(kSpaceRanges, kSpaceRangeCount, ranges); |
+ AddClass(kSpaceRanges2, kSpaceRange2Count, ranges); |
break; |
case 'S': |
- AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); |
+ AddClassNegated(kSpaceRanges2, kSpaceRange2Count, ranges); |
break; |
case 'w': |
- AddClass(kWordRanges, kWordRangeCount, ranges); |
+ AddClass(kWordRanges2, kWordRange2Count, ranges); |
break; |
case 'W': |
- AddClassNegated(kWordRanges, kWordRangeCount, ranges); |
+ AddClassNegated(kWordRanges2, kWordRange2Count, ranges); |
break; |
case 'd': |
- AddClass(kDigitRanges, kDigitRangeCount, ranges); |
+ AddClass(kDigitRanges2, kDigitRange2Count, ranges); |
break; |
case 'D': |
- AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); |
+ AddClassNegated(kDigitRanges2, kDigitRange2Count, ranges); |
break; |
case '.': |
- AddClassNegated(kLineTerminatorRanges, |
- kLineTerminatorRangeCount, |
+ AddClassNegated(kLineTerminatorRanges2, |
+ kLineTerminatorRange2Count, |
ranges); |
break; |
// This is not a character range as defined by the spec but a |
@@ -4840,8 +4888,8 @@ |
// This is the set of characters matched by the $ and ^ symbols |
// in multiline mode. |
case 'n': |
- AddClass(kLineTerminatorRanges, |
- kLineTerminatorRangeCount, |
+ AddClass(kLineTerminatorRanges2, |
+ kLineTerminatorRange2Count, |
ranges); |
break; |
default: |
@@ -4850,8 +4898,8 @@ |
} |
-Vector<const uc16> CharacterRange::GetWordBounds() { |
- return Vector<const uc16>(kWordRanges, kWordRangeCount); |
+Vector<const int> CharacterRange::GetWordBounds() { |
+ return Vector<const int>(kWordRanges2, kWordRange2Count - 1); |
} |
@@ -4883,7 +4931,7 @@ |
void CharacterRange::Split(ZoneList<CharacterRange>* base, |
- Vector<const uc16> overlay, |
+ Vector<const int> overlay, |
ZoneList<CharacterRange>** included, |
ZoneList<CharacterRange>** excluded) { |
ASSERT_EQ(NULL, *included); |
@@ -4892,7 +4940,7 @@ |
for (int i = 0; i < base->length(); i++) |
table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
for (int i = 0; i < overlay.length(); i += 2) { |
- table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
+ table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1), |
CharacterRangeSplitter::kInOverlay); |
} |
CharacterRangeSplitter callback(included, excluded); |
@@ -4978,88 +5026,7 @@ |
return true; |
} |
-SetRelation CharacterRange::WordCharacterRelation( |
- ZoneList<CharacterRange>* range) { |
- ASSERT(IsCanonical(range)); |
- int i = 0; // Word character range index. |
- int j = 0; // Argument range index. |
- ASSERT_NE(0, kWordRangeCount); |
- SetRelation result; |
- if (range->length() == 0) { |
- result.SetElementsInSecondSet(); |
- return result; |
- } |
- CharacterRange argument_range = range->at(0); |
- CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]); |
- while (i < kWordRangeCount && j < range->length()) { |
- // Check the two ranges for the five cases: |
- // - no overlap. |
- // - partial overlap (there are elements in both ranges that isn't |
- // in the other, and there are also elements that are in both). |
- // - argument range entirely inside word range. |
- // - word range entirely inside argument range. |
- // - ranges are completely equal. |
- // First check for no overlap. The earlier range is not in the other set. |
- if (argument_range.from() > word_range.to()) { |
- // Ranges are disjoint. The earlier word range contains elements that |
- // cannot be in the argument set. |
- result.SetElementsInSecondSet(); |
- } else if (word_range.from() > argument_range.to()) { |
- // Ranges are disjoint. The earlier argument range contains elements that |
- // cannot be in the word set. |
- result.SetElementsInFirstSet(); |
- } else if (word_range.from() <= argument_range.from() && |
- word_range.to() >= argument_range.from()) { |
- result.SetElementsInBothSets(); |
- // argument range completely inside word range. |
- if (word_range.from() < argument_range.from() || |
- word_range.to() > argument_range.from()) { |
- result.SetElementsInSecondSet(); |
- } |
- } else if (word_range.from() >= argument_range.from() && |
- word_range.to() <= argument_range.from()) { |
- result.SetElementsInBothSets(); |
- result.SetElementsInFirstSet(); |
- } else { |
- // There is overlap, and neither is a subrange of the other |
- result.SetElementsInFirstSet(); |
- result.SetElementsInSecondSet(); |
- result.SetElementsInBothSets(); |
- } |
- if (result.NonTrivialIntersection()) { |
- // The result is as (im)precise as we can possibly make it. |
- return result; |
- } |
- // Progress the range(s) with minimal to-character. |
- uc16 word_to = word_range.to(); |
- uc16 argument_to = argument_range.to(); |
- if (argument_to <= word_to) { |
- j++; |
- if (j < range->length()) { |
- argument_range = range->at(j); |
- } |
- } |
- if (word_to <= argument_to) { |
- i += 2; |
- if (i < kWordRangeCount) { |
- word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]); |
- } |
- } |
- } |
- // Check if anything wasn't compared in the loop. |
- if (i < kWordRangeCount) { |
- // word range contains something not in argument range. |
- result.SetElementsInSecondSet(); |
- } else if (j < range->length()) { |
- // Argument range contains something not in word range. |
- result.SetElementsInFirstSet(); |
- } |
- |
- return result; |
-} |
- |
- |
ZoneList<CharacterRange>* CharacterSet::ranges() { |
if (ranges_ == NULL) { |
ranges_ = new ZoneList<CharacterRange>(2); |
@@ -5191,145 +5158,6 @@ |
} |
-// Utility function for CharacterRange::Merge. Adds a range at the end of |
-// a canonicalized range list, if necessary merging the range with the last |
-// range of the list. |
-static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) { |
- if (set == NULL) return; |
- ASSERT(set->length() == 0 || set->at(set->length() - 1).to() < range.from()); |
- int n = set->length(); |
- if (n > 0) { |
- CharacterRange lastRange = set->at(n - 1); |
- if (lastRange.to() == range.from() - 1) { |
- set->at(n - 1) = CharacterRange(lastRange.from(), range.to()); |
- return; |
- } |
- } |
- set->Add(range); |
-} |
- |
- |
-static void AddRangeToSelectedSet(int selector, |
- ZoneList<CharacterRange>* first_set, |
- ZoneList<CharacterRange>* second_set, |
- ZoneList<CharacterRange>* intersection_set, |
- CharacterRange range) { |
- switch (selector) { |
- case kInsideFirst: |
- AddRangeToSet(first_set, range); |
- break; |
- case kInsideSecond: |
- AddRangeToSet(second_set, range); |
- break; |
- case kInsideBoth: |
- AddRangeToSet(intersection_set, range); |
- break; |
- } |
-} |
- |
- |
- |
-void CharacterRange::Merge(ZoneList<CharacterRange>* first_set, |
- ZoneList<CharacterRange>* second_set, |
- ZoneList<CharacterRange>* first_set_only_out, |
- ZoneList<CharacterRange>* second_set_only_out, |
- ZoneList<CharacterRange>* both_sets_out) { |
- // Inputs are canonicalized. |
- ASSERT(CharacterRange::IsCanonical(first_set)); |
- ASSERT(CharacterRange::IsCanonical(second_set)); |
- // Outputs are empty, if applicable. |
- ASSERT(first_set_only_out == NULL || first_set_only_out->length() == 0); |
- ASSERT(second_set_only_out == NULL || second_set_only_out->length() == 0); |
- ASSERT(both_sets_out == NULL || both_sets_out->length() == 0); |
- |
- // Merge sets by iterating through the lists in order of lowest "from" value, |
- // and putting intervals into one of three sets. |
- |
- if (first_set->length() == 0) { |
- second_set_only_out->AddAll(*second_set); |
- return; |
- } |
- if (second_set->length() == 0) { |
- first_set_only_out->AddAll(*first_set); |
- return; |
- } |
- // Indices into input lists. |
- int i1 = 0; |
- int i2 = 0; |
- // Cache length of input lists. |
- int n1 = first_set->length(); |
- int n2 = second_set->length(); |
- // Current range. May be invalid if state is kInsideNone. |
- int from = 0; |
- int to = -1; |
- // Where current range comes from. |
- int state = kInsideNone; |
- |
- while (i1 < n1 || i2 < n2) { |
- CharacterRange next_range; |
- int range_source; |
- if (i2 == n2 || |
- (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) { |
- // Next smallest element is in first set. |
- next_range = first_set->at(i1++); |
- range_source = kInsideFirst; |
- } else { |
- // Next smallest element is in second set. |
- next_range = second_set->at(i2++); |
- range_source = kInsideSecond; |
- } |
- if (to < next_range.from()) { |
- // Ranges disjoint: |current| |next| |
- AddRangeToSelectedSet(state, |
- first_set_only_out, |
- second_set_only_out, |
- both_sets_out, |
- CharacterRange(from, to)); |
- from = next_range.from(); |
- to = next_range.to(); |
- state = range_source; |
- } else { |
- if (from < next_range.from()) { |
- AddRangeToSelectedSet(state, |
- first_set_only_out, |
- second_set_only_out, |
- both_sets_out, |
- CharacterRange(from, next_range.from()-1)); |
- } |
- if (to < next_range.to()) { |
- // Ranges overlap: |current| |
- // |next| |
- AddRangeToSelectedSet(state | range_source, |
- first_set_only_out, |
- second_set_only_out, |
- both_sets_out, |
- CharacterRange(next_range.from(), to)); |
- from = to + 1; |
- to = next_range.to(); |
- state = range_source; |
- } else { |
- // Range included: |current| , possibly ending at same character. |
- // |next| |
- AddRangeToSelectedSet( |
- state | range_source, |
- first_set_only_out, |
- second_set_only_out, |
- both_sets_out, |
- CharacterRange(next_range.from(), next_range.to())); |
- from = next_range.to() + 1; |
- // If ranges end at same character, both ranges are consumed completely. |
- if (next_range.to() == to) state = kInsideNone; |
- } |
- } |
- } |
- AddRangeToSelectedSet(state, |
- first_set_only_out, |
- second_set_only_out, |
- both_sets_out, |
- CharacterRange(from, to)); |
-} |
- |
- |
void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, |
ZoneList<CharacterRange>* negated_ranges) { |
ASSERT(CharacterRange::IsCanonical(ranges)); |
@@ -5642,171 +5470,22 @@ |
void Analysis::VisitAssertion(AssertionNode* that) { |
EnsureAnalyzed(that->on_success()); |
- AssertionNode::AssertionNodeType type = that->type(); |
- if (type == AssertionNode::AT_BOUNDARY || |
- type == AssertionNode::AT_NON_BOUNDARY) { |
- // Check if the following character is known to be a word character |
- // or known to not be a word character. |
- ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet(); |
- |
- CharacterRange::Canonicalize(following_chars); |
- |
- SetRelation word_relation = |
- CharacterRange::WordCharacterRelation(following_chars); |
- if (word_relation.Disjoint()) { |
- // Includes the case where following_chars is empty (e.g., end-of-input). |
- // Following character is definitely *not* a word character. |
- type = (type == AssertionNode::AT_BOUNDARY) ? |
- AssertionNode::AFTER_WORD_CHARACTER : |
- AssertionNode::AFTER_NONWORD_CHARACTER; |
- that->set_type(type); |
- } else if (word_relation.ContainedIn()) { |
- // Following character is definitely a word character. |
- type = (type == AssertionNode::AT_BOUNDARY) ? |
- AssertionNode::AFTER_NONWORD_CHARACTER : |
- AssertionNode::AFTER_WORD_CHARACTER; |
- that->set_type(type); |
- } |
- } |
} |
-ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() { |
- if (first_character_set_ == NULL) { |
- if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) { |
- // If we can't find an exact solution within the budget, we |
- // set the value to the set of every character, i.e., all characters |
- // are possible. |
- ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1); |
- all_set->Add(CharacterRange::Everything()); |
- first_character_set_ = all_set; |
- } |
- } |
- return first_character_set_; |
+void BackReferenceNode::FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ // Working out the set of characters that a backreference can match is too |
+ // hard, so we just say that any character can match. |
+ bm->SetRest(offset); |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
} |
-int RegExpNode::ComputeFirstCharacterSet(int budget) { |
- // Default behavior is to not be able to determine the first character. |
- return kComputeFirstCharacterSetFail; |
-} |
+STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
+ RegExpMacroAssembler::kTableSize); |
-int LoopChoiceNode::ComputeFirstCharacterSet(int budget) { |
- budget--; |
- if (budget >= 0) { |
- // Find loop min-iteration. It's the value of the guarded choice node |
- // with a GEQ guard, if any. |
- int min_repetition = 0; |
- |
- for (int i = 0; i <= 1; i++) { |
- GuardedAlternative alternative = alternatives()->at(i); |
- ZoneList<Guard*>* guards = alternative.guards(); |
- if (guards != NULL && guards->length() > 0) { |
- Guard* guard = guards->at(0); |
- if (guard->op() == Guard::GEQ) { |
- min_repetition = guard->value(); |
- break; |
- } |
- } |
- } |
- |
- budget = loop_node()->ComputeFirstCharacterSet(budget); |
- if (budget >= 0) { |
- ZoneList<CharacterRange>* character_set = |
- loop_node()->first_character_set(); |
- if (body_can_be_zero_length() || min_repetition == 0) { |
- budget = continue_node()->ComputeFirstCharacterSet(budget); |
- if (budget < 0) return budget; |
- ZoneList<CharacterRange>* body_set = |
- continue_node()->first_character_set(); |
- ZoneList<CharacterRange>* union_set = |
- new ZoneList<CharacterRange>(Max(character_set->length(), |
- body_set->length())); |
- CharacterRange::Merge(character_set, |
- body_set, |
- union_set, |
- union_set, |
- union_set); |
- character_set = union_set; |
- } |
- set_first_character_set(character_set); |
- } |
- } |
- return budget; |
-} |
- |
- |
-int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) { |
- budget--; |
- if (budget >= 0) { |
- GuardedAlternative successor = this->alternatives()->at(1); |
- RegExpNode* successor_node = successor.node(); |
- budget = successor_node->ComputeFirstCharacterSet(budget); |
- if (budget >= 0) { |
- set_first_character_set(successor_node->first_character_set()); |
- } |
- } |
- return budget; |
-} |
- |
- |
-// The first character set of an EndNode is unknowable. Just use the |
-// default implementation that fails and returns all characters as possible. |
- |
- |
-int AssertionNode::ComputeFirstCharacterSet(int budget) { |
- budget -= 1; |
- if (budget >= 0) { |
- switch (type_) { |
- case AT_END: { |
- set_first_character_set(new ZoneList<CharacterRange>(0)); |
- break; |
- } |
- case AT_START: |
- case AT_BOUNDARY: |
- case AT_NON_BOUNDARY: |
- case AFTER_NEWLINE: |
- case AFTER_NONWORD_CHARACTER: |
- case AFTER_WORD_CHARACTER: { |
- ASSERT_NOT_NULL(on_success()); |
- budget = on_success()->ComputeFirstCharacterSet(budget); |
- if (budget >= 0) { |
- set_first_character_set(on_success()->first_character_set()); |
- } |
- break; |
- } |
- } |
- } |
- return budget; |
-} |
- |
- |
-int ActionNode::ComputeFirstCharacterSet(int budget) { |
- if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail; |
- budget--; |
- if (budget >= 0) { |
- ASSERT_NOT_NULL(on_success()); |
- budget = on_success()->ComputeFirstCharacterSet(budget); |
- if (budget >= 0) { |
- set_first_character_set(on_success()->first_character_set()); |
- } |
- } |
- return budget; |
-} |
- |
- |
-int BackReferenceNode::ComputeFirstCharacterSet(int budget) { |
- // We don't know anything about the first character of a backreference |
- // at this point. |
- // The potential first characters are the first characters of the capture, |
- // and the first characters of the on_success node, depending on whether the |
- // capture can be empty and whether it is known to be participating or known |
- // not to be. |
- return kComputeFirstCharacterSetFail; |
-} |
- |
- |
void ChoiceNode::FillInBMInfo( |
int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
ZoneList<GuardedAlternative>* alts = alternatives(); |
@@ -5814,24 +5493,33 @@ |
GuardedAlternative& alt = alts->at(i); |
if (alt.guards() != NULL && alt.guards()->length() != 0) { |
bm->SetRest(offset); // Give up trying to fill in info. |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
return; |
} |
alt.node()->FillInBMInfo(offset, bm, not_at_start); |
} |
+ if (offset == 0) set_bm_info(not_at_start, bm); |
} |
void TextNode::FillInBMInfo( |
- int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
- if (offset >= bm->length()) return; |
+ int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ if (initial_offset >= bm->length()) return; |
+ int offset = initial_offset; |
int max_char = bm->max_char(); |
for (int i = 0; i < elements()->length(); i++) { |
- if (offset >= bm->length()) return; |
+ if (offset >= bm->length()) { |
+ if (initial_offset == 0) set_bm_info(not_at_start, bm); |
+ return; |
+ } |
TextElement text = elements()->at(i); |
if (text.type == TextElement::ATOM) { |
RegExpAtom* atom = text.data.u_atom; |
for (int j = 0; j < atom->length(); j++, offset++) { |
- if (offset >= bm->length()) return; |
+ if (offset >= bm->length()) { |
+ if (initial_offset == 0) set_bm_info(not_at_start, bm); |
+ return; |
+ } |
uc16 character = atom->data()[j]; |
if (bm->compiler()->ignore_case()) { |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
@@ -5858,67 +5546,23 @@ |
CharacterRange& range = ranges->at(k); |
if (range.from() > max_char) continue; |
int to = Min(max_char, static_cast<int>(range.to())); |
- if (to - range.from() >= BoyerMooreLookahead::kTooManyCharacters) { |
- bm->SetAll(offset); |
- break; |
- } |
- for (int m = range.from(); m <= to; m++) { |
- bm->Set(offset, m); |
- } |
+ bm->SetInterval(offset, Interval(range.from(), to)); |
} |
} |
offset++; |
} |
} |
- if (offset >= bm->length()) return; |
+ if (offset >= bm->length()) { |
+ if (initial_offset == 0) set_bm_info(not_at_start, bm); |
+ return; |
+ } |
on_success()->FillInBMInfo(offset, |
bm, |
true); // Not at start after a text node. |
+ if (initial_offset == 0) set_bm_info(not_at_start, bm); |
} |
-int TextNode::ComputeFirstCharacterSet(int budget) { |
- budget--; |
- if (budget >= 0) { |
- ASSERT_NE(0, elements()->length()); |
- TextElement text = elements()->at(0); |
- if (text.type == TextElement::ATOM) { |
- RegExpAtom* atom = text.data.u_atom; |
- ASSERT_NE(0, atom->length()); |
- uc16 first_char = atom->data()[0]; |
- ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1); |
- range->Add(CharacterRange(first_char, first_char)); |
- set_first_character_set(range); |
- } else { |
- ASSERT(text.type == TextElement::CHAR_CLASS); |
- RegExpCharacterClass* char_class = text.data.u_char_class; |
- ZoneList<CharacterRange>* ranges = char_class->ranges(); |
- // TODO(lrn): Canonicalize ranges when they are created |
- // instead of waiting until now. |
- CharacterRange::Canonicalize(ranges); |
- if (char_class->is_negated()) { |
- int length = ranges->length(); |
- int new_length = length + 1; |
- if (length > 0) { |
- if (ranges->at(0).from() == 0) new_length--; |
- if (ranges->at(length - 1).to() == String::kMaxUtf16CodeUnit) { |
- new_length--; |
- } |
- } |
- ZoneList<CharacterRange>* negated_ranges = |
- new ZoneList<CharacterRange>(new_length); |
- CharacterRange::Negate(ranges, negated_ranges); |
- set_first_character_set(negated_ranges); |
- } else { |
- set_first_character_set(ranges); |
- } |
- } |
- } |
- return budget; |
-} |
- |
- |
- |
// ------------------------------------------------------------------- |
// Dispatch table construction |