Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 11211) |
+++ src/jsregexp.cc (working copy) |
@@ -108,6 +108,34 @@ |
} |
+// More makes code generation slower, less makes V8 benchmark score lower. |
+const int kMaxLookaheadForBoyerMoore = 8; |
+// In a 3-character pattern you can maximally step forwards 3 characters |
+// at a time, which is not always enough to pay for the extra logic. |
+const int kPatternTooShortForBoyerMoore = 2; |
+ |
+ |
+// Identifies the sort of regexps where the regexp engine is faster |
+// than the code used for atom matches. |
+static bool HasFewDifferentCharacters(Handle<String> pattern) { |
+ int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); |
+ if (length <= kPatternTooShortForBoyerMoore) return false; |
+ const int kMod = 128; |
+ bool character_found[kMod]; |
+ int different = 0; |
+ memset(&character_found[0], 0, sizeof(character_found)); |
+ for (int i = 0; i < length; i++) { |
+ int ch = (pattern->Get(i) & (kMod - 1)); |
+ if (!character_found[ch]) { |
+ character_found[ch] = true; |
+ different++; |
+ if (different * 3 > length) return false; |
ulan
2012/04/03 11:43:13
A comment explaining '3' would be helpful.
|
+ } |
+ } |
+ return true; |
+} |
+ |
+ |
// Generic RegExp methods. Dispatches to implementation specific methods. |
@@ -141,9 +169,14 @@ |
return Handle<Object>::null(); |
} |
- if (parse_result.simple && !flags.is_ignore_case()) { |
+ bool has_been_compiled = false; |
+ |
+ if (parse_result.simple && |
+ !flags.is_ignore_case() && |
+ !HasFewDifferentCharacters(pattern)) { |
// Parse-tree is a single atom that is equal to the pattern. |
AtomCompile(re, pattern, flags, pattern); |
+ has_been_compiled = true; |
} else if (parse_result.tree->IsAtom() && |
!flags.is_ignore_case() && |
parse_result.capture_count == 0) { |
@@ -151,8 +184,12 @@ |
Vector<const uc16> atom_pattern = atom->data(); |
Handle<String> atom_string = |
isolate->factory()->NewStringFromTwoByte(atom_pattern); |
- AtomCompile(re, pattern, flags, atom_string); |
- } else { |
+ if (!HasFewDifferentCharacters(atom_string)) { |
+ AtomCompile(re, pattern, flags, atom_string); |
+ has_been_compiled = true; |
+ } |
+ } |
+ if (!has_been_compiled) { |
IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
} |
ASSERT(re->data()->IsFixedArray()); |
@@ -3429,6 +3466,7 @@ |
return true; |
} |
+ |
/* Code generation for choice nodes. |
* |
* We generate quick checks that do a mask and compare to eliminate a |
@@ -3507,7 +3545,6 @@ |
* \______________/ |
*/ |
- |
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
int choice_count = alternatives_->length(); |
@@ -3578,9 +3615,6 @@ |
bool skip_was_emitted = false; |
- // More makes code generation slower, less makes V8 benchmark score lower. |
- const int kMaxLookaheadForBoyerMoore = 8; |
- |
if (!greedy_loop && choice_count == 2) { |
GuardedAlternative alt1 = alternatives_->at(1); |
if (alt1.guards() == NULL || alt1.guards()->length() == 0) { |