Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: src/jsregexp.cc

Issue 9959096: Switch regexp strategy for regexps that are just plain (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 11211)
+++ src/jsregexp.cc (working copy)
@@ -108,6 +108,34 @@
}
+// More makes code generation slower, less makes V8 benchmark score lower.
+const int kMaxLookaheadForBoyerMoore = 8;
+// In a 3-character pattern you can maximally step forwards 3 characters
+// at a time, which is not always enough to pay for the extra logic.
+const int kPatternTooShortForBoyerMoore = 2;
+
+
+// Identifies the sort of regexps where the regexp engine is faster
+// than the code used for atom matches.
+static bool HasFewDifferentCharacters(Handle<String> pattern) {
+ int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
+ if (length <= kPatternTooShortForBoyerMoore) return false;
+ const int kMod = 128;
+ bool character_found[kMod];
+ int different = 0;
+ memset(&character_found[0], 0, sizeof(character_found));
+ for (int i = 0; i < length; i++) {
+ int ch = (pattern->Get(i) & (kMod - 1));
+ if (!character_found[ch]) {
+ character_found[ch] = true;
+ different++;
+ if (different * 3 > length) return false;
ulan 2012/04/03 11:43:13 A comment explaining '3' would be helpful.
+ }
+ }
+ return true;
+}
+
+
// Generic RegExp methods. Dispatches to implementation specific methods.
@@ -141,9 +169,14 @@
return Handle<Object>::null();
}
- if (parse_result.simple && !flags.is_ignore_case()) {
+ bool has_been_compiled = false;
+
+ if (parse_result.simple &&
+ !flags.is_ignore_case() &&
+ !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
+ has_been_compiled = true;
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
parse_result.capture_count == 0) {
@@ -151,8 +184,12 @@
Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string =
isolate->factory()->NewStringFromTwoByte(atom_pattern);
- AtomCompile(re, pattern, flags, atom_string);
- } else {
+ if (!HasFewDifferentCharacters(atom_string)) {
+ AtomCompile(re, pattern, flags, atom_string);
+ has_been_compiled = true;
+ }
+ }
+ if (!has_been_compiled) {
IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
}
ASSERT(re->data()->IsFixedArray());
@@ -3429,6 +3466,7 @@
return true;
}
+
/* Code generation for choice nodes.
*
* We generate quick checks that do a mask and compare to eliminate a
@@ -3507,7 +3545,6 @@
* \______________/
*/
-
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
@@ -3578,9 +3615,6 @@
bool skip_was_emitted = false;
- // More makes code generation slower, less makes V8 benchmark score lower.
- const int kMaxLookaheadForBoyerMoore = 8;
-
if (!greedy_loop && choice_count == 2) {
GuardedAlternative alt1 = alternatives_->at(1);
if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698