Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(467)

Side by Side Diff: src/jsregexp.cc

Issue 9959096: Switch regexp strategy for regexps that are just plain (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 Factory* factory = isolate->factory(); 101 Factory* factory = isolate->factory();
102 Handle<FixedArray> elements = factory->NewFixedArray(2); 102 Handle<FixedArray> elements = factory->NewFixedArray(2);
103 elements->set(0, *pattern); 103 elements->set(0, *pattern);
104 elements->set(1, *error_text); 104 elements->set(1, *error_text);
105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); 105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); 106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
107 isolate->Throw(*regexp_err); 107 isolate->Throw(*regexp_err);
108 } 108 }
109 109
110 110
111 // More makes code generation slower, less makes V8 benchmark score lower.
112 const int kMaxLookaheadForBoyerMoore = 8;
113 // In a 3-character pattern you can maximally step forwards 3 characters
114 // at a time, which is not always enough to pay for the extra logic.
115 const int kPatternTooShortForBoyerMoore = 2;
116
117
118 // Identifies the sort of regexps where the regexp engine is faster
119 // than the code used for atom matches.
120 static bool HasFewDifferentCharacters(Handle<String> pattern) {
121 int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
122 if (length <= kPatternTooShortForBoyerMoore) return false;
123 const int kMod = 128;
124 bool character_found[kMod];
125 int different = 0;
126 memset(&character_found[0], 0, sizeof(character_found));
127 for (int i = 0; i < length; i++) {
128 int ch = (pattern->Get(i) & (kMod - 1));
129 if (!character_found[ch]) {
130 character_found[ch] = true;
131 different++;
132 if (different * 3 > length) return false;
ulan 2012/04/03 11:43:13 A comment explaining '3' would be helpful.
133 }
134 }
135 return true;
136 }
137
138
111 // Generic RegExp methods. Dispatches to implementation specific methods. 139 // Generic RegExp methods. Dispatches to implementation specific methods.
112 140
113 141
114 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 142 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
115 Handle<String> pattern, 143 Handle<String> pattern,
116 Handle<String> flag_str) { 144 Handle<String> flag_str) {
117 Isolate* isolate = re->GetIsolate(); 145 Isolate* isolate = re->GetIsolate();
118 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 146 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
119 CompilationCache* compilation_cache = isolate->compilation_cache(); 147 CompilationCache* compilation_cache = isolate->compilation_cache();
120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); 148 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
(...skipping 13 matching lines...) Expand all
134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 162 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
135 &parse_result)) { 163 &parse_result)) {
136 // Throw an exception if we fail to parse the pattern. 164 // Throw an exception if we fail to parse the pattern.
137 ThrowRegExpException(re, 165 ThrowRegExpException(re,
138 pattern, 166 pattern,
139 parse_result.error, 167 parse_result.error,
140 "malformed_regexp"); 168 "malformed_regexp");
141 return Handle<Object>::null(); 169 return Handle<Object>::null();
142 } 170 }
143 171
144 if (parse_result.simple && !flags.is_ignore_case()) { 172 bool has_been_compiled = false;
173
174 if (parse_result.simple &&
175 !flags.is_ignore_case() &&
176 !HasFewDifferentCharacters(pattern)) {
145 // Parse-tree is a single atom that is equal to the pattern. 177 // Parse-tree is a single atom that is equal to the pattern.
146 AtomCompile(re, pattern, flags, pattern); 178 AtomCompile(re, pattern, flags, pattern);
179 has_been_compiled = true;
147 } else if (parse_result.tree->IsAtom() && 180 } else if (parse_result.tree->IsAtom() &&
148 !flags.is_ignore_case() && 181 !flags.is_ignore_case() &&
149 parse_result.capture_count == 0) { 182 parse_result.capture_count == 0) {
150 RegExpAtom* atom = parse_result.tree->AsAtom(); 183 RegExpAtom* atom = parse_result.tree->AsAtom();
151 Vector<const uc16> atom_pattern = atom->data(); 184 Vector<const uc16> atom_pattern = atom->data();
152 Handle<String> atom_string = 185 Handle<String> atom_string =
153 isolate->factory()->NewStringFromTwoByte(atom_pattern); 186 isolate->factory()->NewStringFromTwoByte(atom_pattern);
154 AtomCompile(re, pattern, flags, atom_string); 187 if (!HasFewDifferentCharacters(atom_string)) {
155 } else { 188 AtomCompile(re, pattern, flags, atom_string);
189 has_been_compiled = true;
190 }
191 }
192 if (!has_been_compiled) {
156 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); 193 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
157 } 194 }
158 ASSERT(re->data()->IsFixedArray()); 195 ASSERT(re->data()->IsFixedArray());
159 // Compilation succeeded so the data is set on the regexp 196 // Compilation succeeded so the data is set on the regexp
160 // and we can store it in the cache. 197 // and we can store it in the cache.
161 Handle<FixedArray> data(FixedArray::cast(re->data())); 198 Handle<FixedArray> data(FixedArray::cast(re->data()));
162 compilation_cache->PutRegExp(pattern, flags, data); 199 compilation_cache->PutRegExp(pattern, flags, data);
163 200
164 return re; 201 return re;
165 } 202 }
(...skipping 3256 matching lines...) Expand 10 before | Expand all | Expand 10 after
3422 masm->Bind(&again); 3459 masm->Bind(&again);
3423 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3460 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3424 masm->CheckBitInTable(boolean_skip_table, &cont); 3461 masm->CheckBitInTable(boolean_skip_table, &cont);
3425 masm->AdvanceCurrentPosition(skip_distance); 3462 masm->AdvanceCurrentPosition(skip_distance);
3426 masm->GoTo(&again); 3463 masm->GoTo(&again);
3427 masm->Bind(&cont); 3464 masm->Bind(&cont);
3428 3465
3429 return true; 3466 return true;
3430 } 3467 }
3431 3468
3469
3432 /* Code generation for choice nodes. 3470 /* Code generation for choice nodes.
3433 * 3471 *
3434 * We generate quick checks that do a mask and compare to eliminate a 3472 * We generate quick checks that do a mask and compare to eliminate a
3435 * choice. If the quick check succeeds then it jumps to the continuation to 3473 * choice. If the quick check succeeds then it jumps to the continuation to
3436 * do slow checks and check subsequent nodes. If it fails (the common case) 3474 * do slow checks and check subsequent nodes. If it fails (the common case)
3437 * it falls through to the next choice. 3475 * it falls through to the next choice.
3438 * 3476 *
3439 * Here is the desired flow graph. Nodes directly below each other imply 3477 * Here is the desired flow graph. Nodes directly below each other imply
3440 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative 3478 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3441 * 3 doesn't have a quick check so we have to call the slow check. 3479 * 3 doesn't have a quick check so we have to call the slow check.
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
3500 * | F/ | 3538 * | F/ |
3501 * | / | 3539 * | / |
3502 * | R | 3540 * | R |
3503 * | / | 3541 * | / |
3504 * F VL | 3542 * F VL |
3505 * <------U | 3543 * <------U |
3506 * back |S | 3544 * back |S |
3507 * \______________/ 3545 * \______________/
3508 */ 3546 */
3509 3547
3510
3511 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3548 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3512 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3549 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3513 int choice_count = alternatives_->length(); 3550 int choice_count = alternatives_->length();
3514 #ifdef DEBUG 3551 #ifdef DEBUG
3515 for (int i = 0; i < choice_count - 1; i++) { 3552 for (int i = 0; i < choice_count - 1; i++) {
3516 GuardedAlternative alternative = alternatives_->at(i); 3553 GuardedAlternative alternative = alternatives_->at(i);
3517 ZoneList<Guard*>* guards = alternative.guards(); 3554 ZoneList<Guard*>* guards = alternative.guards();
3518 int guard_count = (guards == NULL) ? 0 : guards->length(); 3555 int guard_count = (guards == NULL) ? 0 : guards->length();
3519 for (int j = 0; j < guard_count; j++) { 3556 for (int j = 0; j < guard_count; j++) {
3520 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); 3557 ASSERT(!trace->mentions_reg(guards->at(j)->reg()));
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
3571 macro_assembler->Bind(&second_choice); 3608 macro_assembler->Bind(&second_choice);
3572 3609
3573 int first_normal_choice = greedy_loop ? 1 : 0; 3610 int first_normal_choice = greedy_loop ? 1 : 0;
3574 3611
3575 bool not_at_start = current_trace->at_start() == Trace::FALSE; 3612 bool not_at_start = current_trace->at_start() == Trace::FALSE;
3576 const int kEatsAtLeastNotYetInitialized = -1; 3613 const int kEatsAtLeastNotYetInitialized = -1;
3577 int eats_at_least = kEatsAtLeastNotYetInitialized; 3614 int eats_at_least = kEatsAtLeastNotYetInitialized;
3578 3615
3579 bool skip_was_emitted = false; 3616 bool skip_was_emitted = false;
3580 3617
3581 // More makes code generation slower, less makes V8 benchmark score lower.
3582 const int kMaxLookaheadForBoyerMoore = 8;
3583
3584 if (!greedy_loop && choice_count == 2) { 3618 if (!greedy_loop && choice_count == 2) {
3585 GuardedAlternative alt1 = alternatives_->at(1); 3619 GuardedAlternative alt1 = alternatives_->at(1);
3586 if (alt1.guards() == NULL || alt1.guards()->length() == 0) { 3620 if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
3587 RegExpNode* eats_anything_node = alt1.node(); 3621 RegExpNode* eats_anything_node = alt1.node();
3588 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == 3622 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
3589 this) { 3623 this) {
3590 // At this point we know that we are at a non-greedy loop that will eat 3624 // At this point we know that we are at a non-greedy loop that will eat
3591 // any character one at a time. Any non-anchored regexp has such a 3625 // any character one at a time. Any non-anchored regexp has such a
3592 // loop prepended to it in order to find where it starts. We look for 3626 // loop prepended to it in order to find where it starts. We look for
3593 // a pattern of the form ...abc... where we can look 6 characters ahead 3627 // a pattern of the form ...abc... where we can look 6 characters ahead
(...skipping 2503 matching lines...) Expand 10 before | Expand all | Expand 10 after
6097 } 6131 }
6098 6132
6099 return compiler.Assemble(&macro_assembler, 6133 return compiler.Assemble(&macro_assembler,
6100 node, 6134 node,
6101 data->capture_count, 6135 data->capture_count,
6102 pattern); 6136 pattern);
6103 } 6137 }
6104 6138
6105 6139
6106 }} // namespace v8::internal 6140 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698