OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
101 Factory* factory = isolate->factory(); | 101 Factory* factory = isolate->factory(); |
102 Handle<FixedArray> elements = factory->NewFixedArray(2); | 102 Handle<FixedArray> elements = factory->NewFixedArray(2); |
103 elements->set(0, *pattern); | 103 elements->set(0, *pattern); |
104 elements->set(1, *error_text); | 104 elements->set(1, *error_text); |
105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); | 105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); | 106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); |
107 isolate->Throw(*regexp_err); | 107 isolate->Throw(*regexp_err); |
108 } | 108 } |
109 | 109 |
110 | 110 |
111 // More makes code generation slower, less makes V8 benchmark score lower. | |
112 const int kMaxLookaheadForBoyerMoore = 8; | |
113 // In a 3-character pattern you can maximally step forwards 3 characters | |
114 // at a time, which is not always enough to pay for the extra logic. | |
115 const int kPatternTooShortForBoyerMoore = 2; | |
116 | |
117 | |
118 // Identifies the sort of regexps where the regexp engine is faster | |
119 // than the code used for atom matches. | |
120 static bool HasFewDifferentCharacters(Handle<String> pattern) { | |
121 int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); | |
122 if (length <= kPatternTooShortForBoyerMoore) return false; | |
123 const int kMod = 128; | |
124 bool character_found[kMod]; | |
125 int different = 0; | |
126 memset(&character_found[0], 0, sizeof(character_found)); | |
127 for (int i = 0; i < length; i++) { | |
128 int ch = (pattern->Get(i) & (kMod - 1)); | |
129 if (!character_found[ch]) { | |
130 character_found[ch] = true; | |
131 different++; | |
132 if (different * 3 > length) return false; | |
ulan
2012/04/03 11:43:13
A comment explaining '3' would be helpful.
| |
133 } | |
134 } | |
135 return true; | |
136 } | |
137 | |
138 | |
111 // Generic RegExp methods. Dispatches to implementation specific methods. | 139 // Generic RegExp methods. Dispatches to implementation specific methods. |
112 | 140 |
113 | 141 |
114 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 142 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
115 Handle<String> pattern, | 143 Handle<String> pattern, |
116 Handle<String> flag_str) { | 144 Handle<String> flag_str) { |
117 Isolate* isolate = re->GetIsolate(); | 145 Isolate* isolate = re->GetIsolate(); |
118 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 146 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
119 CompilationCache* compilation_cache = isolate->compilation_cache(); | 147 CompilationCache* compilation_cache = isolate->compilation_cache(); |
120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); | 148 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); |
(...skipping 13 matching lines...) Expand all Loading... | |
134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 162 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
135 &parse_result)) { | 163 &parse_result)) { |
136 // Throw an exception if we fail to parse the pattern. | 164 // Throw an exception if we fail to parse the pattern. |
137 ThrowRegExpException(re, | 165 ThrowRegExpException(re, |
138 pattern, | 166 pattern, |
139 parse_result.error, | 167 parse_result.error, |
140 "malformed_regexp"); | 168 "malformed_regexp"); |
141 return Handle<Object>::null(); | 169 return Handle<Object>::null(); |
142 } | 170 } |
143 | 171 |
144 if (parse_result.simple && !flags.is_ignore_case()) { | 172 bool has_been_compiled = false; |
173 | |
174 if (parse_result.simple && | |
175 !flags.is_ignore_case() && | |
176 !HasFewDifferentCharacters(pattern)) { | |
145 // Parse-tree is a single atom that is equal to the pattern. | 177 // Parse-tree is a single atom that is equal to the pattern. |
146 AtomCompile(re, pattern, flags, pattern); | 178 AtomCompile(re, pattern, flags, pattern); |
179 has_been_compiled = true; | |
147 } else if (parse_result.tree->IsAtom() && | 180 } else if (parse_result.tree->IsAtom() && |
148 !flags.is_ignore_case() && | 181 !flags.is_ignore_case() && |
149 parse_result.capture_count == 0) { | 182 parse_result.capture_count == 0) { |
150 RegExpAtom* atom = parse_result.tree->AsAtom(); | 183 RegExpAtom* atom = parse_result.tree->AsAtom(); |
151 Vector<const uc16> atom_pattern = atom->data(); | 184 Vector<const uc16> atom_pattern = atom->data(); |
152 Handle<String> atom_string = | 185 Handle<String> atom_string = |
153 isolate->factory()->NewStringFromTwoByte(atom_pattern); | 186 isolate->factory()->NewStringFromTwoByte(atom_pattern); |
154 AtomCompile(re, pattern, flags, atom_string); | 187 if (!HasFewDifferentCharacters(atom_string)) { |
155 } else { | 188 AtomCompile(re, pattern, flags, atom_string); |
189 has_been_compiled = true; | |
190 } | |
191 } | |
192 if (!has_been_compiled) { | |
156 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); | 193 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
157 } | 194 } |
158 ASSERT(re->data()->IsFixedArray()); | 195 ASSERT(re->data()->IsFixedArray()); |
159 // Compilation succeeded so the data is set on the regexp | 196 // Compilation succeeded so the data is set on the regexp |
160 // and we can store it in the cache. | 197 // and we can store it in the cache. |
161 Handle<FixedArray> data(FixedArray::cast(re->data())); | 198 Handle<FixedArray> data(FixedArray::cast(re->data())); |
162 compilation_cache->PutRegExp(pattern, flags, data); | 199 compilation_cache->PutRegExp(pattern, flags, data); |
163 | 200 |
164 return re; | 201 return re; |
165 } | 202 } |
(...skipping 3256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3422 masm->Bind(&again); | 3459 masm->Bind(&again); |
3423 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3460 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3424 masm->CheckBitInTable(boolean_skip_table, &cont); | 3461 masm->CheckBitInTable(boolean_skip_table, &cont); |
3425 masm->AdvanceCurrentPosition(skip_distance); | 3462 masm->AdvanceCurrentPosition(skip_distance); |
3426 masm->GoTo(&again); | 3463 masm->GoTo(&again); |
3427 masm->Bind(&cont); | 3464 masm->Bind(&cont); |
3428 | 3465 |
3429 return true; | 3466 return true; |
3430 } | 3467 } |
3431 | 3468 |
3469 | |
3432 /* Code generation for choice nodes. | 3470 /* Code generation for choice nodes. |
3433 * | 3471 * |
3434 * We generate quick checks that do a mask and compare to eliminate a | 3472 * We generate quick checks that do a mask and compare to eliminate a |
3435 * choice. If the quick check succeeds then it jumps to the continuation to | 3473 * choice. If the quick check succeeds then it jumps to the continuation to |
3436 * do slow checks and check subsequent nodes. If it fails (the common case) | 3474 * do slow checks and check subsequent nodes. If it fails (the common case) |
3437 * it falls through to the next choice. | 3475 * it falls through to the next choice. |
3438 * | 3476 * |
3439 * Here is the desired flow graph. Nodes directly below each other imply | 3477 * Here is the desired flow graph. Nodes directly below each other imply |
3440 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative | 3478 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative |
3441 * 3 doesn't have a quick check so we have to call the slow check. | 3479 * 3 doesn't have a quick check so we have to call the slow check. |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3500 * | F/ | | 3538 * | F/ | |
3501 * | / | | 3539 * | / | |
3502 * | R | | 3540 * | R | |
3503 * | / | | 3541 * | / | |
3504 * F VL | | 3542 * F VL | |
3505 * <------U | | 3543 * <------U | |
3506 * back |S | | 3544 * back |S | |
3507 * \______________/ | 3545 * \______________/ |
3508 */ | 3546 */ |
3509 | 3547 |
3510 | |
3511 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3548 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3512 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3549 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3513 int choice_count = alternatives_->length(); | 3550 int choice_count = alternatives_->length(); |
3514 #ifdef DEBUG | 3551 #ifdef DEBUG |
3515 for (int i = 0; i < choice_count - 1; i++) { | 3552 for (int i = 0; i < choice_count - 1; i++) { |
3516 GuardedAlternative alternative = alternatives_->at(i); | 3553 GuardedAlternative alternative = alternatives_->at(i); |
3517 ZoneList<Guard*>* guards = alternative.guards(); | 3554 ZoneList<Guard*>* guards = alternative.guards(); |
3518 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3555 int guard_count = (guards == NULL) ? 0 : guards->length(); |
3519 for (int j = 0; j < guard_count; j++) { | 3556 for (int j = 0; j < guard_count; j++) { |
3520 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); | 3557 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3571 macro_assembler->Bind(&second_choice); | 3608 macro_assembler->Bind(&second_choice); |
3572 | 3609 |
3573 int first_normal_choice = greedy_loop ? 1 : 0; | 3610 int first_normal_choice = greedy_loop ? 1 : 0; |
3574 | 3611 |
3575 bool not_at_start = current_trace->at_start() == Trace::FALSE; | 3612 bool not_at_start = current_trace->at_start() == Trace::FALSE; |
3576 const int kEatsAtLeastNotYetInitialized = -1; | 3613 const int kEatsAtLeastNotYetInitialized = -1; |
3577 int eats_at_least = kEatsAtLeastNotYetInitialized; | 3614 int eats_at_least = kEatsAtLeastNotYetInitialized; |
3578 | 3615 |
3579 bool skip_was_emitted = false; | 3616 bool skip_was_emitted = false; |
3580 | 3617 |
3581 // More makes code generation slower, less makes V8 benchmark score lower. | |
3582 const int kMaxLookaheadForBoyerMoore = 8; | |
3583 | |
3584 if (!greedy_loop && choice_count == 2) { | 3618 if (!greedy_loop && choice_count == 2) { |
3585 GuardedAlternative alt1 = alternatives_->at(1); | 3619 GuardedAlternative alt1 = alternatives_->at(1); |
3586 if (alt1.guards() == NULL || alt1.guards()->length() == 0) { | 3620 if (alt1.guards() == NULL || alt1.guards()->length() == 0) { |
3587 RegExpNode* eats_anything_node = alt1.node(); | 3621 RegExpNode* eats_anything_node = alt1.node(); |
3588 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == | 3622 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == |
3589 this) { | 3623 this) { |
3590 // At this point we know that we are at a non-greedy loop that will eat | 3624 // At this point we know that we are at a non-greedy loop that will eat |
3591 // any character one at a time. Any non-anchored regexp has such a | 3625 // any character one at a time. Any non-anchored regexp has such a |
3592 // loop prepended to it in order to find where it starts. We look for | 3626 // loop prepended to it in order to find where it starts. We look for |
3593 // a pattern of the form ...abc... where we can look 6 characters ahead | 3627 // a pattern of the form ...abc... where we can look 6 characters ahead |
(...skipping 2503 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6097 } | 6131 } |
6098 | 6132 |
6099 return compiler.Assemble(¯o_assembler, | 6133 return compiler.Assemble(¯o_assembler, |
6100 node, | 6134 node, |
6101 data->capture_count, | 6135 data->capture_count, |
6102 pattern); | 6136 pattern); |
6103 } | 6137 } |
6104 | 6138 |
6105 | 6139 |
6106 }} // namespace v8::internal | 6140 }} // namespace v8::internal |
OLD | NEW |