Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 101 Factory* factory = isolate->factory(); | 101 Factory* factory = isolate->factory(); |
| 102 Handle<FixedArray> elements = factory->NewFixedArray(2); | 102 Handle<FixedArray> elements = factory->NewFixedArray(2); |
| 103 elements->set(0, *pattern); | 103 elements->set(0, *pattern); |
| 104 elements->set(1, *error_text); | 104 elements->set(1, *error_text); |
| 105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); | 105 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
| 106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); | 106 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); |
| 107 isolate->Throw(*regexp_err); | 107 isolate->Throw(*regexp_err); |
| 108 } | 108 } |
| 109 | 109 |
| 110 | 110 |
| 111 // More makes code generation slower, less makes V8 benchmark score lower. | |
| 112 const int kMaxLookaheadForBoyerMoore = 8; | |
| 113 // In a 3-character pattern you can maximally step forwards 3 characters | |
| 114 // at a time, which is not always enough to pay for the extra logic. | |
| 115 const int kPatternTooShortForBoyerMoore = 2; | |
| 116 | |
| 117 | |
| 118 // Identifies the sort of regexps where the regexp engine is faster | |
| 119 // than the code used for atom matches. | |
| 120 static bool HasFewDifferentCharacters(Handle<String> pattern) { | |
| 121 int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); | |
| 122 if (length <= kPatternTooShortForBoyerMoore) return false; | |
| 123 const int kMod = 128; | |
| 124 bool character_found[kMod]; | |
| 125 int different = 0; | |
| 126 memset(&character_found[0], 0, sizeof(character_found)); | |
| 127 for (int i = 0; i < length; i++) { | |
| 128 int ch = (pattern->Get(i) & (kMod - 1)); | |
| 129 if (!character_found[ch]) { | |
| 130 character_found[ch] = true; | |
| 131 different++; | |
| 132 if (different * 3 > length) return false; | |
|
ulan
2012/04/03 11:43:13
A comment explaining '3' would be helpful.
| |
| 133 } | |
| 134 } | |
| 135 return true; | |
| 136 } | |
| 137 | |
| 138 | |
| 111 // Generic RegExp methods. Dispatches to implementation specific methods. | 139 // Generic RegExp methods. Dispatches to implementation specific methods. |
| 112 | 140 |
| 113 | 141 |
| 114 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 142 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| 115 Handle<String> pattern, | 143 Handle<String> pattern, |
| 116 Handle<String> flag_str) { | 144 Handle<String> flag_str) { |
| 117 Isolate* isolate = re->GetIsolate(); | 145 Isolate* isolate = re->GetIsolate(); |
| 118 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 146 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 119 CompilationCache* compilation_cache = isolate->compilation_cache(); | 147 CompilationCache* compilation_cache = isolate->compilation_cache(); |
| 120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); | 148 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 162 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 135 &parse_result)) { | 163 &parse_result)) { |
| 136 // Throw an exception if we fail to parse the pattern. | 164 // Throw an exception if we fail to parse the pattern. |
| 137 ThrowRegExpException(re, | 165 ThrowRegExpException(re, |
| 138 pattern, | 166 pattern, |
| 139 parse_result.error, | 167 parse_result.error, |
| 140 "malformed_regexp"); | 168 "malformed_regexp"); |
| 141 return Handle<Object>::null(); | 169 return Handle<Object>::null(); |
| 142 } | 170 } |
| 143 | 171 |
| 144 if (parse_result.simple && !flags.is_ignore_case()) { | 172 bool has_been_compiled = false; |
| 173 | |
| 174 if (parse_result.simple && | |
| 175 !flags.is_ignore_case() && | |
| 176 !HasFewDifferentCharacters(pattern)) { | |
| 145 // Parse-tree is a single atom that is equal to the pattern. | 177 // Parse-tree is a single atom that is equal to the pattern. |
| 146 AtomCompile(re, pattern, flags, pattern); | 178 AtomCompile(re, pattern, flags, pattern); |
| 179 has_been_compiled = true; | |
| 147 } else if (parse_result.tree->IsAtom() && | 180 } else if (parse_result.tree->IsAtom() && |
| 148 !flags.is_ignore_case() && | 181 !flags.is_ignore_case() && |
| 149 parse_result.capture_count == 0) { | 182 parse_result.capture_count == 0) { |
| 150 RegExpAtom* atom = parse_result.tree->AsAtom(); | 183 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 151 Vector<const uc16> atom_pattern = atom->data(); | 184 Vector<const uc16> atom_pattern = atom->data(); |
| 152 Handle<String> atom_string = | 185 Handle<String> atom_string = |
| 153 isolate->factory()->NewStringFromTwoByte(atom_pattern); | 186 isolate->factory()->NewStringFromTwoByte(atom_pattern); |
| 154 AtomCompile(re, pattern, flags, atom_string); | 187 if (!HasFewDifferentCharacters(atom_string)) { |
| 155 } else { | 188 AtomCompile(re, pattern, flags, atom_string); |
| 189 has_been_compiled = true; | |
| 190 } | |
| 191 } | |
| 192 if (!has_been_compiled) { | |
| 156 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); | 193 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
| 157 } | 194 } |
| 158 ASSERT(re->data()->IsFixedArray()); | 195 ASSERT(re->data()->IsFixedArray()); |
| 159 // Compilation succeeded so the data is set on the regexp | 196 // Compilation succeeded so the data is set on the regexp |
| 160 // and we can store it in the cache. | 197 // and we can store it in the cache. |
| 161 Handle<FixedArray> data(FixedArray::cast(re->data())); | 198 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 162 compilation_cache->PutRegExp(pattern, flags, data); | 199 compilation_cache->PutRegExp(pattern, flags, data); |
| 163 | 200 |
| 164 return re; | 201 return re; |
| 165 } | 202 } |
| (...skipping 3256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3422 masm->Bind(&again); | 3459 masm->Bind(&again); |
| 3423 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3460 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
| 3424 masm->CheckBitInTable(boolean_skip_table, &cont); | 3461 masm->CheckBitInTable(boolean_skip_table, &cont); |
| 3425 masm->AdvanceCurrentPosition(skip_distance); | 3462 masm->AdvanceCurrentPosition(skip_distance); |
| 3426 masm->GoTo(&again); | 3463 masm->GoTo(&again); |
| 3427 masm->Bind(&cont); | 3464 masm->Bind(&cont); |
| 3428 | 3465 |
| 3429 return true; | 3466 return true; |
| 3430 } | 3467 } |
| 3431 | 3468 |
| 3469 | |
| 3432 /* Code generation for choice nodes. | 3470 /* Code generation for choice nodes. |
| 3433 * | 3471 * |
| 3434 * We generate quick checks that do a mask and compare to eliminate a | 3472 * We generate quick checks that do a mask and compare to eliminate a |
| 3435 * choice. If the quick check succeeds then it jumps to the continuation to | 3473 * choice. If the quick check succeeds then it jumps to the continuation to |
| 3436 * do slow checks and check subsequent nodes. If it fails (the common case) | 3474 * do slow checks and check subsequent nodes. If it fails (the common case) |
| 3437 * it falls through to the next choice. | 3475 * it falls through to the next choice. |
| 3438 * | 3476 * |
| 3439 * Here is the desired flow graph. Nodes directly below each other imply | 3477 * Here is the desired flow graph. Nodes directly below each other imply |
| 3440 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative | 3478 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative |
| 3441 * 3 doesn't have a quick check so we have to call the slow check. | 3479 * 3 doesn't have a quick check so we have to call the slow check. |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3500 * | F/ | | 3538 * | F/ | |
| 3501 * | / | | 3539 * | / | |
| 3502 * | R | | 3540 * | R | |
| 3503 * | / | | 3541 * | / | |
| 3504 * F VL | | 3542 * F VL | |
| 3505 * <------U | | 3543 * <------U | |
| 3506 * back |S | | 3544 * back |S | |
| 3507 * \______________/ | 3545 * \______________/ |
| 3508 */ | 3546 */ |
| 3509 | 3547 |
| 3510 | |
| 3511 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3548 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3512 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3549 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 3513 int choice_count = alternatives_->length(); | 3550 int choice_count = alternatives_->length(); |
| 3514 #ifdef DEBUG | 3551 #ifdef DEBUG |
| 3515 for (int i = 0; i < choice_count - 1; i++) { | 3552 for (int i = 0; i < choice_count - 1; i++) { |
| 3516 GuardedAlternative alternative = alternatives_->at(i); | 3553 GuardedAlternative alternative = alternatives_->at(i); |
| 3517 ZoneList<Guard*>* guards = alternative.guards(); | 3554 ZoneList<Guard*>* guards = alternative.guards(); |
| 3518 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3555 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| 3519 for (int j = 0; j < guard_count; j++) { | 3556 for (int j = 0; j < guard_count; j++) { |
| 3520 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); | 3557 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3571 macro_assembler->Bind(&second_choice); | 3608 macro_assembler->Bind(&second_choice); |
| 3572 | 3609 |
| 3573 int first_normal_choice = greedy_loop ? 1 : 0; | 3610 int first_normal_choice = greedy_loop ? 1 : 0; |
| 3574 | 3611 |
| 3575 bool not_at_start = current_trace->at_start() == Trace::FALSE; | 3612 bool not_at_start = current_trace->at_start() == Trace::FALSE; |
| 3576 const int kEatsAtLeastNotYetInitialized = -1; | 3613 const int kEatsAtLeastNotYetInitialized = -1; |
| 3577 int eats_at_least = kEatsAtLeastNotYetInitialized; | 3614 int eats_at_least = kEatsAtLeastNotYetInitialized; |
| 3578 | 3615 |
| 3579 bool skip_was_emitted = false; | 3616 bool skip_was_emitted = false; |
| 3580 | 3617 |
| 3581 // More makes code generation slower, less makes V8 benchmark score lower. | |
| 3582 const int kMaxLookaheadForBoyerMoore = 8; | |
| 3583 | |
| 3584 if (!greedy_loop && choice_count == 2) { | 3618 if (!greedy_loop && choice_count == 2) { |
| 3585 GuardedAlternative alt1 = alternatives_->at(1); | 3619 GuardedAlternative alt1 = alternatives_->at(1); |
| 3586 if (alt1.guards() == NULL || alt1.guards()->length() == 0) { | 3620 if (alt1.guards() == NULL || alt1.guards()->length() == 0) { |
| 3587 RegExpNode* eats_anything_node = alt1.node(); | 3621 RegExpNode* eats_anything_node = alt1.node(); |
| 3588 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == | 3622 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == |
| 3589 this) { | 3623 this) { |
| 3590 // At this point we know that we are at a non-greedy loop that will eat | 3624 // At this point we know that we are at a non-greedy loop that will eat |
| 3591 // any character one at a time. Any non-anchored regexp has such a | 3625 // any character one at a time. Any non-anchored regexp has such a |
| 3592 // loop prepended to it in order to find where it starts. We look for | 3626 // loop prepended to it in order to find where it starts. We look for |
| 3593 // a pattern of the form ...abc... where we can look 6 characters ahead | 3627 // a pattern of the form ...abc... where we can look 6 characters ahead |
| (...skipping 2503 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6097 } | 6131 } |
| 6098 | 6132 |
| 6099 return compiler.Assemble(¯o_assembler, | 6133 return compiler.Assemble(¯o_assembler, |
| 6100 node, | 6134 node, |
| 6101 data->capture_count, | 6135 data->capture_count, |
| 6102 pattern); | 6136 pattern); |
| 6103 } | 6137 } |
| 6104 | 6138 |
| 6105 | 6139 |
| 6106 }} // namespace v8::internal | 6140 }} // namespace v8::internal |
| OLD | NEW |