Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: src/jsregexp.cc

Issue 10386090: Implement loop for global regexps in regexp assembler. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: fix bugs, add tests, port to x64 and arm. Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution. 11 // with the distribution.
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
422 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 422 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
423 ThrowRegExpException(re, 423 ThrowRegExpException(re,
424 pattern, 424 pattern,
425 compile_data.error, 425 compile_data.error,
426 "malformed_regexp"); 426 "malformed_regexp");
427 return false; 427 return false;
428 } 428 }
429 RegExpEngine::CompilationResult result = 429 RegExpEngine::CompilationResult result =
430 RegExpEngine::Compile(&compile_data, 430 RegExpEngine::Compile(&compile_data,
431 flags.is_ignore_case(), 431 flags.is_ignore_case(),
432 flags.is_global(),
432 flags.is_multiline(), 433 flags.is_multiline(),
433 pattern, 434 pattern,
434 sample_subject, 435 sample_subject,
435 is_ascii); 436 is_ascii);
436 if (result.error_message != NULL) { 437 if (result.error_message != NULL) {
437 // Unable to compile regexp. 438 // Unable to compile regexp.
438 Handle<String> error_message = 439 Handle<String> error_message =
439 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message)); 440 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
440 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 441 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
441 return false; 442 return false;
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
508 // Byte-code regexp needs space allocated for all its registers. 509 // Byte-code regexp needs space allocated for all its registers.
509 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 510 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
510 #else // V8_INTERPRETED_REGEXP 511 #else // V8_INTERPRETED_REGEXP
511 // Native regexp only needs room to output captures. Registers are handled 512 // Native regexp only needs room to output captures. Registers are handled
512 // internally. 513 // internally.
513 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 514 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
514 #endif // V8_INTERPRETED_REGEXP 515 #endif // V8_INTERPRETED_REGEXP
515 } 516 }
516 517
517 518
518 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 519 int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
520 int registers_per_match,
521 int* max_matches) {
522 #ifdef V8_INTERPRETED_REGEXP
523 // Global loop in interpreted regexp is not implemented. Therefore we choose
524 // the size of the offsets vector so that it can only store one match.
525 *max_matches = 1;
526 return registers_per_match;
527 #else // V8_INTERPRETED_REGEXP
528 int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
529 *max_matches = size / registers_per_match;
530 return size;
531 #endif // V8_INTERPRETED_REGEXP
532 }
533
534
535 int RegExpImpl::IrregexpExecRaw(
519 Handle<JSRegExp> regexp, 536 Handle<JSRegExp> regexp,
520 Handle<String> subject, 537 Handle<String> subject,
521 int index, 538 int index,
522 Vector<int> output) { 539 Vector<int> output) {
523 Isolate* isolate = regexp->GetIsolate(); 540 Isolate* isolate = regexp->GetIsolate();
524 541
525 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 542 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
526 543
527 ASSERT(index >= 0); 544 ASSERT(index >= 0);
528 ASSERT(index <= subject->length()); 545 ASSERT(index <= subject->length());
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
610 #endif 627 #endif
611 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); 628 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
612 if (required_registers < 0) { 629 if (required_registers < 0) {
613 // Compiling failed with an exception. 630 // Compiling failed with an exception.
614 ASSERT(isolate->has_pending_exception()); 631 ASSERT(isolate->has_pending_exception());
615 return Handle<Object>::null(); 632 return Handle<Object>::null();
616 } 633 }
617 634
618 OffsetsVector registers(required_registers, isolate); 635 OffsetsVector registers(required_registers, isolate);
619 636
620 IrregexpResult res = RegExpImpl::IrregexpExecOnce( 637 int res = RegExpImpl::IrregexpExecRaw(
621 jsregexp, subject, previous_index, Vector<int>(registers.vector(), 638 jsregexp, subject, previous_index, Vector<int>(registers.vector(),
622 registers.length())); 639 registers.length()));
623 if (res == RE_SUCCESS) { 640 if (res == RE_SUCCESS) {
624 int capture_register_count = 641 int capture_register_count =
625 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; 642 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
626 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); 643 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
627 AssertNoAllocation no_gc; 644 AssertNoAllocation no_gc;
628 int* register_vector = registers.vector(); 645 int* register_vector = registers.vector();
629 FixedArray* array = FixedArray::cast(last_match_info->elements()); 646 FixedArray* array = FixedArray::cast(last_match_info->elements());
630 for (int i = 0; i < capture_register_count; i += 2) { 647 for (int i = 0; i < capture_register_count; i += 2) {
(...skipping 5142 matching lines...) Expand 10 before | Expand all | Expand 10 after
5773 5790
5774 void DispatchTableConstructor::VisitAction(ActionNode* that) { 5791 void DispatchTableConstructor::VisitAction(ActionNode* that) {
5775 RegExpNode* target = that->on_success(); 5792 RegExpNode* target = that->on_success();
5776 target->Accept(this); 5793 target->Accept(this);
5777 } 5794 }
5778 5795
5779 5796
5780 RegExpEngine::CompilationResult RegExpEngine::Compile( 5797 RegExpEngine::CompilationResult RegExpEngine::Compile(
5781 RegExpCompileData* data, 5798 RegExpCompileData* data,
5782 bool ignore_case, 5799 bool ignore_case,
5800 bool is_global,
5783 bool is_multiline, 5801 bool is_multiline,
5784 Handle<String> pattern, 5802 Handle<String> pattern,
5785 Handle<String> sample_subject, 5803 Handle<String> sample_subject,
5786 bool is_ascii) { 5804 bool is_ascii) {
5787 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 5805 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
5788 return IrregexpRegExpTooBig(); 5806 return IrregexpRegExpTooBig();
5789 } 5807 }
5790 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 5808 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
Erik Corry 2012/05/22 08:32:46 Optimization idea (probably for the next patch): A
Yang 2012/05/22 14:48:03 This is exactly what I've been looking for! I trie
5791 5809
5792 // Sample some characters from the middle of the string. 5810 // Sample some characters from the middle of the string.
5793 static const int kSampleSize = 128; 5811 static const int kSampleSize = 128;
5794 5812
5795 FlattenString(sample_subject); 5813 FlattenString(sample_subject);
5796 int chars_sampled = 0; 5814 int chars_sampled = 0;
5797 int half_way = (sample_subject->length() - kSampleSize) / 2; 5815 int half_way = (sample_subject->length() - kSampleSize) / 2;
5798 for (int i = Max(0, half_way); 5816 for (int i = Max(0, half_way);
5799 i < sample_subject->length() && chars_sampled < kSampleSize; 5817 i < sample_subject->length() && chars_sampled < kSampleSize;
5800 i++, chars_sampled++) { 5818 i++, chars_sampled++) {
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
5876 5894
5877 // Inserted here, instead of in Assembler, because it depends on information 5895 // Inserted here, instead of in Assembler, because it depends on information
5878 // in the AST that isn't replicated in the Node structure. 5896 // in the AST that isn't replicated in the Node structure.
5879 static const int kMaxBacksearchLimit = 1024; 5897 static const int kMaxBacksearchLimit = 1024;
5880 if (is_end_anchored && 5898 if (is_end_anchored &&
5881 !is_start_anchored && 5899 !is_start_anchored &&
5882 max_length < kMaxBacksearchLimit) { 5900 max_length < kMaxBacksearchLimit) {
5883 macro_assembler.SetCurrentPositionFromEnd(max_length); 5901 macro_assembler.SetCurrentPositionFromEnd(max_length);
5884 } 5902 }
5885 5903
5904 macro_assembler.set_global(is_global);
5905
5886 return compiler.Assemble(&macro_assembler, 5906 return compiler.Assemble(&macro_assembler,
5887 node, 5907 node,
5888 data->capture_count, 5908 data->capture_count,
5889 pattern); 5909 pattern);
5890 } 5910 }
5891 5911
5892 5912
5893 }} // namespace v8::internal 5913 }} // namespace v8::internal
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698