Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: src/jsregexp.cc

Issue 10831126: Take advantage of batched results when matching global regexp. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comments and formatting changes. Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/mips/code-stubs-mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after
271 int to) { 271 int to) {
272 NoHandleAllocation no_handles; 272 NoHandleAllocation no_handles;
273 RegExpImpl::SetLastCaptureCount(array, 2); 273 RegExpImpl::SetLastCaptureCount(array, 2);
274 RegExpImpl::SetLastSubject(array, subject); 274 RegExpImpl::SetLastSubject(array, subject);
275 RegExpImpl::SetLastInput(array, subject); 275 RegExpImpl::SetLastInput(array, subject);
276 RegExpImpl::SetCapture(array, 0, from); 276 RegExpImpl::SetCapture(array, 0, from);
277 RegExpImpl::SetCapture(array, 1, to); 277 RegExpImpl::SetCapture(array, 1, to);
278 } 278 }
279 279
280 280
281 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 281 int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
282 Handle<String> subject, 282 Handle<String> subject,
283 int index, 283 int index,
284 Handle<JSArray> last_match_info) { 284 int32_t* output,
285 Isolate* isolate = re->GetIsolate(); 285 int output_size) {
286 Isolate* isolate = regexp->GetIsolate();
286 287
287 ASSERT(0 <= index); 288 ASSERT(0 <= index);
288 ASSERT(index <= subject->length()); 289 ASSERT(index <= subject->length());
289 290
290 if (!subject->IsFlat()) FlattenString(subject); 291 if (!subject->IsFlat()) FlattenString(subject);
291 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 292 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
292 293
293 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 294 String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
294 int needle_len = needle->length(); 295 int needle_len = needle->length();
295 ASSERT(needle->IsFlat()); 296 ASSERT(needle->IsFlat());
297 ASSERT_LT(0, needle_len);
296 298
297 if (needle_len != 0) { 299 if (index + needle_len > subject->length()) {
298 if (index + needle_len > subject->length()) { 300 return RegExpImpl::RE_FAILURE;
299 return isolate->factory()->null_value(); 301 }
300 }
301 302
303 for (int i = 0; i < output_size; i += 2) {
302 String::FlatContent needle_content = needle->GetFlatContent(); 304 String::FlatContent needle_content = needle->GetFlatContent();
303 String::FlatContent subject_content = subject->GetFlatContent(); 305 String::FlatContent subject_content = subject->GetFlatContent();
304 ASSERT(needle_content.IsFlat()); 306 ASSERT(needle_content.IsFlat());
305 ASSERT(subject_content.IsFlat()); 307 ASSERT(subject_content.IsFlat());
306 // dispatch on type of strings 308 // dispatch on type of strings
307 index = (needle_content.IsAscii() 309 index = (needle_content.IsAscii()
308 ? (subject_content.IsAscii() 310 ? (subject_content.IsAscii()
309 ? SearchString(isolate, 311 ? SearchString(isolate,
310 subject_content.ToAsciiVector(), 312 subject_content.ToAsciiVector(),
311 needle_content.ToAsciiVector(), 313 needle_content.ToAsciiVector(),
312 index) 314 index)
313 : SearchString(isolate, 315 : SearchString(isolate,
314 subject_content.ToUC16Vector(), 316 subject_content.ToUC16Vector(),
315 needle_content.ToAsciiVector(), 317 needle_content.ToAsciiVector(),
316 index)) 318 index))
317 : (subject_content.IsAscii() 319 : (subject_content.IsAscii()
318 ? SearchString(isolate, 320 ? SearchString(isolate,
319 subject_content.ToAsciiVector(), 321 subject_content.ToAsciiVector(),
320 needle_content.ToUC16Vector(), 322 needle_content.ToUC16Vector(),
321 index) 323 index)
322 : SearchString(isolate, 324 : SearchString(isolate,
323 subject_content.ToUC16Vector(), 325 subject_content.ToUC16Vector(),
324 needle_content.ToUC16Vector(), 326 needle_content.ToUC16Vector(),
325 index))); 327 index)));
326 if (index == -1) return isolate->factory()->null_value(); 328 if (index == -1) {
329 return i / 2; // Return number of matches.
330 } else {
331 output[i] = index;
332 output[i+1] = index + needle_len;
333 index += needle_len;
334 }
327 } 335 }
328 ASSERT(last_match_info->HasFastObjectElements()); 336 return output_size / 2;
337 }
329 338
330 { 339
331 NoHandleAllocation no_handles; 340 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
332 FixedArray* array = FixedArray::cast(last_match_info->elements()); 341 Handle<String> subject,
333 SetAtomLastCapture(array, *subject, index, index + needle_len); 342 int index,
334 } 343 Handle<JSArray> last_match_info) {
344 Isolate* isolate = re->GetIsolate();
345
346 static const int kNumRegisters = 2;
347 STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
348 int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
349
350 int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
351
352 if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
353
354 ASSERT_EQ(res, RegExpImpl::RE_SUCCESS);
355 NoHandleAllocation no_handles;
356 FixedArray* array = FixedArray::cast(last_match_info->elements());
357 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
335 return last_match_info; 358 return last_match_info;
336 } 359 }
337 360
338 361
339 // Irregexp implementation. 362 // Irregexp implementation.
340 363
341 // Ensures that the regexp object contains a compiled version of the 364 // Ensures that the regexp object contains a compiled version of the
342 // source for either ASCII or non-ASCII strings. 365 // source for either ASCII or non-ASCII strings.
343 // If the compiled version doesn't already exist, it is compiled 366 // If the compiled version doesn't already exist, it is compiled
344 // from the source pattern. 367 // from the source pattern.
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
504 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 527 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
505 Handle<String> subject) { 528 Handle<String> subject) {
506 if (!subject->IsFlat()) FlattenString(subject); 529 if (!subject->IsFlat()) FlattenString(subject);
507 530
508 // Check the asciiness of the underlying storage. 531 // Check the asciiness of the underlying storage.
509 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); 532 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
510 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; 533 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
511 534
512 #ifdef V8_INTERPRETED_REGEXP 535 #ifdef V8_INTERPRETED_REGEXP
513 // Byte-code regexp needs space allocated for all its registers. 536 // Byte-code regexp needs space allocated for all its registers.
514 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 537 // The result captures are copied to the start of the registers array
538 // if the match succeeds. This way those registers are not clobbered
539 // when we set the last match info from last successful match.
540 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
541 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
515 #else // V8_INTERPRETED_REGEXP 542 #else // V8_INTERPRETED_REGEXP
516 // Native regexp only needs room to output captures. Registers are handled 543 // Native regexp only needs room to output captures. Registers are handled
517 // internally. 544 // internally.
518 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 545 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
519 #endif // V8_INTERPRETED_REGEXP 546 #endif // V8_INTERPRETED_REGEXP
520 } 547 }
521 548
522 549
523 int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, 550 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
524 int registers_per_match, 551 Handle<String> subject,
525 int* max_matches) { 552 int index,
526 #ifdef V8_INTERPRETED_REGEXP 553 int32_t* output,
527 // Global loop in interpreted regexp is not implemented. Therefore we choose 554 int output_size) {
528 // the size of the offsets vector so that it can only store one match.
529 *max_matches = 1;
530 return registers_per_match;
531 #else // V8_INTERPRETED_REGEXP
532 int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
533 *max_matches = size / registers_per_match;
534 return size;
535 #endif // V8_INTERPRETED_REGEXP
536 }
537
538
539 int RegExpImpl::IrregexpExecRaw(
540 Handle<JSRegExp> regexp,
541 Handle<String> subject,
542 int index,
543 Vector<int> output) {
544 Isolate* isolate = regexp->GetIsolate(); 555 Isolate* isolate = regexp->GetIsolate();
545 556
546 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 557 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
547 558
548 ASSERT(index >= 0); 559 ASSERT(index >= 0);
549 ASSERT(index <= subject->length()); 560 ASSERT(index <= subject->length());
550 ASSERT(subject->IsFlat()); 561 ASSERT(subject->IsFlat());
551 562
552 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); 563 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
553 564
554 #ifndef V8_INTERPRETED_REGEXP 565 #ifndef V8_INTERPRETED_REGEXP
555 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 566 ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
556 do { 567 do {
557 EnsureCompiledIrregexp(regexp, subject, is_ascii); 568 EnsureCompiledIrregexp(regexp, subject, is_ascii);
558 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 569 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
570 // The stack is used to allocate registers for the compiled regexp code.
571 // This means that in case of failure, the output registers array is left
572 // untouched and contains the capture results from the previous successful
573 // match. We can use that to set the last match info lazily.
559 NativeRegExpMacroAssembler::Result res = 574 NativeRegExpMacroAssembler::Result res =
560 NativeRegExpMacroAssembler::Match(code, 575 NativeRegExpMacroAssembler::Match(code,
561 subject, 576 subject,
562 output.start(), 577 output,
563 output.length(), 578 output_size,
564 index, 579 index,
565 isolate); 580 isolate);
566 if (res != NativeRegExpMacroAssembler::RETRY) { 581 if (res != NativeRegExpMacroAssembler::RETRY) {
567 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 582 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
568 isolate->has_pending_exception()); 583 isolate->has_pending_exception());
569 STATIC_ASSERT( 584 STATIC_ASSERT(
570 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 585 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
571 STATIC_ASSERT( 586 STATIC_ASSERT(
572 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 587 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
573 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 588 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
574 == RE_EXCEPTION); 589 == RE_EXCEPTION);
575 return static_cast<IrregexpResult>(res); 590 return static_cast<IrregexpResult>(res);
576 } 591 }
577 // If result is RETRY, the string has changed representation, and we 592 // If result is RETRY, the string has changed representation, and we
578 // must restart from scratch. 593 // must restart from scratch.
579 // In this case, it means we must make sure we are prepared to handle 594 // In this case, it means we must make sure we are prepared to handle
580 // the, potentially, different subject (the string can switch between 595 // the, potentially, different subject (the string can switch between
581 // being internal and external, and even between being ASCII and UC16, 596 // being internal and external, and even between being ASCII and UC16,
582 // but the characters are always the same). 597 // but the characters are always the same).
583 IrregexpPrepare(regexp, subject); 598 IrregexpPrepare(regexp, subject);
584 is_ascii = subject->IsAsciiRepresentationUnderneath(); 599 is_ascii = subject->IsAsciiRepresentationUnderneath();
585 } while (true); 600 } while (true);
586 UNREACHABLE(); 601 UNREACHABLE();
587 return RE_EXCEPTION; 602 return RE_EXCEPTION;
588 #else // V8_INTERPRETED_REGEXP 603 #else // V8_INTERPRETED_REGEXP
589 604
590 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); 605 ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp));
591 // We must have done EnsureCompiledIrregexp, so we can get the number of 606 // We must have done EnsureCompiledIrregexp, so we can get the number of
592 // registers. 607 // registers.
593 int* register_vector = output.start();
594 int number_of_capture_registers = 608 int number_of_capture_registers =
595 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 609 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
610 int32_t* raw_output = &output[number_of_capture_registers];
611 // We do not touch the actual capture result registers until we know there
612 // has been a match so that we can use those capture results to set the
613 // last match info.
596 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 614 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
597 register_vector[i] = -1; 615 raw_output[i] = -1;
598 } 616 }
599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); 617 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
600 618
601 IrregexpResult result = IrregexpInterpreter::Match(isolate, 619 IrregexpResult result = IrregexpInterpreter::Match(isolate,
602 byte_codes, 620 byte_codes,
603 subject, 621 subject,
604 register_vector, 622 raw_output,
605 index); 623 index);
624 if (result == RE_SUCCESS) {
625 // Copy capture results to the start of the registers array.
626 memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
627 }
606 if (result == RE_EXCEPTION) { 628 if (result == RE_EXCEPTION) {
607 ASSERT(!isolate->has_pending_exception()); 629 ASSERT(!isolate->has_pending_exception());
608 isolate->StackOverflow(); 630 isolate->StackOverflow();
609 } 631 }
610 return result; 632 return result;
611 #endif // V8_INTERPRETED_REGEXP 633 #endif // V8_INTERPRETED_REGEXP
612 } 634 }
613 635
614 636
615 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, 637 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
616 Handle<String> subject, 638 Handle<String> subject,
617 int previous_index, 639 int previous_index,
618 Handle<JSArray> last_match_info) { 640 Handle<JSArray> last_match_info) {
619 Isolate* isolate = jsregexp->GetIsolate(); 641 Isolate* isolate = regexp->GetIsolate();
620 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); 642 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
621 643
622 // Prepare space for the return values. 644 // Prepare space for the return values.
623 #ifdef V8_INTERPRETED_REGEXP 645 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
624 #ifdef DEBUG
625 if (FLAG_trace_regexp_bytecodes) { 646 if (FLAG_trace_regexp_bytecodes) {
626 String* pattern = jsregexp->Pattern(); 647 String* pattern = regexp->Pattern();
627 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 648 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
628 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 649 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
629 } 650 }
630 #endif 651 #endif
631 #endif 652 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
632 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
633 if (required_registers < 0) { 653 if (required_registers < 0) {
634 // Compiling failed with an exception. 654 // Compiling failed with an exception.
635 ASSERT(isolate->has_pending_exception()); 655 ASSERT(isolate->has_pending_exception());
636 return Handle<Object>::null(); 656 return Handle<Object>::null();
637 } 657 }
638 658
639 OffsetsVector registers(required_registers, isolate); 659 int32_t* output_registers = NULL;
660 if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
661 output_registers = NewArray<int32_t>(required_registers);
662 }
663 SmartArrayPointer<int32_t> auto_release(output_registers);
664 if (output_registers == NULL) {
665 output_registers = isolate->jsregexp_static_offsets_vector();
666 }
640 667
641 int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, 668 int res = RegExpImpl::IrregexpExecRaw(
642 Vector<int>(registers.vector(), 669 regexp, subject, previous_index, output_registers, required_registers);
643 registers.length()));
644 if (res == RE_SUCCESS) { 670 if (res == RE_SUCCESS) {
645 int capture_register_count = 671 int capture_count =
646 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; 672 IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
647 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); 673 return SetLastMatchInfo(
648 AssertNoAllocation no_gc; 674 last_match_info, subject, capture_count, output_registers);
649 int* register_vector = registers.vector();
650 FixedArray* array = FixedArray::cast(last_match_info->elements());
651 for (int i = 0; i < capture_register_count; i += 2) {
652 SetCapture(array, i, register_vector[i]);
653 SetCapture(array, i + 1, register_vector[i + 1]);
654 }
655 SetLastCaptureCount(array, capture_register_count);
656 SetLastSubject(array, *subject);
657 SetLastInput(array, *subject);
658 return last_match_info;
659 } 675 }
660 if (res == RE_EXCEPTION) { 676 if (res == RE_EXCEPTION) {
661 ASSERT(isolate->has_pending_exception()); 677 ASSERT(isolate->has_pending_exception());
662 return Handle<Object>::null(); 678 return Handle<Object>::null();
663 } 679 }
664 ASSERT(res == RE_FAILURE); 680 ASSERT(res == RE_FAILURE);
665 return isolate->factory()->null_value(); 681 return isolate->factory()->null_value();
666 } 682 }
667 683
668 684
685 Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
686 Handle<String> subject,
687 int capture_count,
688 int32_t* match) {
689 int capture_register_count = (capture_count + 1) * 2;
690 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
691 AssertNoAllocation no_gc;
692 FixedArray* array = FixedArray::cast(last_match_info->elements());
693 if (match != NULL) {
694 for (int i = 0; i < capture_register_count; i += 2) {
695 SetCapture(array, i, match[i]);
696 SetCapture(array, i + 1, match[i + 1]);
697 }
698 }
699 SetLastCaptureCount(array, capture_register_count);
700 SetLastSubject(array, *subject);
701 SetLastInput(array, *subject);
702 return last_match_info;
703 }
704
705
706 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
707 Handle<String> subject,
708 bool is_global,
709 Isolate* isolate) {
710 #ifdef V8_INTERPRETED_REGEXP
711 bool interpreted = true;
712 #else
713 bool interpreted = false;
714 #endif // V8_INTERPRETED_REGEXP
715
716 regexp_ = regexp;
717 subject_ = subject;
718
719 if (regexp_->TypeTag() == JSRegExp::ATOM) {
720 static const int kAtomRegistersPerMatch = 2;
721 registers_per_match_ = kAtomRegistersPerMatch;
722 // There is no distinction between interpreted and native for atom regexps.
723 interpreted = false;
724 } else {
725 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
726 if (registers_per_match_ < 0) {
727 num_matches_ = -1; // Signal exception.
728 return;
729 }
730 }
731
732 if (is_global && !interpreted) {
733 register_array_size_ =
734 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
735 max_matches_ = register_array_size_ / registers_per_match_;
736 } else {
737 // Global loop in interpreted regexp is not implemented. We choose
738 // the size of the offsets vector so that it can only store one match.
739 register_array_size_ = registers_per_match_;
740 max_matches_ = 1;
741 }
742
743 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
744 register_array_ = NewArray<int32_t>(register_array_size_);
745 } else {
746 register_array_ = isolate->jsregexp_static_offsets_vector();
747 }
748
749 // Set state so that fetching the results the first time triggers a call
750 // to the compiled regexp.
751 current_match_index_ = max_matches_;
752 num_matches_ = max_matches_;
753 ASSERT(registers_per_match_ >= 2); // Each match has at least one capture.
754 ASSERT_GE(register_array_size_, registers_per_match_);
755 int32_t* last_match =
756 &register_array_[register_array_size_ - registers_per_match_];
757 last_match[0] = -1;
758 last_match[1] = 0;
759 }
760
761
762 RegExpImpl::GlobalCache::~GlobalCache() {
763 // Deallocate the register array if we allocated it in the constructor
764 // (as opposed to using the existing jsregexp_static_offsets_vector).
765 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
766 DeleteArray(register_array_);
767 }
768 }
769
770
771 int32_t* RegExpImpl::GlobalCache::FetchNext() {
772 current_match_index_++;
773 if (current_match_index_ >= num_matches_) {
774 // Current batch of results exhausted.
775 // Fail if last batch was not even fully filled.
776 if (num_matches_ < max_matches_) {
777 num_matches_ = 0; // Signal failed match.
778 return NULL;
779 }
780
781 int32_t* last_match =
782 &register_array_[register_array_size_ - registers_per_match_];
783 int last_end_index = last_match[1];
784
785 if (regexp_->TypeTag() == JSRegExp::ATOM) {
786 num_matches_ = RegExpImpl::AtomExecRaw(regexp_,
787 subject_,
788 last_end_index,
789 register_array_,
790 register_array_size_);
791 } else {
792 int last_start_index = last_match[0];
793 if (last_start_index == last_end_index) last_end_index++;
794 if (last_end_index > subject_->length()) {
795 num_matches_ = 0; // Signal failed match.
796 return NULL;
797 }
798 num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_,
799 subject_,
800 last_end_index,
801 register_array_,
802 register_array_size_);
803 }
804
805 if (num_matches_ <= 0) return NULL;
806 current_match_index_ = 0;
807 return register_array_;
808 } else {
809 return &register_array_[current_match_index_ * registers_per_match_];
810 }
811 }
812
813
814 int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
815 int index = current_match_index_ * registers_per_match_;
816 if (num_matches_ == 0) {
817 // After a failed match we shift back by one result.
818 index -= registers_per_match_;
819 }
820 return &register_array_[index];
821 }
822
823
669 // ------------------------------------------------------------------- 824 // -------------------------------------------------------------------
670 // Implementation of the Irregexp regular expression engine. 825 // Implementation of the Irregexp regular expression engine.
671 // 826 //
672 // The Irregexp regular expression engine is intended to be a complete 827 // The Irregexp regular expression engine is intended to be a complete
673 // implementation of ECMAScript regular expressions. It generates either 828 // implementation of ECMAScript regular expressions. It generates either
674 // bytecodes or native code. 829 // bytecodes or native code.
675 830
676 // The Irregexp regexp engine is structured in three steps. 831 // The Irregexp regexp engine is structured in three steps.
677 // 1) The parser generates an abstract syntax tree. See ast.cc. 832 // 1) The parser generates an abstract syntax tree. See ast.cc.
678 // 2) From the AST a node network is created. The nodes are all 833 // 2) From the AST a node network is created. The nodes are all
(...skipping 5324 matching lines...) Expand 10 before | Expand all | Expand 10 after
6003 } 6158 }
6004 6159
6005 return compiler.Assemble(&macro_assembler, 6160 return compiler.Assemble(&macro_assembler,
6006 node, 6161 node,
6007 data->capture_count, 6162 data->capture_count,
6008 pattern); 6163 pattern);
6009 } 6164 }
6010 6165
6011 6166
6012 }} // namespace v8::internal 6167 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/mips/code-stubs-mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698