OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
271 int to) { | 271 int to) { |
272 NoHandleAllocation no_handles; | 272 NoHandleAllocation no_handles; |
273 RegExpImpl::SetLastCaptureCount(array, 2); | 273 RegExpImpl::SetLastCaptureCount(array, 2); |
274 RegExpImpl::SetLastSubject(array, subject); | 274 RegExpImpl::SetLastSubject(array, subject); |
275 RegExpImpl::SetLastInput(array, subject); | 275 RegExpImpl::SetLastInput(array, subject); |
276 RegExpImpl::SetCapture(array, 0, from); | 276 RegExpImpl::SetCapture(array, 0, from); |
277 RegExpImpl::SetCapture(array, 1, to); | 277 RegExpImpl::SetCapture(array, 1, to); |
278 } | 278 } |
279 | 279 |
280 | 280 |
281 int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp, | 281 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
282 Handle<String> subject, | 282 Handle<String> subject, |
283 int index, | 283 int index, |
284 int32_t* output, | 284 Handle<JSArray> last_match_info) { |
285 int output_size) { | 285 Isolate* isolate = re->GetIsolate(); |
286 Isolate* isolate = regexp->GetIsolate(); | |
287 | 286 |
288 ASSERT(0 <= index); | 287 ASSERT(0 <= index); |
289 ASSERT(index <= subject->length()); | 288 ASSERT(index <= subject->length()); |
290 | 289 |
291 if (!subject->IsFlat()) FlattenString(subject); | 290 if (!subject->IsFlat()) FlattenString(subject); |
292 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 291 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
293 | 292 |
294 String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); | 293 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
295 int needle_len = needle->length(); | 294 int needle_len = needle->length(); |
296 ASSERT(needle->IsFlat()); | 295 ASSERT(needle->IsFlat()); |
297 ASSERT_LT(0, needle_len); | |
298 | 296 |
299 if (index + needle_len > subject->length()) { | 297 if (needle_len != 0) { |
300 return RegExpImpl::RE_FAILURE; | 298 if (index + needle_len > subject->length()) { |
301 } | 299 return isolate->factory()->null_value(); |
| 300 } |
302 | 301 |
303 for (int i = 0; i < output_size; i += 2) { | |
304 String::FlatContent needle_content = needle->GetFlatContent(); | 302 String::FlatContent needle_content = needle->GetFlatContent(); |
305 String::FlatContent subject_content = subject->GetFlatContent(); | 303 String::FlatContent subject_content = subject->GetFlatContent(); |
306 ASSERT(needle_content.IsFlat()); | 304 ASSERT(needle_content.IsFlat()); |
307 ASSERT(subject_content.IsFlat()); | 305 ASSERT(subject_content.IsFlat()); |
308 // dispatch on type of strings | 306 // dispatch on type of strings |
309 index = (needle_content.IsAscii() | 307 index = (needle_content.IsAscii() |
310 ? (subject_content.IsAscii() | 308 ? (subject_content.IsAscii() |
311 ? SearchString(isolate, | 309 ? SearchString(isolate, |
312 subject_content.ToAsciiVector(), | 310 subject_content.ToAsciiVector(), |
313 needle_content.ToAsciiVector(), | 311 needle_content.ToAsciiVector(), |
314 index) | 312 index) |
315 : SearchString(isolate, | 313 : SearchString(isolate, |
316 subject_content.ToUC16Vector(), | 314 subject_content.ToUC16Vector(), |
317 needle_content.ToAsciiVector(), | 315 needle_content.ToAsciiVector(), |
318 index)) | 316 index)) |
319 : (subject_content.IsAscii() | 317 : (subject_content.IsAscii() |
320 ? SearchString(isolate, | 318 ? SearchString(isolate, |
321 subject_content.ToAsciiVector(), | 319 subject_content.ToAsciiVector(), |
322 needle_content.ToUC16Vector(), | 320 needle_content.ToUC16Vector(), |
323 index) | 321 index) |
324 : SearchString(isolate, | 322 : SearchString(isolate, |
325 subject_content.ToUC16Vector(), | 323 subject_content.ToUC16Vector(), |
326 needle_content.ToUC16Vector(), | 324 needle_content.ToUC16Vector(), |
327 index))); | 325 index))); |
328 if (index == -1) { | 326 if (index == -1) return isolate->factory()->null_value(); |
329 return i / 2; // Return number of matches. | |
330 } else { | |
331 output[i] = index; | |
332 output[i+1] = index + needle_len; | |
333 index += needle_len; | |
334 } | |
335 } | 327 } |
336 return output_size / 2; | 328 ASSERT(last_match_info->HasFastObjectElements()); |
337 } | |
338 | 329 |
339 | 330 { |
340 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 331 NoHandleAllocation no_handles; |
341 Handle<String> subject, | 332 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
342 int index, | 333 SetAtomLastCapture(array, *subject, index, index + needle_len); |
343 Handle<JSArray> last_match_info) { | 334 } |
344 Isolate* isolate = re->GetIsolate(); | |
345 | |
346 static const int kNumRegisters = 2; | |
347 STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); | |
348 int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); | |
349 | |
350 int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); | |
351 | |
352 if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); | |
353 | |
354 ASSERT_EQ(res, RegExpImpl::RE_SUCCESS); | |
355 NoHandleAllocation no_handles; | |
356 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
357 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); | |
358 return last_match_info; | 335 return last_match_info; |
359 } | 336 } |
360 | 337 |
361 | 338 |
362 // Irregexp implementation. | 339 // Irregexp implementation. |
363 | 340 |
364 // Ensures that the regexp object contains a compiled version of the | 341 // Ensures that the regexp object contains a compiled version of the |
365 // source for either ASCII or non-ASCII strings. | 342 // source for either ASCII or non-ASCII strings. |
366 // If the compiled version doesn't already exist, it is compiled | 343 // If the compiled version doesn't already exist, it is compiled |
367 // from the source pattern. | 344 // from the source pattern. |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
527 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 504 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
528 Handle<String> subject) { | 505 Handle<String> subject) { |
529 if (!subject->IsFlat()) FlattenString(subject); | 506 if (!subject->IsFlat()) FlattenString(subject); |
530 | 507 |
531 // Check the asciiness of the underlying storage. | 508 // Check the asciiness of the underlying storage. |
532 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 509 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
533 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; | 510 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; |
534 | 511 |
535 #ifdef V8_INTERPRETED_REGEXP | 512 #ifdef V8_INTERPRETED_REGEXP |
536 // Byte-code regexp needs space allocated for all its registers. | 513 // Byte-code regexp needs space allocated for all its registers. |
537 // The result captures are copied to the start of the registers array | 514 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
538 // if the match succeeds. This way those registers are not clobbered | |
539 // when we set the last match info from last successful match. | |
540 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + | |
541 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
542 #else // V8_INTERPRETED_REGEXP | 515 #else // V8_INTERPRETED_REGEXP |
543 // Native regexp only needs room to output captures. Registers are handled | 516 // Native regexp only needs room to output captures. Registers are handled |
544 // internally. | 517 // internally. |
545 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 518 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
546 #endif // V8_INTERPRETED_REGEXP | 519 #endif // V8_INTERPRETED_REGEXP |
547 } | 520 } |
548 | 521 |
549 | 522 |
550 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, | 523 int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, |
551 Handle<String> subject, | 524 int registers_per_match, |
552 int index, | 525 int* max_matches) { |
553 int32_t* output, | 526 #ifdef V8_INTERPRETED_REGEXP |
554 int output_size) { | 527 // Global loop in interpreted regexp is not implemented. Therefore we choose |
| 528 // the size of the offsets vector so that it can only store one match. |
| 529 *max_matches = 1; |
| 530 return registers_per_match; |
| 531 #else // V8_INTERPRETED_REGEXP |
| 532 int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize); |
| 533 *max_matches = size / registers_per_match; |
| 534 return size; |
| 535 #endif // V8_INTERPRETED_REGEXP |
| 536 } |
| 537 |
| 538 |
| 539 int RegExpImpl::IrregexpExecRaw( |
| 540 Handle<JSRegExp> regexp, |
| 541 Handle<String> subject, |
| 542 int index, |
| 543 Vector<int> output) { |
555 Isolate* isolate = regexp->GetIsolate(); | 544 Isolate* isolate = regexp->GetIsolate(); |
556 | 545 |
557 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 546 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
558 | 547 |
559 ASSERT(index >= 0); | 548 ASSERT(index >= 0); |
560 ASSERT(index <= subject->length()); | 549 ASSERT(index <= subject->length()); |
561 ASSERT(subject->IsFlat()); | 550 ASSERT(subject->IsFlat()); |
562 | 551 |
563 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 552 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
564 | 553 |
565 #ifndef V8_INTERPRETED_REGEXP | 554 #ifndef V8_INTERPRETED_REGEXP |
566 ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 555 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
567 do { | 556 do { |
568 EnsureCompiledIrregexp(regexp, subject, is_ascii); | 557 EnsureCompiledIrregexp(regexp, subject, is_ascii); |
569 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 558 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
570 // The stack is used to allocate registers for the compiled regexp code. | |
571 // This means that in case of failure, the output registers array is left | |
572 // untouched and contains the capture results from the previous successful | |
573 // match. We can use that to set the last match info lazily. | |
574 NativeRegExpMacroAssembler::Result res = | 559 NativeRegExpMacroAssembler::Result res = |
575 NativeRegExpMacroAssembler::Match(code, | 560 NativeRegExpMacroAssembler::Match(code, |
576 subject, | 561 subject, |
577 output, | 562 output.start(), |
578 output_size, | 563 output.length(), |
579 index, | 564 index, |
580 isolate); | 565 isolate); |
581 if (res != NativeRegExpMacroAssembler::RETRY) { | 566 if (res != NativeRegExpMacroAssembler::RETRY) { |
582 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 567 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
583 isolate->has_pending_exception()); | 568 isolate->has_pending_exception()); |
584 STATIC_ASSERT( | 569 STATIC_ASSERT( |
585 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 570 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
586 STATIC_ASSERT( | 571 STATIC_ASSERT( |
587 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 572 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
588 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 573 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
589 == RE_EXCEPTION); | 574 == RE_EXCEPTION); |
590 return static_cast<IrregexpResult>(res); | 575 return static_cast<IrregexpResult>(res); |
591 } | 576 } |
592 // If result is RETRY, the string has changed representation, and we | 577 // If result is RETRY, the string has changed representation, and we |
593 // must restart from scratch. | 578 // must restart from scratch. |
594 // In this case, it means we must make sure we are prepared to handle | 579 // In this case, it means we must make sure we are prepared to handle |
595 // the, potentially, different subject (the string can switch between | 580 // the, potentially, different subject (the string can switch between |
596 // being internal and external, and even between being ASCII and UC16, | 581 // being internal and external, and even between being ASCII and UC16, |
597 // but the characters are always the same). | 582 // but the characters are always the same). |
598 IrregexpPrepare(regexp, subject); | 583 IrregexpPrepare(regexp, subject); |
599 is_ascii = subject->IsAsciiRepresentationUnderneath(); | 584 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
600 } while (true); | 585 } while (true); |
601 UNREACHABLE(); | 586 UNREACHABLE(); |
602 return RE_EXCEPTION; | 587 return RE_EXCEPTION; |
603 #else // V8_INTERPRETED_REGEXP | 588 #else // V8_INTERPRETED_REGEXP |
604 | 589 |
605 ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp)); | 590 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
606 // We must have done EnsureCompiledIrregexp, so we can get the number of | 591 // We must have done EnsureCompiledIrregexp, so we can get the number of |
607 // registers. | 592 // registers. |
| 593 int* register_vector = output.start(); |
608 int number_of_capture_registers = | 594 int number_of_capture_registers = |
609 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 595 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
610 int32_t* raw_output = &output[number_of_capture_registers]; | |
611 // We do not touch the actual capture result registers until we know there | |
612 // has been a match so that we can use those capture results to set the | |
613 // last match info. | |
614 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 596 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
615 raw_output[i] = -1; | 597 register_vector[i] = -1; |
616 } | 598 } |
617 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
618 | 600 |
619 IrregexpResult result = IrregexpInterpreter::Match(isolate, | 601 IrregexpResult result = IrregexpInterpreter::Match(isolate, |
620 byte_codes, | 602 byte_codes, |
621 subject, | 603 subject, |
622 raw_output, | 604 register_vector, |
623 index); | 605 index); |
624 if (result == RE_SUCCESS) { | |
625 // Copy capture results to the start of the registers array. | |
626 memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); | |
627 } | |
628 if (result == RE_EXCEPTION) { | 606 if (result == RE_EXCEPTION) { |
629 ASSERT(!isolate->has_pending_exception()); | 607 ASSERT(!isolate->has_pending_exception()); |
630 isolate->StackOverflow(); | 608 isolate->StackOverflow(); |
631 } | 609 } |
632 return result; | 610 return result; |
633 #endif // V8_INTERPRETED_REGEXP | 611 #endif // V8_INTERPRETED_REGEXP |
634 } | 612 } |
635 | 613 |
636 | 614 |
637 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 615 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
638 Handle<String> subject, | 616 Handle<String> subject, |
639 int previous_index, | 617 int previous_index, |
640 Handle<JSArray> last_match_info) { | 618 Handle<JSArray> last_match_info) { |
641 Isolate* isolate = regexp->GetIsolate(); | 619 Isolate* isolate = jsregexp->GetIsolate(); |
642 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 620 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
643 | 621 |
644 // Prepare space for the return values. | 622 // Prepare space for the return values. |
645 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) | 623 #ifdef V8_INTERPRETED_REGEXP |
| 624 #ifdef DEBUG |
646 if (FLAG_trace_regexp_bytecodes) { | 625 if (FLAG_trace_regexp_bytecodes) { |
647 String* pattern = regexp->Pattern(); | 626 String* pattern = jsregexp->Pattern(); |
648 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 627 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
649 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 628 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
650 } | 629 } |
651 #endif | 630 #endif |
652 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); | 631 #endif |
| 632 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
653 if (required_registers < 0) { | 633 if (required_registers < 0) { |
654 // Compiling failed with an exception. | 634 // Compiling failed with an exception. |
655 ASSERT(isolate->has_pending_exception()); | 635 ASSERT(isolate->has_pending_exception()); |
656 return Handle<Object>::null(); | 636 return Handle<Object>::null(); |
657 } | 637 } |
658 | 638 |
659 int32_t* output_registers = NULL; | 639 OffsetsVector registers(required_registers, isolate); |
660 if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
661 output_registers = NewArray<int32_t>(required_registers); | |
662 } | |
663 SmartArrayPointer<int32_t> auto_release(output_registers); | |
664 if (output_registers == NULL) { | |
665 output_registers = isolate->jsregexp_static_offsets_vector(); | |
666 } | |
667 | 640 |
668 int res = RegExpImpl::IrregexpExecRaw( | 641 int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, |
669 regexp, subject, previous_index, output_registers, required_registers); | 642 Vector<int>(registers.vector(), |
| 643 registers.length())); |
670 if (res == RE_SUCCESS) { | 644 if (res == RE_SUCCESS) { |
671 int capture_count = | 645 int capture_register_count = |
672 IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); | 646 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
673 return SetLastMatchInfo( | 647 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
674 last_match_info, subject, capture_count, output_registers); | 648 AssertNoAllocation no_gc; |
| 649 int* register_vector = registers.vector(); |
| 650 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 651 for (int i = 0; i < capture_register_count; i += 2) { |
| 652 SetCapture(array, i, register_vector[i]); |
| 653 SetCapture(array, i + 1, register_vector[i + 1]); |
| 654 } |
| 655 SetLastCaptureCount(array, capture_register_count); |
| 656 SetLastSubject(array, *subject); |
| 657 SetLastInput(array, *subject); |
| 658 return last_match_info; |
675 } | 659 } |
676 if (res == RE_EXCEPTION) { | 660 if (res == RE_EXCEPTION) { |
677 ASSERT(isolate->has_pending_exception()); | 661 ASSERT(isolate->has_pending_exception()); |
678 return Handle<Object>::null(); | 662 return Handle<Object>::null(); |
679 } | 663 } |
680 ASSERT(res == RE_FAILURE); | 664 ASSERT(res == RE_FAILURE); |
681 return isolate->factory()->null_value(); | 665 return isolate->factory()->null_value(); |
682 } | 666 } |
683 | 667 |
684 | 668 |
685 Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info, | |
686 Handle<String> subject, | |
687 int capture_count, | |
688 int32_t* match) { | |
689 int capture_register_count = (capture_count + 1) * 2; | |
690 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | |
691 AssertNoAllocation no_gc; | |
692 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
693 if (match != NULL) { | |
694 for (int i = 0; i < capture_register_count; i += 2) { | |
695 SetCapture(array, i, match[i]); | |
696 SetCapture(array, i + 1, match[i + 1]); | |
697 } | |
698 } | |
699 SetLastCaptureCount(array, capture_register_count); | |
700 SetLastSubject(array, *subject); | |
701 SetLastInput(array, *subject); | |
702 return last_match_info; | |
703 } | |
704 | |
705 | |
706 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, | |
707 Handle<String> subject, | |
708 bool is_global, | |
709 Isolate* isolate) { | |
710 #ifdef V8_INTERPRETED_REGEXP | |
711 bool interpreted = true; | |
712 #else | |
713 bool interpreted = false; | |
714 #endif // V8_INTERPRETED_REGEXP | |
715 | |
716 regexp_ = regexp; | |
717 subject_ = subject; | |
718 | |
719 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
720 static const int kAtomRegistersPerMatch = 2; | |
721 registers_per_match_ = kAtomRegistersPerMatch; | |
722 // There is no distinction between interpreted and native for atom regexps. | |
723 interpreted = false; | |
724 } else { | |
725 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); | |
726 if (registers_per_match_ < 0) { | |
727 num_matches_ = -1; // Signal exception. | |
728 return; | |
729 } | |
730 } | |
731 | |
732 if (is_global && !interpreted) { | |
733 register_array_size_ = | |
734 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | |
735 max_matches_ = register_array_size_ / registers_per_match_; | |
736 } else { | |
737 // Global loop in interpreted regexp is not implemented. We choose | |
738 // the size of the offsets vector so that it can only store one match. | |
739 register_array_size_ = registers_per_match_; | |
740 max_matches_ = 1; | |
741 } | |
742 | |
743 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
744 register_array_ = NewArray<int32_t>(register_array_size_); | |
745 } else { | |
746 register_array_ = isolate->jsregexp_static_offsets_vector(); | |
747 } | |
748 | |
749 // Set state so that fetching the results the first time triggers a call | |
750 // to the compiled regexp. | |
751 current_match_index_ = max_matches_ - 1; | |
752 num_matches_ = max_matches_; | |
753 ASSERT(registers_per_match_ >= 2); // Each match has at least one capture. | |
754 ASSERT_GE(register_array_size_, registers_per_match_); | |
755 int32_t* last_match = | |
756 &register_array_[current_match_index_ * registers_per_match_]; | |
757 last_match[0] = -1; | |
758 last_match[1] = 0; | |
759 } | |
760 | |
761 | |
762 RegExpImpl::GlobalCache::~GlobalCache() { | |
763 // Deallocate the register array if we allocated it in the constructor | |
764 // (as opposed to using the existing jsregexp_static_offsets_vector). | |
765 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
766 DeleteArray(register_array_); | |
767 } | |
768 } | |
769 | |
770 | |
771 int32_t* RegExpImpl::GlobalCache::FetchNext() { | |
772 current_match_index_++; | |
773 if (current_match_index_ >= num_matches_) { | |
774 // Current batch of results exhausted. | |
775 // Fail if last batch was not even fully filled. | |
776 if (num_matches_ < max_matches_) { | |
777 num_matches_ = 0; // Signal failed match. | |
778 return NULL; | |
779 } | |
780 | |
781 int32_t* last_match = | |
782 &register_array_[(current_match_index_ - 1) * registers_per_match_]; | |
783 int last_end_index = last_match[1]; | |
784 | |
785 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
786 num_matches_ = RegExpImpl::AtomExecRaw(regexp_, | |
787 subject_, | |
788 last_end_index, | |
789 register_array_, | |
790 register_array_size_); | |
791 } else { | |
792 int last_start_index = last_match[0]; | |
793 if (last_start_index == last_end_index) last_end_index++; | |
794 if (last_end_index > subject_->length()) { | |
795 num_matches_ = 0; // Signal failed match. | |
796 return NULL; | |
797 } | |
798 num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_, | |
799 subject_, | |
800 last_end_index, | |
801 register_array_, | |
802 register_array_size_); | |
803 } | |
804 | |
805 if (num_matches_ <= 0) return NULL; | |
806 current_match_index_ = 0; | |
807 return register_array_; | |
808 } else { | |
809 return &register_array_[current_match_index_ * registers_per_match_]; | |
810 } | |
811 } | |
812 | |
813 | |
814 int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() { | |
815 int index = current_match_index_ * registers_per_match_; | |
816 if (num_matches_ == 0) { | |
817 // After a failed match we shift back by one result. | |
818 index -= registers_per_match_; | |
819 } | |
820 return &register_array_[index]; | |
821 } | |
822 | |
823 | |
824 // ------------------------------------------------------------------- | 669 // ------------------------------------------------------------------- |
825 // Implementation of the Irregexp regular expression engine. | 670 // Implementation of the Irregexp regular expression engine. |
826 // | 671 // |
827 // The Irregexp regular expression engine is intended to be a complete | 672 // The Irregexp regular expression engine is intended to be a complete |
828 // implementation of ECMAScript regular expressions. It generates either | 673 // implementation of ECMAScript regular expressions. It generates either |
829 // bytecodes or native code. | 674 // bytecodes or native code. |
830 | 675 |
831 // The Irregexp regexp engine is structured in three steps. | 676 // The Irregexp regexp engine is structured in three steps. |
832 // 1) The parser generates an abstract syntax tree. See ast.cc. | 677 // 1) The parser generates an abstract syntax tree. See ast.cc. |
833 // 2) From the AST a node network is created. The nodes are all | 678 // 2) From the AST a node network is created. The nodes are all |
(...skipping 5324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6158 } | 6003 } |
6159 | 6004 |
6160 return compiler.Assemble(&macro_assembler, | 6005 return compiler.Assemble(&macro_assembler, |
6161 node, | 6006 node, |
6162 data->capture_count, | 6007 data->capture_count, |
6163 pattern); | 6008 pattern); |
6164 } | 6009 } |
6165 | 6010 |
6166 | 6011 |
6167 }} // namespace v8::internal | 6012 }} // namespace v8::internal |
OLD | NEW |