| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 271 int to) { | 271 int to) { |
| 272 NoHandleAllocation no_handles; | 272 NoHandleAllocation no_handles; |
| 273 RegExpImpl::SetLastCaptureCount(array, 2); | 273 RegExpImpl::SetLastCaptureCount(array, 2); |
| 274 RegExpImpl::SetLastSubject(array, subject); | 274 RegExpImpl::SetLastSubject(array, subject); |
| 275 RegExpImpl::SetLastInput(array, subject); | 275 RegExpImpl::SetLastInput(array, subject); |
| 276 RegExpImpl::SetCapture(array, 0, from); | 276 RegExpImpl::SetCapture(array, 0, from); |
| 277 RegExpImpl::SetCapture(array, 1, to); | 277 RegExpImpl::SetCapture(array, 1, to); |
| 278 } | 278 } |
| 279 | 279 |
| 280 | 280 |
| 281 int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp, | 281 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| 282 Handle<String> subject, | 282 Handle<String> subject, |
| 283 int index, | 283 int index, |
| 284 int32_t* output, | 284 Handle<JSArray> last_match_info) { |
| 285 int output_size) { | 285 Isolate* isolate = re->GetIsolate(); |
| 286 Isolate* isolate = regexp->GetIsolate(); | |
| 287 | 286 |
| 288 ASSERT(0 <= index); | 287 ASSERT(0 <= index); |
| 289 ASSERT(index <= subject->length()); | 288 ASSERT(index <= subject->length()); |
| 290 | 289 |
| 291 if (!subject->IsFlat()) FlattenString(subject); | 290 if (!subject->IsFlat()) FlattenString(subject); |
| 292 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 291 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 293 | 292 |
| 294 String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); | 293 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
| 295 int needle_len = needle->length(); | 294 int needle_len = needle->length(); |
| 296 ASSERT(needle->IsFlat()); | 295 ASSERT(needle->IsFlat()); |
| 297 ASSERT_LT(0, needle_len); | |
| 298 | 296 |
| 299 if (index + needle_len > subject->length()) { | 297 if (needle_len != 0) { |
| 300 return RegExpImpl::RE_FAILURE; | 298 if (index + needle_len > subject->length()) { |
| 301 } | 299 return isolate->factory()->null_value(); |
| 300 } |
| 302 | 301 |
| 303 for (int i = 0; i < output_size; i += 2) { | |
| 304 String::FlatContent needle_content = needle->GetFlatContent(); | 302 String::FlatContent needle_content = needle->GetFlatContent(); |
| 305 String::FlatContent subject_content = subject->GetFlatContent(); | 303 String::FlatContent subject_content = subject->GetFlatContent(); |
| 306 ASSERT(needle_content.IsFlat()); | 304 ASSERT(needle_content.IsFlat()); |
| 307 ASSERT(subject_content.IsFlat()); | 305 ASSERT(subject_content.IsFlat()); |
| 308 // dispatch on type of strings | 306 // dispatch on type of strings |
| 309 index = (needle_content.IsAscii() | 307 index = (needle_content.IsAscii() |
| 310 ? (subject_content.IsAscii() | 308 ? (subject_content.IsAscii() |
| 311 ? SearchString(isolate, | 309 ? SearchString(isolate, |
| 312 subject_content.ToAsciiVector(), | 310 subject_content.ToAsciiVector(), |
| 313 needle_content.ToAsciiVector(), | 311 needle_content.ToAsciiVector(), |
| 314 index) | 312 index) |
| 315 : SearchString(isolate, | 313 : SearchString(isolate, |
| 316 subject_content.ToUC16Vector(), | 314 subject_content.ToUC16Vector(), |
| 317 needle_content.ToAsciiVector(), | 315 needle_content.ToAsciiVector(), |
| 318 index)) | 316 index)) |
| 319 : (subject_content.IsAscii() | 317 : (subject_content.IsAscii() |
| 320 ? SearchString(isolate, | 318 ? SearchString(isolate, |
| 321 subject_content.ToAsciiVector(), | 319 subject_content.ToAsciiVector(), |
| 322 needle_content.ToUC16Vector(), | 320 needle_content.ToUC16Vector(), |
| 323 index) | 321 index) |
| 324 : SearchString(isolate, | 322 : SearchString(isolate, |
| 325 subject_content.ToUC16Vector(), | 323 subject_content.ToUC16Vector(), |
| 326 needle_content.ToUC16Vector(), | 324 needle_content.ToUC16Vector(), |
| 327 index))); | 325 index))); |
| 328 if (index == -1) { | 326 if (index == -1) return isolate->factory()->null_value(); |
| 329 return i / 2; // Return number of matches. | |
| 330 } else { | |
| 331 output[i] = index; | |
| 332 output[i+1] = index + needle_len; | |
| 333 index += needle_len; | |
| 334 } | |
| 335 } | 327 } |
| 336 return output_size / 2; | 328 ASSERT(last_match_info->HasFastObjectElements()); |
| 337 } | |
| 338 | 329 |
| 339 | 330 { |
| 340 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 331 NoHandleAllocation no_handles; |
| 341 Handle<String> subject, | 332 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 342 int index, | 333 SetAtomLastCapture(array, *subject, index, index + needle_len); |
| 343 Handle<JSArray> last_match_info) { | 334 } |
| 344 Isolate* isolate = re->GetIsolate(); | |
| 345 | |
| 346 static const int kNumRegisters = 2; | |
| 347 STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); | |
| 348 int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); | |
| 349 | |
| 350 int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); | |
| 351 | |
| 352 if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); | |
| 353 | |
| 354 ASSERT_EQ(res, RegExpImpl::RE_SUCCESS); | |
| 355 NoHandleAllocation no_handles; | |
| 356 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
| 357 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); | |
| 358 return last_match_info; | 335 return last_match_info; |
| 359 } | 336 } |
| 360 | 337 |
| 361 | 338 |
| 362 // Irregexp implementation. | 339 // Irregexp implementation. |
| 363 | 340 |
| 364 // Ensures that the regexp object contains a compiled version of the | 341 // Ensures that the regexp object contains a compiled version of the |
| 365 // source for either ASCII or non-ASCII strings. | 342 // source for either ASCII or non-ASCII strings. |
| 366 // If the compiled version doesn't already exist, it is compiled | 343 // If the compiled version doesn't already exist, it is compiled |
| 367 // from the source pattern. | 344 // from the source pattern. |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 527 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 504 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| 528 Handle<String> subject) { | 505 Handle<String> subject) { |
| 529 if (!subject->IsFlat()) FlattenString(subject); | 506 if (!subject->IsFlat()) FlattenString(subject); |
| 530 | 507 |
| 531 // Check the asciiness of the underlying storage. | 508 // Check the asciiness of the underlying storage. |
| 532 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 509 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 533 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; | 510 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; |
| 534 | 511 |
| 535 #ifdef V8_INTERPRETED_REGEXP | 512 #ifdef V8_INTERPRETED_REGEXP |
| 536 // Byte-code regexp needs space allocated for all its registers. | 513 // Byte-code regexp needs space allocated for all its registers. |
| 537 // The result captures are copied to the start of the registers array | 514 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| 538 // if the match succeeds. This way those registers are not clobbered | |
| 539 // when we set the last match info from last successful match. | |
| 540 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + | |
| 541 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
| 542 #else // V8_INTERPRETED_REGEXP | 515 #else // V8_INTERPRETED_REGEXP |
| 543 // Native regexp only needs room to output captures. Registers are handled | 516 // Native regexp only needs room to output captures. Registers are handled |
| 544 // internally. | 517 // internally. |
| 545 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 518 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| 546 #endif // V8_INTERPRETED_REGEXP | 519 #endif // V8_INTERPRETED_REGEXP |
| 547 } | 520 } |
| 548 | 521 |
| 549 | 522 |
| 550 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, | 523 int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, |
| 551 Handle<String> subject, | 524 int registers_per_match, |
| 552 int index, | 525 int* max_matches) { |
| 553 int32_t* output, | 526 #ifdef V8_INTERPRETED_REGEXP |
| 554 int output_size) { | 527 // Global loop in interpreted regexp is not implemented. Therefore we choose |
| 528 // the size of the offsets vector so that it can only store one match. |
| 529 *max_matches = 1; |
| 530 return registers_per_match; |
| 531 #else // V8_INTERPRETED_REGEXP |
| 532 int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize); |
| 533 *max_matches = size / registers_per_match; |
| 534 return size; |
| 535 #endif // V8_INTERPRETED_REGEXP |
| 536 } |
| 537 |
| 538 |
| 539 int RegExpImpl::IrregexpExecRaw( |
| 540 Handle<JSRegExp> regexp, |
| 541 Handle<String> subject, |
| 542 int index, |
| 543 Vector<int> output) { |
| 555 Isolate* isolate = regexp->GetIsolate(); | 544 Isolate* isolate = regexp->GetIsolate(); |
| 556 | 545 |
| 557 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 546 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
| 558 | 547 |
| 559 ASSERT(index >= 0); | 548 ASSERT(index >= 0); |
| 560 ASSERT(index <= subject->length()); | 549 ASSERT(index <= subject->length()); |
| 561 ASSERT(subject->IsFlat()); | 550 ASSERT(subject->IsFlat()); |
| 562 | 551 |
| 563 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 552 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 564 | 553 |
| 565 #ifndef V8_INTERPRETED_REGEXP | 554 #ifndef V8_INTERPRETED_REGEXP |
| 566 ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 555 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 567 do { | 556 do { |
| 568 EnsureCompiledIrregexp(regexp, subject, is_ascii); | 557 EnsureCompiledIrregexp(regexp, subject, is_ascii); |
| 569 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 558 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| 570 // The stack is used to allocate registers for the compiled regexp code. | |
| 571 // This means that in case of failure, the output registers array is left | |
| 572 // untouched and contains the capture results from the previous successful | |
| 573 // match. We can use that to set the last match info lazily. | |
| 574 NativeRegExpMacroAssembler::Result res = | 559 NativeRegExpMacroAssembler::Result res = |
| 575 NativeRegExpMacroAssembler::Match(code, | 560 NativeRegExpMacroAssembler::Match(code, |
| 576 subject, | 561 subject, |
| 577 output, | 562 output.start(), |
| 578 output_size, | 563 output.length(), |
| 579 index, | 564 index, |
| 580 isolate); | 565 isolate); |
| 581 if (res != NativeRegExpMacroAssembler::RETRY) { | 566 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 582 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 567 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| 583 isolate->has_pending_exception()); | 568 isolate->has_pending_exception()); |
| 584 STATIC_ASSERT( | 569 STATIC_ASSERT( |
| 585 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 570 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| 586 STATIC_ASSERT( | 571 STATIC_ASSERT( |
| 587 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 572 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| 588 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 573 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| 589 == RE_EXCEPTION); | 574 == RE_EXCEPTION); |
| 590 return static_cast<IrregexpResult>(res); | 575 return static_cast<IrregexpResult>(res); |
| 591 } | 576 } |
| 592 // If result is RETRY, the string has changed representation, and we | 577 // If result is RETRY, the string has changed representation, and we |
| 593 // must restart from scratch. | 578 // must restart from scratch. |
| 594 // In this case, it means we must make sure we are prepared to handle | 579 // In this case, it means we must make sure we are prepared to handle |
| 595 // the, potentially, different subject (the string can switch between | 580 // the, potentially, different subject (the string can switch between |
| 596 // being internal and external, and even between being ASCII and UC16, | 581 // being internal and external, and even between being ASCII and UC16, |
| 597 // but the characters are always the same). | 582 // but the characters are always the same). |
| 598 IrregexpPrepare(regexp, subject); | 583 IrregexpPrepare(regexp, subject); |
| 599 is_ascii = subject->IsAsciiRepresentationUnderneath(); | 584 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 600 } while (true); | 585 } while (true); |
| 601 UNREACHABLE(); | 586 UNREACHABLE(); |
| 602 return RE_EXCEPTION; | 587 return RE_EXCEPTION; |
| 603 #else // V8_INTERPRETED_REGEXP | 588 #else // V8_INTERPRETED_REGEXP |
| 604 | 589 |
| 605 ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp)); | 590 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| 606 // We must have done EnsureCompiledIrregexp, so we can get the number of | 591 // We must have done EnsureCompiledIrregexp, so we can get the number of |
| 607 // registers. | 592 // registers. |
| 593 int* register_vector = output.start(); |
| 608 int number_of_capture_registers = | 594 int number_of_capture_registers = |
| 609 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 595 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 610 int32_t* raw_output = &output[number_of_capture_registers]; | |
| 611 // We do not touch the actual capture result registers until we know there | |
| 612 // has been a match so that we can use those capture results to set the | |
| 613 // last match info. | |
| 614 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 596 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| 615 raw_output[i] = -1; | 597 register_vector[i] = -1; |
| 616 } | 598 } |
| 617 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
| 618 | 600 |
| 619 IrregexpResult result = IrregexpInterpreter::Match(isolate, | 601 IrregexpResult result = IrregexpInterpreter::Match(isolate, |
| 620 byte_codes, | 602 byte_codes, |
| 621 subject, | 603 subject, |
| 622 raw_output, | 604 register_vector, |
| 623 index); | 605 index); |
| 624 if (result == RE_SUCCESS) { | |
| 625 // Copy capture results to the start of the registers array. | |
| 626 memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); | |
| 627 } | |
| 628 if (result == RE_EXCEPTION) { | 606 if (result == RE_EXCEPTION) { |
| 629 ASSERT(!isolate->has_pending_exception()); | 607 ASSERT(!isolate->has_pending_exception()); |
| 630 isolate->StackOverflow(); | 608 isolate->StackOverflow(); |
| 631 } | 609 } |
| 632 return result; | 610 return result; |
| 633 #endif // V8_INTERPRETED_REGEXP | 611 #endif // V8_INTERPRETED_REGEXP |
| 634 } | 612 } |
| 635 | 613 |
| 636 | 614 |
| 637 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 615 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| 638 Handle<String> subject, | 616 Handle<String> subject, |
| 639 int previous_index, | 617 int previous_index, |
| 640 Handle<JSArray> last_match_info) { | 618 Handle<JSArray> last_match_info) { |
| 641 Isolate* isolate = regexp->GetIsolate(); | 619 Isolate* isolate = jsregexp->GetIsolate(); |
| 642 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 620 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| 643 | 621 |
| 644 // Prepare space for the return values. | 622 // Prepare space for the return values. |
| 645 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) | 623 #ifdef V8_INTERPRETED_REGEXP |
| 624 #ifdef DEBUG |
| 646 if (FLAG_trace_regexp_bytecodes) { | 625 if (FLAG_trace_regexp_bytecodes) { |
| 647 String* pattern = regexp->Pattern(); | 626 String* pattern = jsregexp->Pattern(); |
| 648 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 627 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 649 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 628 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 650 } | 629 } |
| 651 #endif | 630 #endif |
| 652 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); | 631 #endif |
| 632 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| 653 if (required_registers < 0) { | 633 if (required_registers < 0) { |
| 654 // Compiling failed with an exception. | 634 // Compiling failed with an exception. |
| 655 ASSERT(isolate->has_pending_exception()); | 635 ASSERT(isolate->has_pending_exception()); |
| 656 return Handle<Object>::null(); | 636 return Handle<Object>::null(); |
| 657 } | 637 } |
| 658 | 638 |
| 659 int32_t* output_registers = NULL; | 639 OffsetsVector registers(required_registers, isolate); |
| 660 if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
| 661 output_registers = NewArray<int32_t>(required_registers); | |
| 662 } | |
| 663 SmartArrayPointer<int32_t> auto_release(output_registers); | |
| 664 if (output_registers == NULL) { | |
| 665 output_registers = isolate->jsregexp_static_offsets_vector(); | |
| 666 } | |
| 667 | 640 |
| 668 int res = RegExpImpl::IrregexpExecRaw( | 641 int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, |
| 669 regexp, subject, previous_index, output_registers, required_registers); | 642 Vector<int>(registers.vector(), |
| 643 registers.length())); |
| 670 if (res == RE_SUCCESS) { | 644 if (res == RE_SUCCESS) { |
| 671 int capture_count = | 645 int capture_register_count = |
| 672 IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); | 646 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| 673 return SetLastMatchInfo( | 647 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| 674 last_match_info, subject, capture_count, output_registers); | 648 AssertNoAllocation no_gc; |
| 649 int* register_vector = registers.vector(); |
| 650 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 651 for (int i = 0; i < capture_register_count; i += 2) { |
| 652 SetCapture(array, i, register_vector[i]); |
| 653 SetCapture(array, i + 1, register_vector[i + 1]); |
| 654 } |
| 655 SetLastCaptureCount(array, capture_register_count); |
| 656 SetLastSubject(array, *subject); |
| 657 SetLastInput(array, *subject); |
| 658 return last_match_info; |
| 675 } | 659 } |
| 676 if (res == RE_EXCEPTION) { | 660 if (res == RE_EXCEPTION) { |
| 677 ASSERT(isolate->has_pending_exception()); | 661 ASSERT(isolate->has_pending_exception()); |
| 678 return Handle<Object>::null(); | 662 return Handle<Object>::null(); |
| 679 } | 663 } |
| 680 ASSERT(res == RE_FAILURE); | 664 ASSERT(res == RE_FAILURE); |
| 681 return isolate->factory()->null_value(); | 665 return isolate->factory()->null_value(); |
| 682 } | 666 } |
| 683 | 667 |
| 684 | 668 |
| 685 Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info, | |
| 686 Handle<String> subject, | |
| 687 int capture_count, | |
| 688 int32_t* match) { | |
| 689 int capture_register_count = (capture_count + 1) * 2; | |
| 690 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | |
| 691 AssertNoAllocation no_gc; | |
| 692 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
| 693 if (match != NULL) { | |
| 694 for (int i = 0; i < capture_register_count; i += 2) { | |
| 695 SetCapture(array, i, match[i]); | |
| 696 SetCapture(array, i + 1, match[i + 1]); | |
| 697 } | |
| 698 } | |
| 699 SetLastCaptureCount(array, capture_register_count); | |
| 700 SetLastSubject(array, *subject); | |
| 701 SetLastInput(array, *subject); | |
| 702 return last_match_info; | |
| 703 } | |
| 704 | |
| 705 | |
| 706 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, | |
| 707 Handle<String> subject, | |
| 708 bool is_global, | |
| 709 Isolate* isolate) { | |
| 710 #ifdef V8_INTERPRETED_REGEXP | |
| 711 bool interpreted = true; | |
| 712 #else | |
| 713 bool interpreted = false; | |
| 714 #endif // V8_INTERPRETED_REGEXP | |
| 715 | |
| 716 regexp_ = regexp; | |
| 717 subject_ = subject; | |
| 718 | |
| 719 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
| 720 static const int kAtomRegistersPerMatch = 2; | |
| 721 registers_per_match_ = kAtomRegistersPerMatch; | |
| 722 // There is no distinction between interpreted and native for atom regexps. | |
| 723 interpreted = false; | |
| 724 } else { | |
| 725 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); | |
| 726 if (registers_per_match_ < 0) { | |
| 727 num_matches_ = -1; // Signal exception. | |
| 728 return; | |
| 729 } | |
| 730 } | |
| 731 | |
| 732 if (is_global && !interpreted) { | |
| 733 register_array_size_ = | |
| 734 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | |
| 735 max_matches_ = register_array_size_ / registers_per_match_; | |
| 736 } else { | |
| 737 // Global loop in interpreted regexp is not implemented. We choose | |
| 738 // the size of the offsets vector so that it can only store one match. | |
| 739 register_array_size_ = registers_per_match_; | |
| 740 max_matches_ = 1; | |
| 741 } | |
| 742 | |
| 743 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
| 744 register_array_ = NewArray<int32_t>(register_array_size_); | |
| 745 } else { | |
| 746 register_array_ = isolate->jsregexp_static_offsets_vector(); | |
| 747 } | |
| 748 | |
| 749 // Set state so that fetching the results the first time triggers a call | |
| 750 // to the compiled regexp. | |
| 751 current_match_index_ = max_matches_ - 1; | |
| 752 num_matches_ = max_matches_; | |
| 753 ASSERT(registers_per_match_ >= 2); // Each match has at least one capture. | |
| 754 ASSERT_GE(register_array_size_, registers_per_match_); | |
| 755 int32_t* last_match = | |
| 756 &register_array_[current_match_index_ * registers_per_match_]; | |
| 757 last_match[0] = -1; | |
| 758 last_match[1] = 0; | |
| 759 } | |
| 760 | |
| 761 | |
| 762 RegExpImpl::GlobalCache::~GlobalCache() { | |
| 763 // Deallocate the register array if we allocated it in the constructor | |
| 764 // (as opposed to using the existing jsregexp_static_offsets_vector). | |
| 765 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
| 766 DeleteArray(register_array_); | |
| 767 } | |
| 768 } | |
| 769 | |
| 770 | |
| 771 int32_t* RegExpImpl::GlobalCache::FetchNext() { | |
| 772 current_match_index_++; | |
| 773 if (current_match_index_ >= num_matches_) { | |
| 774 // Current batch of results exhausted. | |
| 775 // Fail if last batch was not even fully filled. | |
| 776 if (num_matches_ < max_matches_) { | |
| 777 num_matches_ = 0; // Signal failed match. | |
| 778 return NULL; | |
| 779 } | |
| 780 | |
| 781 int32_t* last_match = | |
| 782 &register_array_[(current_match_index_ - 1) * registers_per_match_]; | |
| 783 int last_end_index = last_match[1]; | |
| 784 | |
| 785 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
| 786 num_matches_ = RegExpImpl::AtomExecRaw(regexp_, | |
| 787 subject_, | |
| 788 last_end_index, | |
| 789 register_array_, | |
| 790 register_array_size_); | |
| 791 } else { | |
| 792 int last_start_index = last_match[0]; | |
| 793 if (last_start_index == last_end_index) last_end_index++; | |
| 794 if (last_end_index > subject_->length()) { | |
| 795 num_matches_ = 0; // Signal failed match. | |
| 796 return NULL; | |
| 797 } | |
| 798 num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_, | |
| 799 subject_, | |
| 800 last_end_index, | |
| 801 register_array_, | |
| 802 register_array_size_); | |
| 803 } | |
| 804 | |
| 805 if (num_matches_ <= 0) return NULL; | |
| 806 current_match_index_ = 0; | |
| 807 return register_array_; | |
| 808 } else { | |
| 809 return &register_array_[current_match_index_ * registers_per_match_]; | |
| 810 } | |
| 811 } | |
| 812 | |
| 813 | |
| 814 int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() { | |
| 815 int index = current_match_index_ * registers_per_match_; | |
| 816 if (num_matches_ == 0) { | |
| 817 // After a failed match we shift back by one result. | |
| 818 index -= registers_per_match_; | |
| 819 } | |
| 820 return &register_array_[index]; | |
| 821 } | |
| 822 | |
| 823 | |
| 824 // ------------------------------------------------------------------- | 669 // ------------------------------------------------------------------- |
| 825 // Implementation of the Irregexp regular expression engine. | 670 // Implementation of the Irregexp regular expression engine. |
| 826 // | 671 // |
| 827 // The Irregexp regular expression engine is intended to be a complete | 672 // The Irregexp regular expression engine is intended to be a complete |
| 828 // implementation of ECMAScript regular expressions. It generates either | 673 // implementation of ECMAScript regular expressions. It generates either |
| 829 // bytecodes or native code. | 674 // bytecodes or native code. |
| 830 | 675 |
| 831 // The Irregexp regexp engine is structured in three steps. | 676 // The Irregexp regexp engine is structured in three steps. |
| 832 // 1) The parser generates an abstract syntax tree. See ast.cc. | 677 // 1) The parser generates an abstract syntax tree. See ast.cc. |
| 833 // 2) From the AST a node network is created. The nodes are all | 678 // 2) From the AST a node network is created. The nodes are all |
| (...skipping 5324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6158 } | 6003 } |
| 6159 | 6004 |
| 6160 return compiler.Assemble(&macro_assembler, | 6005 return compiler.Assemble(&macro_assembler, |
| 6161 node, | 6006 node, |
| 6162 data->capture_count, | 6007 data->capture_count, |
| 6163 pattern); | 6008 pattern); |
| 6164 } | 6009 } |
| 6165 | 6010 |
| 6166 | 6011 |
| 6167 }} // namespace v8::internal | 6012 }} // namespace v8::internal |
| OLD | NEW |