OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 213 matching lines...) |
224 Handle<FixedArray> data(FixedArray::cast(re->data())); | 224 Handle<FixedArray> data(FixedArray::cast(re->data())); |
225 compilation_cache->PutRegExp(pattern, flags, data); | 225 compilation_cache->PutRegExp(pattern, flags, data); |
226 | 226 |
227 return re; | 227 return re; |
228 } | 228 } |
229 | 229 |
230 | 230 |
231 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 231 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
232 Handle<String> subject, | 232 Handle<String> subject, |
233 int index, | 233 int index, |
234 Handle<JSArray> last_match_info) { | 234 Handle<JSArray> last_match_info, |
| 235 Zone* zone) { |
235 switch (regexp->TypeTag()) { | 236 switch (regexp->TypeTag()) { |
236 case JSRegExp::ATOM: | 237 case JSRegExp::ATOM: |
237 return AtomExec(regexp, subject, index, last_match_info); | 238 return AtomExec(regexp, subject, index, last_match_info); |
238 case JSRegExp::IRREGEXP: { | 239 case JSRegExp::IRREGEXP: { |
239 Handle<Object> result = | 240 Handle<Object> result = |
240 IrregexpExec(regexp, subject, index, last_match_info); | 241 IrregexpExec(regexp, subject, index, last_match_info, zone); |
241 ASSERT(!result.is_null() || | 242 ASSERT(!result.is_null() || |
242 regexp->GetIsolate()->has_pending_exception()); | 243 regexp->GetIsolate()->has_pending_exception()); |
243 return result; | 244 return result; |
244 } | 245 } |
245 default: | 246 default: |
246 UNREACHABLE(); | 247 UNREACHABLE(); |
247 return Handle<Object>::null(); | 248 return Handle<Object>::null(); |
248 } | 249 } |
249 } | 250 } |
250 | 251 |
(...skipping 86 matching lines...) |
337 | 338 |
338 // Irregexp implementation. | 339 // Irregexp implementation. |
339 | 340 |
340 // Ensures that the regexp object contains a compiled version of the | 341 // Ensures that the regexp object contains a compiled version of the |
341 // source for either ASCII or non-ASCII strings. | 342 // source for either ASCII or non-ASCII strings. |
342 // If the compiled version doesn't already exist, it is compiled | 343 // If the compiled version doesn't already exist, it is compiled |
343 // from the source pattern. | 344 // from the source pattern. |
344 // If compilation fails, an exception is thrown and this function | 345 // If compilation fails, an exception is thrown and this function |
345 // returns false. | 346 // returns false. |
346 bool RegExpImpl::EnsureCompiledIrregexp( | 347 bool RegExpImpl::EnsureCompiledIrregexp( |
347 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) { | 348 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii, |
| 349 Zone* zone) { |
348 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); | 350 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); |
349 #ifdef V8_INTERPRETED_REGEXP | 351 #ifdef V8_INTERPRETED_REGEXP |
350 if (compiled_code->IsByteArray()) return true; | 352 if (compiled_code->IsByteArray()) return true; |
351 #else // V8_INTERPRETED_REGEXP (RegExp native code) | 353 #else // V8_INTERPRETED_REGEXP (RegExp native code) |
352 if (compiled_code->IsCode()) return true; | 354 if (compiled_code->IsCode()) return true; |
353 #endif | 355 #endif |
354 // We could potentially have marked this as flushable, but have kept | 356 // We could potentially have marked this as flushable, but have kept |
355 // a saved version if we did not flush it yet. | 357 // a saved version if we did not flush it yet. |
356 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii)); | 358 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
357 if (saved_code->IsCode()) { | 359 if (saved_code->IsCode()) { |
358 // Reinstate the code in the original place. | 360 // Reinstate the code in the original place. |
359 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code); | 361 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code); |
360 ASSERT(compiled_code->IsSmi()); | 362 ASSERT(compiled_code->IsSmi()); |
361 return true; | 363 return true; |
362 } | 364 } |
363 return CompileIrregexp(re, sample_subject, is_ascii); | 365 return CompileIrregexp(re, sample_subject, is_ascii, zone); |
364 } | 366 } |
365 | 367 |
366 | 368 |
367 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, | 369 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, |
368 bool is_ascii, | 370 bool is_ascii, |
369 Handle<String> error_message, | 371 Handle<String> error_message, |
370 Isolate* isolate) { | 372 Isolate* isolate) { |
371 Factory* factory = isolate->factory(); | 373 Factory* factory = isolate->factory(); |
372 Handle<FixedArray> elements = factory->NewFixedArray(2); | 374 Handle<FixedArray> elements = factory->NewFixedArray(2); |
373 elements->set(0, re->Pattern()); | 375 elements->set(0, re->Pattern()); |
374 elements->set(1, *error_message); | 376 elements->set(1, *error_message); |
375 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); | 377 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
376 Handle<Object> regexp_err = | 378 Handle<Object> regexp_err = |
377 factory->NewSyntaxError("malformed_regexp", array); | 379 factory->NewSyntaxError("malformed_regexp", array); |
378 isolate->Throw(*regexp_err); | 380 isolate->Throw(*regexp_err); |
379 return false; | 381 return false; |
380 } | 382 } |
381 | 383 |
382 | 384 |
383 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, | 385 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, |
384 Handle<String> sample_subject, | 386 Handle<String> sample_subject, |
385 bool is_ascii) { | 387 bool is_ascii, |
| 388 Zone* zone) { |
386 // Compile the RegExp. | 389 // Compile the RegExp. |
387 Isolate* isolate = re->GetIsolate(); | 390 Isolate* isolate = re->GetIsolate(); |
388 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); | 391 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); |
389 PostponeInterruptsScope postpone(isolate); | 392 PostponeInterruptsScope postpone(isolate); |
390 // If we had a compilation error the last time, this is saved at the | 393 // If we had a compilation error the last time, this is saved at the |
391 // saved code index. | 394 // saved code index. |
392 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); | 395 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); |
393 // When arriving here entry can only be a smi, either representing an | 396 // When arriving here entry can only be a smi, either representing an |
394 // uncompiled regexp, a previous compilation error, or code that has | 397 // uncompiled regexp, a previous compilation error, or code that has |
395 // been flushed. | 398 // been flushed. |
(...skipping 30 matching lines...) |
426 "malformed_regexp"); | 429 "malformed_regexp"); |
427 return false; | 430 return false; |
428 } | 431 } |
429 RegExpEngine::CompilationResult result = | 432 RegExpEngine::CompilationResult result = |
430 RegExpEngine::Compile(&compile_data, | 433 RegExpEngine::Compile(&compile_data, |
431 flags.is_ignore_case(), | 434 flags.is_ignore_case(), |
432 flags.is_global(), | 435 flags.is_global(), |
433 flags.is_multiline(), | 436 flags.is_multiline(), |
434 pattern, | 437 pattern, |
435 sample_subject, | 438 sample_subject, |
436 is_ascii); | 439 is_ascii, |
| 440 zone); |
437 if (result.error_message != NULL) { | 441 if (result.error_message != NULL) { |
438 // Unable to compile regexp. | 442 // Unable to compile regexp. |
439 Handle<String> error_message = | 443 Handle<String> error_message = |
440 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message)); | 444 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message)); |
441 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); | 445 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
442 return false; | 446 return false; |
443 } | 447 } |
444 | 448 |
445 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); | 449 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); |
446 data->set(JSRegExp::code_index(is_ascii), result.code); | 450 data->set(JSRegExp::code_index(is_ascii), result.code); |
(...skipping 44 matching lines...) |
491 // Initialize compiled code entries to null. | 495 // Initialize compiled code entries to null. |
492 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, | 496 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
493 JSRegExp::IRREGEXP, | 497 JSRegExp::IRREGEXP, |
494 pattern, | 498 pattern, |
495 flags, | 499 flags, |
496 capture_count); | 500 capture_count); |
497 } | 501 } |
498 | 502 |
499 | 503 |
500 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 504 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
501 Handle<String> subject) { | 505 Handle<String> subject, |
| 506 Zone* zone) { |
502 if (!subject->IsFlat()) FlattenString(subject); | 507 if (!subject->IsFlat()) FlattenString(subject); |
503 | 508 |
504 // Check the asciiness of the underlying storage. | 509 // Check the asciiness of the underlying storage. |
505 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 510 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
506 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; | 511 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii, zone)) return -1; |
507 | 512 |
508 #ifdef V8_INTERPRETED_REGEXP | 513 #ifdef V8_INTERPRETED_REGEXP |
509 // Byte-code regexp needs space allocated for all its registers. | 514 // Byte-code regexp needs space allocated for all its registers. |
510 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | 515 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
511 #else // V8_INTERPRETED_REGEXP | 516 #else // V8_INTERPRETED_REGEXP |
512 // Native regexp only needs room to output captures. Registers are handled | 517 // Native regexp only needs room to output captures. Registers are handled |
513 // internally. | 518 // internally. |
514 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 519 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
515 #endif // V8_INTERPRETED_REGEXP | 520 #endif // V8_INTERPRETED_REGEXP |
516 } | 521 } |
(...skipping 12 matching lines...) |
529 *max_matches = size / registers_per_match; | 534 *max_matches = size / registers_per_match; |
530 return size; | 535 return size; |
531 #endif // V8_INTERPRETED_REGEXP | 536 #endif // V8_INTERPRETED_REGEXP |
532 } | 537 } |
533 | 538 |
534 | 539 |
535 int RegExpImpl::IrregexpExecRaw( | 540 int RegExpImpl::IrregexpExecRaw( |
536 Handle<JSRegExp> regexp, | 541 Handle<JSRegExp> regexp, |
537 Handle<String> subject, | 542 Handle<String> subject, |
538 int index, | 543 int index, |
539 Vector<int> output) { | 544 Vector<int> output, |
| 545 Zone* zone) { |
540 Isolate* isolate = regexp->GetIsolate(); | 546 Isolate* isolate = regexp->GetIsolate(); |
541 | 547 |
542 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 548 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
543 | 549 |
544 ASSERT(index >= 0); | 550 ASSERT(index >= 0); |
545 ASSERT(index <= subject->length()); | 551 ASSERT(index <= subject->length()); |
546 ASSERT(subject->IsFlat()); | 552 ASSERT(subject->IsFlat()); |
547 | 553 |
548 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 554 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
549 | 555 |
550 #ifndef V8_INTERPRETED_REGEXP | 556 #ifndef V8_INTERPRETED_REGEXP |
551 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 557 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
552 do { | 558 do { |
553 EnsureCompiledIrregexp(regexp, subject, is_ascii); | 559 EnsureCompiledIrregexp(regexp, subject, is_ascii, zone); |
554 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 560 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
555 NativeRegExpMacroAssembler::Result res = | 561 NativeRegExpMacroAssembler::Result res = |
556 NativeRegExpMacroAssembler::Match(code, | 562 NativeRegExpMacroAssembler::Match(code, |
557 subject, | 563 subject, |
558 output.start(), | 564 output.start(), |
559 output.length(), | 565 output.length(), |
560 index, | 566 index, |
561 isolate); | 567 isolate); |
562 if (res != NativeRegExpMacroAssembler::RETRY) { | 568 if (res != NativeRegExpMacroAssembler::RETRY) { |
563 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 569 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
564 isolate->has_pending_exception()); | 570 isolate->has_pending_exception()); |
565 STATIC_ASSERT( | 571 STATIC_ASSERT( |
566 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 572 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
567 STATIC_ASSERT( | 573 STATIC_ASSERT( |
568 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 574 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
569 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 575 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
570 == RE_EXCEPTION); | 576 == RE_EXCEPTION); |
571 return static_cast<IrregexpResult>(res); | 577 return static_cast<IrregexpResult>(res); |
572 } | 578 } |
573 // If result is RETRY, the string has changed representation, and we | 579 // If result is RETRY, the string has changed representation, and we |
574 // must restart from scratch. | 580 // must restart from scratch. |
575 // In this case, it means we must make sure we are prepared to handle | 581 // In this case, it means we must make sure we are prepared to handle |
576 // the potentially different subject (the string can switch between | 582 // the potentially different subject (the string can switch between |
577 // being internal and external, and even between being ASCII and UC16, | 583 // being internal and external, and even between being ASCII and UC16, |
578 // but the characters are always the same). | 584 // but the characters are always the same). |
579 IrregexpPrepare(regexp, subject); | 585 IrregexpPrepare(regexp, subject, zone); |
580 is_ascii = subject->IsAsciiRepresentationUnderneath(); | 586 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
581 } while (true); | 587 } while (true); |
582 UNREACHABLE(); | 588 UNREACHABLE(); |
583 return RE_EXCEPTION; | 589 return RE_EXCEPTION; |
584 #else // V8_INTERPRETED_REGEXP | 590 #else // V8_INTERPRETED_REGEXP |
585 | 591 |
586 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 592 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
587 // We must have done EnsureCompiledIrregexp, so we can get the number of | 593 // We must have done EnsureCompiledIrregexp, so we can get the number of |
588 // registers. | 594 // registers. |
589 int* register_vector = output.start(); | 595 int* register_vector = output.start(); |
(...skipping 14 matching lines...) |
604 isolate->StackOverflow(); | 610 isolate->StackOverflow(); |
605 } | 611 } |
606 return result; | 612 return result; |
607 #endif // V8_INTERPRETED_REGEXP | 613 #endif // V8_INTERPRETED_REGEXP |
608 } | 614 } |
609 | 615 |
610 | 616 |
611 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, | 617 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
612 Handle<String> subject, | 618 Handle<String> subject, |
613 int previous_index, | 619 int previous_index, |
614 Handle<JSArray> last_match_info) { | 620 Handle<JSArray> last_match_info, |
| 621 Zone* zone) { |
615 Isolate* isolate = jsregexp->GetIsolate(); | 622 Isolate* isolate = jsregexp->GetIsolate(); |
616 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); | 623 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
617 | 624 |
618 // Prepare space for the return values. | 625 // Prepare space for the return values. |
619 #ifdef V8_INTERPRETED_REGEXP | 626 #ifdef V8_INTERPRETED_REGEXP |
620 #ifdef DEBUG | 627 #ifdef DEBUG |
621 if (FLAG_trace_regexp_bytecodes) { | 628 if (FLAG_trace_regexp_bytecodes) { |
622 String* pattern = jsregexp->Pattern(); | 629 String* pattern = jsregexp->Pattern(); |
623 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 630 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
624 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 631 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
625 } | 632 } |
626 #endif | 633 #endif |
627 #endif | 634 #endif |
628 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); | 635 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject, zone); |
629 if (required_registers < 0) { | 636 if (required_registers < 0) { |
630 // Compiling failed with an exception. | 637 // Compiling failed with an exception. |
631 ASSERT(isolate->has_pending_exception()); | 638 ASSERT(isolate->has_pending_exception()); |
632 return Handle<Object>::null(); | 639 return Handle<Object>::null(); |
633 } | 640 } |
634 | 641 |
635 OffsetsVector registers(required_registers, isolate); | 642 OffsetsVector registers(required_registers, isolate); |
636 | 643 |
637 int res = RegExpImpl::IrregexpExecRaw( | 644 int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, |
638 jsregexp, subject, previous_index, Vector<int>(registers.vector(), | 645 Vector<int>(registers.vector(), |
639 registers.length())); | 646 registers.length()), |
| 647 zone); |
640 if (res == RE_SUCCESS) { | 648 if (res == RE_SUCCESS) { |
641 int capture_register_count = | 649 int capture_register_count = |
642 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | 650 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
643 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | 651 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
644 AssertNoAllocation no_gc; | 652 AssertNoAllocation no_gc; |
645 int* register_vector = registers.vector(); | 653 int* register_vector = registers.vector(); |
646 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 654 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
647 for (int i = 0; i < capture_register_count; i += 2) { | 655 for (int i = 0; i < capture_register_count; i += 2) { |
648 SetCapture(array, i, register_vector[i]); | 656 SetCapture(array, i, register_vector[i]); |
649 SetCapture(array, i + 1, register_vector[i + 1]); | 657 SetCapture(array, i + 1, register_vector[i + 1]); |
(...skipping 260 matching lines...) |
910 | 918 |
911 | 919 |
912 private: | 920 private: |
913 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 921 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
914 int total_samples_; | 922 int total_samples_; |
915 }; | 923 }; |
916 | 924 |
917 | 925 |
918 class RegExpCompiler { | 926 class RegExpCompiler { |
919 public: | 927 public: |
920 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); | 928 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii, |
| 929 Zone* zone); |
921 | 930 |
922 int AllocateRegister() { | 931 int AllocateRegister() { |
923 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | 932 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
924 reg_exp_too_big_ = true; | 933 reg_exp_too_big_ = true; |
925 return next_register_; | 934 return next_register_; |
926 } | 935 } |
927 return next_register_++; | 936 return next_register_++; |
928 } | 937 } |
929 | 938 |
930 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, | 939 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, |
(...skipping 19 matching lines...) |
950 | 959 |
951 inline bool ignore_case() { return ignore_case_; } | 960 inline bool ignore_case() { return ignore_case_; } |
952 inline bool ascii() { return ascii_; } | 961 inline bool ascii() { return ascii_; } |
953 FrequencyCollator* frequency_collator() { return &frequency_collator_; } | 962 FrequencyCollator* frequency_collator() { return &frequency_collator_; } |
954 | 963 |
955 int current_expansion_factor() { return current_expansion_factor_; } | 964 int current_expansion_factor() { return current_expansion_factor_; } |
956 void set_current_expansion_factor(int value) { | 965 void set_current_expansion_factor(int value) { |
957 current_expansion_factor_ = value; | 966 current_expansion_factor_ = value; |
958 } | 967 } |
959 | 968 |
| 969 Zone* zone() { return zone_; } |
| 970 |
960 static const int kNoRegister = -1; | 971 static const int kNoRegister = -1; |
961 | 972 |
962 private: | 973 private: |
963 EndNode* accept_; | 974 EndNode* accept_; |
964 int next_register_; | 975 int next_register_; |
965 List<RegExpNode*>* work_list_; | 976 List<RegExpNode*>* work_list_; |
966 int recursion_depth_; | 977 int recursion_depth_; |
967 RegExpMacroAssembler* macro_assembler_; | 978 RegExpMacroAssembler* macro_assembler_; |
968 bool ignore_case_; | 979 bool ignore_case_; |
969 bool ascii_; | 980 bool ascii_; |
970 bool reg_exp_too_big_; | 981 bool reg_exp_too_big_; |
971 int current_expansion_factor_; | 982 int current_expansion_factor_; |
972 FrequencyCollator frequency_collator_; | 983 FrequencyCollator frequency_collator_; |
| 984 Zone* zone_; |
973 }; | 985 }; |
974 | 986 |
975 | 987 |
976 class RecursionCheck { | 988 class RecursionCheck { |
977 public: | 989 public: |
978 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 990 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
979 compiler->IncrementRecursionDepth(); | 991 compiler->IncrementRecursionDepth(); |
980 } | 992 } |
981 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 993 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
982 private: | 994 private: |
983 RegExpCompiler* compiler_; | 995 RegExpCompiler* compiler_; |
984 }; | 996 }; |
985 | 997 |
986 | 998 |
987 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 999 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
988 return RegExpEngine::CompilationResult("RegExp too big"); | 1000 return RegExpEngine::CompilationResult("RegExp too big"); |
989 } | 1001 } |
990 | 1002 |
991 | 1003 |
992 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1004 // Attempts to compile the regexp using an Irregexp code generator. Returns |
993 // a fixed array or a null handle depending on whether it succeeded. | 1005 // a fixed array or a null handle depending on whether it succeeded. |
994 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 1006 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii, |
| 1007 Zone* zone) |
995 : next_register_(2 * (capture_count + 1)), | 1008 : next_register_(2 * (capture_count + 1)), |
996 work_list_(NULL), | 1009 work_list_(NULL), |
997 recursion_depth_(0), | 1010 recursion_depth_(0), |
998 ignore_case_(ignore_case), | 1011 ignore_case_(ignore_case), |
999 ascii_(ascii), | 1012 ascii_(ascii), |
1000 reg_exp_too_big_(false), | 1013 reg_exp_too_big_(false), |
1001 current_expansion_factor_(1), | 1014 current_expansion_factor_(1), |
1002 frequency_collator_() { | 1015 frequency_collator_(), |
1003 accept_ = new EndNode(EndNode::ACCEPT); | 1016 zone_(zone) { |
| 1017 accept_ = new EndNode(EndNode::ACCEPT, zone); |
1004 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 1018 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
1005 } | 1019 } |
1006 | 1020 |
1007 | 1021 |
1008 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 1022 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
1009 RegExpMacroAssembler* macro_assembler, | 1023 RegExpMacroAssembler* macro_assembler, |
1010 RegExpNode* start, | 1024 RegExpNode* start, |
1011 int capture_count, | 1025 int capture_count, |
1012 Handle<String> pattern) { | 1026 Handle<String> pattern) { |
1013 Heap* heap = pattern->GetHeap(); | 1027 Heap* heap = pattern->GetHeap(); |
(...skipping 1903 matching lines...) |
2917 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2931 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2918 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2932 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
2919 bool not_at_start = (trace->at_start() == Trace::FALSE); | 2933 bool not_at_start = (trace->at_start() == Trace::FALSE); |
2920 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2934 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
2921 if (lookahead == NULL) { | 2935 if (lookahead == NULL) { |
2922 int eats_at_least = | 2936 int eats_at_least = |
2923 Min(kMaxLookaheadForBoyerMoore, | 2937 Min(kMaxLookaheadForBoyerMoore, |
2924 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 2938 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); |
2925 if (eats_at_least >= 1) { | 2939 if (eats_at_least >= 1) { |
2926 BoyerMooreLookahead* bm = | 2940 BoyerMooreLookahead* bm = |
2927 new BoyerMooreLookahead(eats_at_least, compiler); | 2941 new BoyerMooreLookahead(eats_at_least, compiler, zone()); |
2928 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); | 2942 FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); |
2929 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 2943 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; |
2930 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 2944 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; |
2931 } | 2945 } |
2932 } else { | 2946 } else { |
2933 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; | 2947 if (lookahead->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; |
2934 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; | 2948 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE; |
2935 } | 2949 } |
2936 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); | 2950 bool at_boundary = (type_ == AssertionNode::AT_BOUNDARY); |
2937 if (next_is_word_character == Trace::UNKNOWN) { | 2951 if (next_is_word_character == Trace::UNKNOWN) { |
(...skipping 560 matching lines...) |
3498 void BoyerMoorePositionInfo::SetAll() { | 3512 void BoyerMoorePositionInfo::SetAll() { |
3499 s_ = w_ = d_ = kLatticeUnknown; | 3513 s_ = w_ = d_ = kLatticeUnknown; |
3500 if (map_count_ != kMapSize) { | 3514 if (map_count_ != kMapSize) { |
3501 map_count_ = kMapSize; | 3515 map_count_ = kMapSize; |
3502 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 3516 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
3503 } | 3517 } |
3504 } | 3518 } |
3505 | 3519 |
3506 | 3520 |
3507 BoyerMooreLookahead::BoyerMooreLookahead( | 3521 BoyerMooreLookahead::BoyerMooreLookahead( |
3508 int length, RegExpCompiler* compiler) | 3522 int length, RegExpCompiler* compiler, Zone* zone) |
3509 : length_(length), | 3523 : length_(length), |
3510 compiler_(compiler) { | 3524 compiler_(compiler) { |
3511 if (compiler->ascii()) { | 3525 if (compiler->ascii()) { |
3512 max_char_ = String::kMaxAsciiCharCode; | 3526 max_char_ = String::kMaxAsciiCharCode; |
3513 } else { | 3527 } else { |
3514 max_char_ = String::kMaxUtf16CodeUnit; | 3528 max_char_ = String::kMaxUtf16CodeUnit; |
3515 } | 3529 } |
3516 bitmaps_ = new ZoneList<BoyerMoorePositionInfo*>(length); | 3530 bitmaps_ = new ZoneList<BoyerMoorePositionInfo*>(length); |
3517 for (int i = 0; i < length; i++) { | 3531 for (int i = 0; i < length; i++) { |
3518 bitmaps_->Add(new BoyerMoorePositionInfo()); | 3532 bitmaps_->Add(new BoyerMoorePositionInfo(zone)); |
3519 } | 3533 } |
3520 } | 3534 } |
3521 | 3535 |
3522 | 3536 |
3523 // Find the longest range of lookahead that has the fewest number of different | 3537 // Find the longest range of lookahead that has the fewest number of different |
3524 // characters that can occur at a given position. Since we are optimizing two | 3538 // characters that can occur at a given position. Since we are optimizing two |
3525 // different parameters at once this is a tradeoff. | 3539 // different parameters at once this is a tradeoff. |
3526 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { | 3540 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { |
3527 int biggest_points = 0; | 3541 int biggest_points = 0; |
3528 // If more than 32 characters out of 128 can occur it is unlikely that we can | 3542 // If more than 32 characters out of 128 can occur it is unlikely that we can |
(...skipping 325 matching lines...) |
3854 // not be atoms, they can be any reasonably limited character class or | 3868 // not be atoms, they can be any reasonably limited character class or |
3855 // small alternation. | 3869 // small alternation. |
3856 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. | 3870 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes. |
3857 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 3871 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
3858 if (lookahead == NULL) { | 3872 if (lookahead == NULL) { |
3859 eats_at_least = | 3873 eats_at_least = |
3860 Min(kMaxLookaheadForBoyerMoore, | 3874 Min(kMaxLookaheadForBoyerMoore, |
3861 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); | 3875 EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start)); |
3862 if (eats_at_least >= 1) { | 3876 if (eats_at_least >= 1) { |
3863 BoyerMooreLookahead* bm = | 3877 BoyerMooreLookahead* bm = |
3864 new BoyerMooreLookahead(eats_at_least, compiler); | 3878 new BoyerMooreLookahead(eats_at_least, compiler, zone()); |
3865 GuardedAlternative alt0 = alternatives_->at(0); | 3879 GuardedAlternative alt0 = alternatives_->at(0); |
3866 alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); | 3880 alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start); |
3867 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); | 3881 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); |
3868 } | 3882 } |
3869 } else { | 3883 } else { |
3870 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); | 3884 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler); |
3871 } | 3885 } |
3872 } | 3886 } |
3873 } | 3887 } |
3874 } | 3888 } |
(...skipping 767 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Returns a newly allocated TextNode constructed from this character class
// and the on_success continuation node. The compiler argument is not used
// by this body. Both diff columns (OLD and NEW) are identical here.
4642 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4656 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4643 RegExpNode* on_success) { | 4657 RegExpNode* on_success) { |
4644 return new TextNode(this, on_success); | 4658 return new TextNode(this, on_success); |
4645 } | 4659 } |
4646 | 4660 |
4647 | 4661 |
// Builds a ChoiceNode holding one alternative per entry of alternatives().
// Each entry is converted with its own ToNode(compiler, on_success), so all
// alternatives share the same on_success continuation, and they are added in
// list order. Returns the resulting ChoiceNode.
// Diff note: the NEW column passes compiler->zone() as a second constructor
// argument to ChoiceNode; the OLD column passes only the length.
4648 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 4662 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
4649 RegExpNode* on_success) { | 4663 RegExpNode* on_success) { |
4650 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 4664 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
4651 int length = alternatives->length(); | 4665 int length = alternatives->length(); |
// The ChoiceNode is constructed with the number of alternatives up front.
4652 ChoiceNode* result = new ChoiceNode(length); | 4666 ChoiceNode* result = new ChoiceNode(length, compiler->zone()); |
4653 for (int i = 0; i < length; i++) { | 4667 for (int i = 0; i < length; i++) { |
// Wrap the converted sub-tree in a GuardedAlternative before adding it.
4654 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 4668 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
4655 on_success)); | 4669 on_success)); |
4656 result->AddAlternative(alternative); | 4670 result->AddAlternative(alternative); |
4657 } | 4671 } |
4658 return result; | 4672 return result; |
4659 } | 4673 } |
4660 | 4674 |
4661 | 4675 |
4662 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 4676 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4765 return answer; | 4779 return answer; |
4766 } | 4780 } |
4767 } | 4781 } |
4768 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 4782 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
4769 ASSERT(max > 0); // Due to the 'if' above. | 4783 ASSERT(max > 0); // Due to the 'if' above. |
4770 RegExpExpansionLimiter limiter(compiler, max); | 4784 RegExpExpansionLimiter limiter(compiler, max); |
4771 if (limiter.ok_to_expand()) { | 4785 if (limiter.ok_to_expand()) { |
4772 // Unroll the optional matches up to max. | 4786 // Unroll the optional matches up to max. |
4773 RegExpNode* answer = on_success; | 4787 RegExpNode* answer = on_success; |
4774 for (int i = 0; i < max; i++) { | 4788 for (int i = 0; i < max; i++) { |
4775 ChoiceNode* alternation = new ChoiceNode(2); | 4789 ChoiceNode* alternation = new ChoiceNode(2, compiler->zone()); |
4776 if (is_greedy) { | 4790 if (is_greedy) { |
4777 alternation->AddAlternative( | 4791 alternation->AddAlternative( |
4778 GuardedAlternative(body->ToNode(compiler, answer))); | 4792 GuardedAlternative(body->ToNode(compiler, answer))); |
4779 alternation->AddAlternative(GuardedAlternative(on_success)); | 4793 alternation->AddAlternative(GuardedAlternative(on_success)); |
4780 } else { | 4794 } else { |
4781 alternation->AddAlternative(GuardedAlternative(on_success)); | 4795 alternation->AddAlternative(GuardedAlternative(on_success)); |
4782 alternation->AddAlternative( | 4796 alternation->AddAlternative( |
4783 GuardedAlternative(body->ToNode(compiler, answer))); | 4797 GuardedAlternative(body->ToNode(compiler, answer))); |
4784 } | 4798 } |
4785 answer = alternation; | 4799 answer = alternation; |
4786 if (not_at_start) alternation->set_not_at_start(); | 4800 if (not_at_start) alternation->set_not_at_start(); |
4787 } | 4801 } |
4788 return answer; | 4802 return answer; |
4789 } | 4803 } |
4790 } | 4804 } |
4791 } | 4805 } |
4792 bool has_min = min > 0; | 4806 bool has_min = min > 0; |
4793 bool has_max = max < RegExpTree::kInfinity; | 4807 bool has_max = max < RegExpTree::kInfinity; |
4794 bool needs_counter = has_min || has_max; | 4808 bool needs_counter = has_min || has_max; |
4795 int reg_ctr = needs_counter | 4809 int reg_ctr = needs_counter |
4796 ? compiler->AllocateRegister() | 4810 ? compiler->AllocateRegister() |
4797 : RegExpCompiler::kNoRegister; | 4811 : RegExpCompiler::kNoRegister; |
4798 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 4812 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0, |
| 4813 compiler->zone()); |
4799 if (not_at_start) center->set_not_at_start(); | 4814 if (not_at_start) center->set_not_at_start(); |
4800 RegExpNode* loop_return = needs_counter | 4815 RegExpNode* loop_return = needs_counter |
4801 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 4816 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
4802 : static_cast<RegExpNode*>(center); | 4817 : static_cast<RegExpNode*>(center); |
4803 if (body_can_be_empty) { | 4818 if (body_can_be_empty) { |
4804 // If the body can be empty we need to check if it was and then | 4819 // If the body can be empty we need to check if it was and then |
4805 // backtrack. | 4820 // backtrack. |
4806 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 4821 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, |
4807 reg_ctr, | 4822 reg_ctr, |
4808 min, | 4823 min, |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4857 return AssertionNode::AtNonBoundary(on_success); | 4872 return AssertionNode::AtNonBoundary(on_success); |
4858 case END_OF_INPUT: | 4873 case END_OF_INPUT: |
4859 return AssertionNode::AtEnd(on_success); | 4874 return AssertionNode::AtEnd(on_success); |
4860 case END_OF_LINE: { | 4875 case END_OF_LINE: { |
4861 // Compile $ in multiline regexps as an alternation with a positive | 4876 // Compile $ in multiline regexps as an alternation with a positive |
4862 // lookahead in one side and an end-of-input on the other side. | 4877 // lookahead in one side and an end-of-input on the other side. |
4863 // We need two registers for the lookahead. | 4878 // We need two registers for the lookahead. |
4864 int stack_pointer_register = compiler->AllocateRegister(); | 4879 int stack_pointer_register = compiler->AllocateRegister(); |
4865 int position_register = compiler->AllocateRegister(); | 4880 int position_register = compiler->AllocateRegister(); |
4866 // The ChoiceNode to distinguish between a newline and end-of-input. | 4881 // The ChoiceNode to distinguish between a newline and end-of-input. |
4867 ChoiceNode* result = new ChoiceNode(2); | 4882 ChoiceNode* result = new ChoiceNode(2, compiler->zone()); |
4868 // Create a newline atom. | 4883 // Create a newline atom. |
4869 ZoneList<CharacterRange>* newline_ranges = | 4884 ZoneList<CharacterRange>* newline_ranges = |
4870 new ZoneList<CharacterRange>(3); | 4885 new ZoneList<CharacterRange>(3); |
4871 CharacterRange::AddClassEscape('n', newline_ranges); | 4886 CharacterRange::AddClassEscape('n', newline_ranges); |
4872 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); | 4887 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); |
4873 TextNode* newline_matcher = new TextNode( | 4888 TextNode* newline_matcher = new TextNode( |
4874 newline_atom, | 4889 newline_atom, |
4875 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4890 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
4876 position_register, | 4891 position_register, |
4877 0, // No captures inside. | 4892 0, // No captures inside. |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4944 // the second alternative is tried, which is exactly the desired result | 4959 // the second alternative is tried, which is exactly the desired result |
4945 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 4960 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
4946 // ChoiceNode that knows to ignore the first exit when calculating quick | 4961 // ChoiceNode that knows to ignore the first exit when calculating quick |
4947 // checks. | 4962 // checks. |
4948 GuardedAlternative body_alt( | 4963 GuardedAlternative body_alt( |
4949 body()->ToNode( | 4964 body()->ToNode( |
4950 compiler, | 4965 compiler, |
4951 success = new NegativeSubmatchSuccess(stack_pointer_register, | 4966 success = new NegativeSubmatchSuccess(stack_pointer_register, |
4952 position_register, | 4967 position_register, |
4953 register_count, | 4968 register_count, |
4954 register_start))); | 4969 register_start, |
| 4970 compiler->zone()))); |
4955 ChoiceNode* choice_node = | 4971 ChoiceNode* choice_node = |
4956 new NegativeLookaheadChoiceNode(body_alt, | 4972 new NegativeLookaheadChoiceNode(body_alt, |
4957 GuardedAlternative(on_success)); | 4973 GuardedAlternative(on_success), |
| 4974 compiler->zone()); |
4958 return ActionNode::BeginSubmatch(stack_pointer_register, | 4975 return ActionNode::BeginSubmatch(stack_pointer_register, |
4959 position_register, | 4976 position_register, |
4960 choice_node); | 4977 choice_node); |
4961 } | 4978 } |
4962 } | 4979 } |
4963 | 4980 |
4964 | 4981 |
4965 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4982 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
4966 RegExpNode* on_success) { | 4983 RegExpNode* on_success) { |
4967 return ToNode(body(), index(), compiler, on_success); | 4984 return ToNode(body(), index(), compiler, on_success); |
(...skipping 852 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5820 } | 5837 } |
5821 | 5838 |
5822 | 5839 |
5823 RegExpEngine::CompilationResult RegExpEngine::Compile( | 5840 RegExpEngine::CompilationResult RegExpEngine::Compile( |
5824 RegExpCompileData* data, | 5841 RegExpCompileData* data, |
5825 bool ignore_case, | 5842 bool ignore_case, |
5826 bool is_global, | 5843 bool is_global, |
5827 bool is_multiline, | 5844 bool is_multiline, |
5828 Handle<String> pattern, | 5845 Handle<String> pattern, |
5829 Handle<String> sample_subject, | 5846 Handle<String> sample_subject, |
5830 bool is_ascii) { | 5847 bool is_ascii, |
| 5848 Zone* zone) { |
5831 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 5849 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
5832 return IrregexpRegExpTooBig(); | 5850 return IrregexpRegExpTooBig(); |
5833 } | 5851 } |
5834 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); | 5852 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone); |
5835 | 5853 |
5836 // Sample some characters from the middle of the string. | 5854 // Sample some characters from the middle of the string. |
5837 static const int kSampleSize = 128; | 5855 static const int kSampleSize = 128; |
5838 | 5856 |
5839 FlattenString(sample_subject); | 5857 FlattenString(sample_subject); |
5840 int chars_sampled = 0; | 5858 int chars_sampled = 0; |
5841 int half_way = (sample_subject->length() - kSampleSize) / 2; | 5859 int half_way = (sample_subject->length() - kSampleSize) / 2; |
5842 for (int i = Max(0, half_way); | 5860 for (int i = Max(0, half_way); |
5843 i < sample_subject->length() && chars_sampled < kSampleSize; | 5861 i < sample_subject->length() && chars_sampled < kSampleSize; |
5844 i++, chars_sampled++) { | 5862 i++, chars_sampled++) { |
(...skipping 17 matching lines...) Expand all Loading... |
5862 RegExpTree::kInfinity, | 5880 RegExpTree::kInfinity, |
5863 false, | 5881 false, |
5864 new RegExpCharacterClass('*'), | 5882 new RegExpCharacterClass('*'), |
5865 &compiler, | 5883 &compiler, |
5866 captured_body, | 5884 captured_body, |
5867 data->contains_anchor); | 5885 data->contains_anchor); |
5868 | 5886 |
5869 if (data->contains_anchor) { | 5887 if (data->contains_anchor) { |
5870 // Unroll loop once, to take care of the case that might start | 5888 // Unroll loop once, to take care of the case that might start |
5871 // at the start of input. | 5889 // at the start of input. |
5872 ChoiceNode* first_step_node = new ChoiceNode(2); | 5890 ChoiceNode* first_step_node = new ChoiceNode(2, zone); |
5873 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 5891 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5874 first_step_node->AddAlternative(GuardedAlternative( | 5892 first_step_node->AddAlternative(GuardedAlternative( |
5875 new TextNode(new RegExpCharacterClass('*'), loop_node))); | 5893 new TextNode(new RegExpCharacterClass('*'), loop_node))); |
5876 node = first_step_node; | 5894 node = first_step_node; |
5877 } else { | 5895 } else { |
5878 node = loop_node; | 5896 node = loop_node; |
5879 } | 5897 } |
5880 } | 5898 } |
5881 if (is_ascii) { | 5899 if (is_ascii) { |
5882 node = node->FilterASCII(RegExpCompiler::kMaxRecursion); | 5900 node = node->FilterASCII(RegExpCompiler::kMaxRecursion); |
5883 // Do it again to propagate the new nodes to places where they were not | 5901 // Do it again to propagate the new nodes to places where they were not |
5884 // put because they had not been calculated yet. | 5902 // put because they had not been calculated yet. |
5885 if (node != NULL) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); | 5903 if (node != NULL) node = node->FilterASCII(RegExpCompiler::kMaxRecursion); |
5886 } | 5904 } |
5887 | 5905 |
5888 if (node == NULL) node = new EndNode(EndNode::BACKTRACK); | 5906 if (node == NULL) node = new EndNode(EndNode::BACKTRACK, zone); |
5889 data->node = node; | 5907 data->node = node; |
5890 Analysis analysis(ignore_case, is_ascii); | 5908 Analysis analysis(ignore_case, is_ascii); |
5891 analysis.EnsureAnalyzed(node); | 5909 analysis.EnsureAnalyzed(node); |
5892 if (analysis.has_failed()) { | 5910 if (analysis.has_failed()) { |
5893 const char* error_message = analysis.error_message(); | 5911 const char* error_message = analysis.error_message(); |
5894 return CompilationResult(error_message); | 5912 return CompilationResult(error_message); |
5895 } | 5913 } |
5896 | 5914 |
5897 // Create the correct assembler for the architecture. | 5915 // Create the correct assembler for the architecture. |
5898 #ifndef V8_INTERPRETED_REGEXP | 5916 #ifndef V8_INTERPRETED_REGEXP |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5935 } | 5953 } |
5936 | 5954 |
5937 return compiler.Assemble(&macro_assembler, | 5955 return compiler.Assemble(&macro_assembler, |
5938 node, | 5956 node, |
5939 data->capture_count, | 5957 data->capture_count, |
5940 pattern); | 5958 pattern); |
5941 } | 5959 } |
5942 | 5960 |
5943 | 5961 |
5944 }} // namespace v8::internal | 5962 }} // namespace v8::internal |
OLD | NEW |