Index: src/x64/regexp-macro-assembler-x64.cc |
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc |
index bf232bff9b523413a674ff3adc443177262a20d4..53027254c032673eb28a3a845d90e0da8a69a6e9 100644 |
--- a/src/x64/regexp-macro-assembler-x64.cc |
+++ b/src/x64/regexp-macro-assembler-x64.cc |
@@ -1,4 +1,4 @@ |
-// Copyright 2011 the V8 project authors. All rights reserved. |
+// Copyright 2012 the V8 project authors. All rights reserved. |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are |
// met: |
@@ -77,6 +77,7 @@ namespace internal { |
* through the runtime system) |
* - stack_area_base (High end of the memory area to use as |
* backtracking stack) |
+ * - capture array size (may fit multiple sets of matches) |
* - int* capture_array (int[num_saved_registers_], for output). |
* - end of input (Address of end of string) |
* - start of input (Address of first character in string) |
@@ -84,6 +85,7 @@ namespace internal { |
* - String* input_string (input string) |
* - return address |
* - backup of callee save registers (rbx, possibly rsi and rdi). |
+ * - success counter (only useful for global regexp to count matches) |
* - Offset of location before start of input (effectively character |
* position -1). Used to initialize capture registers to a non-position. |
* - At start of string (if 1, we are starting at the start of the |
@@ -744,13 +746,16 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, |
void RegExpMacroAssemblerX64::Fail() { |
- ASSERT(FAILURE == 0); // Return value for failure is zero. |
- __ Set(rax, 0); |
+ STATIC_ASSERT(FAILURE == 0); // FAILURE is 0; global mode returns a count. |
+ if (!global()) { |
+ __ Set(rax, FAILURE); |
+ } |
__ jmp(&exit_label_); |
} |
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
+ Label return_rax, restart; |
// Finalize code - write the entry point code now we know how many |
// registers we need. |
// Entry code: |
@@ -784,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
ASSERT_EQ(kInputStart, -3 * kPointerSize); |
ASSERT_EQ(kInputEnd, -4 * kPointerSize); |
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize); |
- ASSERT_EQ(kStackHighEnd, -6 * kPointerSize); |
+ ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize); |
__ push(rdi); |
__ push(rsi); |
__ push(rdx); |
@@ -795,6 +800,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ push(rbx); // Callee-save |
#endif |
+ __ push(Immediate(0)); // Number of successful matches in a global regexp. |
__ push(Immediate(0)); // Make room for "at start" constant. |
// Check if we have space on the stack for registers. |
@@ -815,14 +821,14 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
// Exit with OutOfMemory exception. There is not enough space on the stack |
// for our working registers. |
__ Set(rax, EXCEPTION); |
- __ jmp(&exit_label_); |
+ __ jmp(&return_rax); |
__ bind(&stack_limit_hit); |
__ Move(code_object_pointer(), masm_.CodeObject()); |
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp. |
__ testq(rax, rax); |
// If returned value is non-zero, we exit with the returned value as result. |
- __ j(not_zero, &exit_label_); |
+ __ j(not_zero, &return_rax); |
__ bind(&stack_ok); |
@@ -847,43 +853,53 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
// position registers. |
__ movq(Operand(rbp, kInputStartMinusOne), rax); |
+ // Initialize code object pointer. |
+ __ Move(code_object_pointer(), masm_.CodeObject()); |
+ |
+ // Load previous char as initial value of current-character. |
+ // Compare the whole start index, not just its low byte, so that an index |
+ // such as 256 is not mistaken for the start of the string. |
+ // NOTE(review): assumes the index is a 32-bit int; confirm the frame layout. |
+ Label at_start, character_loaded; |
+ __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); |
+ __ j(equal, &at_start, Label::kNear); |
+ LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
+ __ jmp(&character_loaded, Label::kNear); |
+ __ bind(&at_start); |
+ __ Set(current_character(), '\n'); |
+ |
+ if (global()) { |
+ // Initializing current-character already done, so skip it. |
+ __ jmp(&character_loaded, Label::kNear); |
+ // Restart matching from here in a global regexp. |
+ __ bind(&restart); |
+ // In a restarted pass, initialize current-character here. |
+ LoadCurrentCharacterUnchecked(-1, 1); |
+ } |
+ __ bind(&character_loaded); |
+ |
+ // Initialize on-stack registers. |
if (num_saved_registers_ > 0) { |
// Fill saved registers with initial value = start offset - 1 |
// Fill in stack push order, to avoid accessing across an unwritten |
// page (a problem on Windows). |
- __ Set(rcx, kRegisterZero); |
- Label init_loop; |
- __ bind(&init_loop); |
- __ movq(Operand(rbp, rcx, times_1, 0), rax); |
- __ subq(rcx, Immediate(kPointerSize)); |
- __ cmpq(rcx, |
- Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); |
- __ j(greater, &init_loop); |
- } |
- // Ensure that we have written to each stack page, in order. Skipping a page |
- // on Windows can cause segmentation faults. Assuming page size is 4k. |
- const int kPageSize = 4096; |
- const int kRegistersPerPage = kPageSize / kPointerSize; |
- for (int i = num_saved_registers_ + kRegistersPerPage - 1; |
- i < num_registers_; |
- i += kRegistersPerPage) { |
- __ movq(register_location(i), rax); // One write every page. |
+ if (num_saved_registers_ > 8) { |
+ __ Set(rcx, kRegisterZero); |
+ Label init_loop; |
+ __ bind(&init_loop); |
+ __ movq(Operand(rbp, rcx, times_1, 0), rax); |
+ __ subq(rcx, Immediate(kPointerSize)); |
+ __ cmpq(rcx, |
+ Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); |
+ __ j(greater, &init_loop); |
+ } else { // Unroll the loop. |
+ for (int i = 0; i < num_saved_registers_; i++) { |
+ __ movq(register_location(i), rax); |
+ } |
+ } |
} |
// Initialize backtrack stack pointer. |
__ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); |
- // Initialize code object pointer. |
- __ Move(code_object_pointer(), masm_.CodeObject()); |
- // Load previous char as initial value of current-character. |
- Label at_start; |
- __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); |
- __ j(equal, &at_start); |
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
- __ jmp(&start_label_); |
- __ bind(&at_start); |
- __ Set(current_character(), '\n'); |
- __ jmp(&start_label_); |
+ __ jmp(&start_label_); |
// Exit code: |
if (success_label_.is_linked()) { |
@@ -902,6 +918,10 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
} |
for (int i = 0; i < num_saved_registers_; i++) { |
__ movq(rax, register_location(i)); |
+ if (i == 0 && global()) { |
+ // Keep capture start in rdx for the zero-length check later. |
+ __ movq(rdx, rax); |
+ } |
__ addq(rax, rcx); // Convert to index from start, not end. |
if (mode_ == UC16) { |
__ sar(rax, Immediate(1)); // Convert byte index to character index. |
@@ -909,12 +929,54 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ movl(Operand(rbx, i * kIntSize), rax); |
} |
} |
- __ Set(rax, SUCCESS); |
+ |
+ if (global()) { |
+ // Restart matching if the regular expression is flagged as global. |
+ // Increment success counter. |
+ __ incq(Operand(rbp, kSuccessfulCaptures)); |
+ // Capture results have been stored, so the number of remaining global |
+ // output registers is reduced by the number of stored captures. |
+ // NOTE(review): the count is presumably passed as a 32-bit int, which |
+ // leaves the upper half of its 64-bit slot unspecified; load only the low |
+ // 32 bits (sign-extended) instead of the whole quadword. |
+ __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters)); |
+ __ subq(rcx, Immediate(num_saved_registers_)); |
+ // Check whether we have enough room for another set of capture results. |
+ __ cmpq(rcx, Immediate(num_saved_registers_)); |
+ __ j(less, &exit_label_); |
+ |
+ // Store the full 64-bit value back so later reads of the slot are clean. |
+ __ movq(Operand(rbp, kNumOutputRegisters), rcx); |
+ // Advance the location for output. |
+ __ addq(Operand(rbp, kRegisterOutput), |
+ Immediate(num_saved_registers_ * kIntSize)); |
+ |
+ // Prepare rax to initialize registers with its value in the next run. |
+ __ movq(rax, Operand(rbp, kInputStartMinusOne)); |
+ |
+ // Special case for zero-length matches. |
+ // rdx: capture start index |
+ __ cmpq(rdi, rdx); |
+ // Not a zero-length match, restart. |
+ __ j(not_equal, &restart); |
+ // rdi (offset from the end) is zero if we already reached the end. |
+ __ testq(rdi, rdi); |
+ __ j(zero, &exit_label_, Label::kNear); |
+ // Advance current position after a zero-length match. |
+ if (mode_ == UC16) { |
+ __ addq(rdi, Immediate(2)); |
+ } else { |
+ __ incq(rdi); |
+ } |
+ __ jmp(&restart); |
+ } else { |
+ __ movq(rax, Immediate(SUCCESS)); |
+ } |
} |
- // Exit and return rax |
__ bind(&exit_label_); |
+ if (global()) { |
+ // Return the number of successful captures. |
+ __ movq(rax, Operand(rbp, kSuccessfulCaptures)); |
+ } |
+ __ bind(&return_rax); |
#ifdef _WIN64 |
// Restore callee save registers. |
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister)); |
@@ -951,7 +1013,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ testq(rax, rax); |
// If returning non-zero, we should end execution with the given |
// result as return value. |
- __ j(not_zero, &exit_label_); |
+ __ j(not_zero, &return_rax); |
// Restore registers. |
__ Move(code_object_pointer(), masm_.CodeObject()); |
@@ -1012,7 +1074,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ bind(&exit_with_exception); |
// Exit with Result EXCEPTION(-1) to signal thrown exception. |
__ Set(rax, EXCEPTION); |
- __ jmp(&exit_label_); |
+ __ jmp(&return_rax); |
} |
FixupCodeRelativePositions(); |