Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(514)

Unified Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 10386090: Implement loop for global regexps in regexp assembler. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: fix bugs, add tests, port to x64 and arm. Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/x64/regexp-macro-assembler-x64.cc
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc
index bf232bff9b523413a674ff3adc443177262a20d4..53027254c032673eb28a3a845d90e0da8a69a6e9 100644
--- a/src/x64/regexp-macro-assembler-x64.cc
+++ b/src/x64/regexp-macro-assembler-x64.cc
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -77,6 +77,7 @@ namespace internal {
* through the runtime system)
* - stack_area_base (High end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
* - end of input (Address of end of string)
* - start of input (Address of first character in string)
@@ -84,6 +85,7 @@ namespace internal {
* - String* input_string (input string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
+ * - success counter (only useful for global regexp to count matches)
* - Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a non-position.
* - At start of string (if 1, we are starting at the start of the
@@ -744,13 +746,16 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
void RegExpMacroAssemblerX64::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(rax, 0);
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(rax, FAILURE);
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
+ Label return_rax, restart;
// Finalize code - write the entry point code now we know how many
// registers we need.
// Entry code:
@@ -784,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
ASSERT_EQ(kInputStart, -3 * kPointerSize);
ASSERT_EQ(kInputEnd, -4 * kPointerSize);
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
- ASSERT_EQ(kStackHighEnd, -6 * kPointerSize);
+ ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize);
__ push(rdi);
__ push(rsi);
__ push(rdx);
@@ -795,6 +800,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ push(rbx); // Callee-save
#endif
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
__ push(Immediate(0)); // Make room for "at start" constant.
// Check if we have space on the stack for registers.
@@ -815,14 +821,14 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
__ bind(&stack_limit_hit);
__ Move(code_object_pointer(), masm_.CodeObject());
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
__ testq(rax, rax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
__ bind(&stack_ok);
@@ -847,43 +853,53 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
+ // Initialize code object pointer.
+ __ Move(code_object_pointer(), masm_.CodeObject());
+
+ // Load previous char as initial value of current-character.
+ Label at_start, character_loaded;
+ __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
+ __ j(equal, &at_start, Label::kNear);
+ LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
+ __ jmp(&character_loaded, Label::kNear);
+ __ bind(&at_start);
+ __ Set(current_character(), '\n');
+
+ if (global()) {
+ // Initializing current-character already done, so skip it.
+ __ jmp(&character_loaded, Label::kNear);
+ // Restart matching from here in a global regexp.
+ __ bind(&restart);
+ // In a restarted pass, initialize current-character here.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ }
+ __ bind(&character_loaded);
+
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) {
// Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows).
- __ Set(rcx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ movq(Operand(rbp, rcx, times_1, 0), rax);
- __ subq(rcx, Immediate(kPointerSize));
- __ cmpq(rcx,
- Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
- __ j(greater, &init_loop);
- }
- // Ensure that we have written to each stack page, in order. Skipping a page
- // on Windows can cause segmentation faults. Assuming page size is 4k.
- const int kPageSize = 4096;
- const int kRegistersPerPage = kPageSize / kPointerSize;
- for (int i = num_saved_registers_ + kRegistersPerPage - 1;
- i < num_registers_;
- i += kRegistersPerPage) {
- __ movq(register_location(i), rax); // One write every page.
+ if (num_saved_registers_ > 8) {
+ __ Set(rcx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ movq(Operand(rbp, rcx, times_1, 0), rax);
+ __ subq(rcx, Immediate(kPointerSize));
+ __ cmpq(rcx,
+ Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ movq(register_location(i), rax);
+ }
+ }
}
// Initialize backtrack stack pointer.
__ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
- // Initialize code object pointer.
- __ Move(code_object_pointer(), masm_.CodeObject());
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ Set(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -902,6 +918,10 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
}
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
+ if (i == 0 && global()) {
+ // Keep capture start in rdx for the zero-length check later.
+ __ movq(rdx, rax);
+ }
__ addq(rax, rcx); // Convert to index from start, not end.
if (mode_ == UC16) {
__ sar(rax, Immediate(1)); // Convert byte index to character index.
@@ -909,12 +929,54 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ movl(Operand(rbx, i * kIntSize), rax);
}
}
- __ Set(rax, SUCCESS);
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ incq(Operand(rbp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ movq(rcx, Operand(rbp, kNumOutputRegisters));
+ __ subq(rcx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmpq(rcx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ movq(Operand(rbp, kNumOutputRegisters), rcx);
+ // Advance the location for output.
+ __ addq(Operand(rbp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kIntSize));
+
+ // Prepare rax to initialize registers with its value in the next run.
+ __ movq(rax, Operand(rbp, kInputStartMinusOne));
+
+ // Special case for zero-length matches.
+ // rdx: capture start index
+ __ cmpq(rdi, rdx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &restart);
+ // rdi (offset from the end) is zero if we already reached the end.
+ __ testq(rdi, rdi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ addq(rdi, Immediate(2));
+ } else {
+ __ incq(rdi);
+ }
+ __ jmp(&restart);
+ } else {
+ __ movq(rax, Immediate(SUCCESS));
+ }
}
- // Exit and return rax
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ movq(rax, Operand(rbp, kSuccessfulCaptures));
+ }
+ __ bind(&return_rax);
#ifdef _WIN64
// Restore callee save registers.
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
@@ -951,7 +1013,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ testq(rax, rax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
// Restore registers.
__ Move(code_object_pointer(), masm_.CodeObject());
@@ -1012,7 +1074,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
}
FixupCodeRelativePositions();

Powered by Google App Engine
This is Rietveld 408576698