Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: src/arm/regexp-macro-assembler-arm.cc

Issue 10386090: Implement loop for global regexps in regexp assembler. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: fix bugs, add tests, port to x64 and arm. Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/arm/regexp-macro-assembler-arm.cc
diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc
index a833624cebf1dd289ffcffb410ba7d3583d3c582..e2ffc19fc955e112a5da0a8b1dbd046e8c52fcb5 100644
--- a/src/arm/regexp-macro-assembler-arm.cc
+++ b/src/arm/regexp-macro-assembler-arm.cc
@@ -1,4 +1,4 @@
-// Copyright 2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -60,28 +60,30 @@ namespace internal {
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[52] Isolate* isolate (Address of the current isolate)
- * - fp[48] direct_call (if 1, direct call from JavaScript code,
- * if 0, call through the runtime system).
- * - fp[44] stack_area_base (High end of the memory area to use as
- * backtracking stack).
+ * - fp[56] Isolate* isolate (Address of the current isolate)
+ * - fp[52] direct_call (if 1, direct call from JavaScript code,
+ * if 0, call through the runtime system).
+ * - fp[48] stack_area_base (High end of the memory area to use as
+ * backtracking stack).
+ * - fp[44] capture array size (may fit multiple sets of matches)
* - fp[40] int* capture_array (int[num_saved_registers_], for output).
* - fp[36] secondary link/return address used by native call.
* --- sp when called ---
- * - fp[32] return address (lr).
- * - fp[28] old frame pointer (r11).
+ * - fp[32] return address (lr).
+ * - fp[28] old frame pointer (r11).
* - fp[0..24] backup of registers r4..r10.
* --- frame pointer ----
* - fp[-4] end of input (Address of end of string).
* - fp[-8] start of input (Address of first character in string).
* - fp[-12] start index (character index of start).
Erik Corry 2012/05/22 08:32:46 Inconsistent capitalization, not started by you.
Yang 2012/05/22 14:48:03 Done.
* - fp[-16] void* input_string (location of a handle containing the string).
- * - fp[-20] Offset of location before start of input (effectively character
+ * - fp[-20] success counter (only useful for global regexp to count matches)
Erik Corry 2012/05/22 08:32:46 Does this lint?
Yang 2012/05/22 14:48:03 Done.
+ * - fp[-24] Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a
* non-position.
- * - fp[-24] At start (if 1, we are starting at the start of the
+ * - fp[-28] At start (if 1, we are starting at the start of the
* string, otherwise 0)
- * - fp[-28] register 0 (Only positions must be stored in the first
+ * - fp[-32] register 0 (Only positions must be stored in the first
* - register 1 num_saved_registers_ registers)
* - ...
* - register num_registers-1
@@ -655,6 +657,7 @@ void RegExpMacroAssemblerARM::Fail() {
Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
+ Label return_r0, restart;
// Finalize code - write the entry point code now we know how many
// registers we need.
@@ -678,6 +681,8 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
// Set frame pointer in space for it if this is not a direct call
// from generated code.
__ add(frame_pointer(), sp, Operand(4 * kPointerSize));
+ __ mov(r0, Operand(0, RelocInfo::NONE));
+ __ push(r0); // Make room for success counter and initialize it to 0.
__ push(r0); // Make room for "position - 1" constant (value is irrelevant).
__ push(r0); // Make room for "at start" constant (value is irrelevant).
// Check if we have space on the stack for registers.
@@ -698,13 +703,13 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
__ bind(&stack_limit_hit);
CallCheckStackGuardState(r0);
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returned value is non-zero, we exit with the returned value as result.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
__ bind(&stack_ok);
@@ -725,41 +730,55 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
// position registers.
__ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ // Initialize code pointer register
+ __ mov(code_pointer(), Operand(masm_->CodeObject()));
+ // Load previous char as initial value of current character register.
+ Label at_start, character_loaded;
// Determine whether the start index is zero, that is at the start of the
- // string, and store that value in a local variable.
- __ cmp(r1, Operand(0));
+ // string, and store that value in a local variable. Store the result to
+ // local variable kAtStart.
+ __ cmp(r1, Operand(0, RelocInfo::NONE));
__ mov(r1, Operand(1), LeaveCC, eq);
__ mov(r1, Operand(0, RelocInfo::NONE), LeaveCC, ne);
__ str(r1, MemOperand(frame_pointer(), kAtStart));
+ // Load newline if index is at start.
+ __ mov(current_character(), Operand('\n'), LeaveCC, eq);
Erik Corry 2012/05/22 08:32:46 Having conditional instructions here after non-con
Yang 2012/05/22 14:48:03 Done.
+ __ b(eq, &character_loaded);
+ LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
+
+ if (global()) {
+ // Initializing current-character already done, so skip it.
+ __ jmp(&character_loaded);
+ // Restart matching from here in a global regexp.
+ __ bind(&restart);
+ // In a restarted pass, initialize current-character here.
+ LoadCurrentCharacterUnchecked(-1, 1);
Erik Corry 2012/05/22 08:32:46 In the global case the generated code isn't making
Yang 2012/05/22 14:48:03 Done.
+ }
+ __ bind(&character_loaded);
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1
-
- // Address of register 0.
- __ add(r1, frame_pointer(), Operand(kRegisterZero));
- __ mov(r2, Operand(num_saved_registers_));
- Label init_loop;
- __ bind(&init_loop);
- __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
- __ sub(r2, r2, Operand(1), SetCC);
- __ b(ne, &init_loop);
+ if (num_saved_registers_ > 8) {
+ // Address of register 0.
+ __ add(r1, frame_pointer(), Operand(kRegisterZero));
+ __ mov(r2, Operand(num_saved_registers_));
+ Label init_loop;
+ __ bind(&init_loop);
+ __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
+ __ sub(r2, r2, Operand(1), SetCC);
+ __ b(ne, &init_loop);
+ } else {
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ str(r0, register_location(i));
+ }
+ }
}
// Initialize backtrack stack pointer.
__ ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd));
- // Initialize code pointer register
- __ mov(code_pointer(), Operand(masm_->CodeObject()));
- // Load previous char as initial value of current character register.
- Label at_start;
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
- __ cmp(r0, Operand(0, RelocInfo::NONE));
- __ b(ne, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ mov(current_character(), Operand('\n'));
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -786,6 +805,10 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
for (int i = 0; i < num_saved_registers_; i += 2) {
__ ldr(r2, register_location(i));
__ ldr(r3, register_location(i + 1));
+ if (global()) {
+ // Keep capture start in r4 for the zero-length check later.
+ __ mov(r4, r2);
+ }
if (mode_ == UC16) {
__ add(r2, r1, Operand(r2, ASR, 1));
__ add(r3, r1, Operand(r3, ASR, 1));
@@ -797,10 +820,54 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ str(r3, MemOperand(r0, kPointerSize, PostIndex));
}
}
- __ mov(r0, Operand(SUCCESS));
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ ldr(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ __ ldr(r2, MemOperand(frame_pointer(), kRegisterOutput));
+ // Increment success counter.
+ __ add(r0, r0, Operand(1));
+ __ str(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ sub(r1, r1, Operand(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmp(r1, Operand(num_saved_registers_));
+ __ b(lt, &return_r0);
+
+ __ str(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ // Advance the location for output.
+ __ add(r2, r2, Operand(num_saved_registers_ * kPointerSize));
+ __ str(r2, MemOperand(frame_pointer(), kRegisterOutput));
+
+ // Prepare r0 to initialize registers with its value in the next run.
+ __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ // Special case for zero-length matches.
+ // r4: capture start index
+ __ cmp(current_input_offset(), r4);
+ // Not a zero-length match, restart.
+ __ b(ne, &restart);
+ // Offset from the end is zero if we already reached the end.
+ __ cmp(current_input_offset(), Operand(0));
+ __ b(eq, &exit_label_);
+ // Advance current position after a zero-length match.
+ __ add(current_input_offset(),
+ current_input_offset(),
+ Operand((mode_ == UC16) ? 2 : 1));
+ __ b(&restart);
+ } else {
+ __ mov(r0, Operand(SUCCESS));
+ }
}
+
// Exit and return r0
__ bind(&exit_label_);
+ if (global()) {
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ }
+
+ __ bind(&return_r0);
// Skip sp past regexp registers and local variables..
__ mov(sp, frame_pointer());
// Restore registers r4..r11 and return (restoring lr to pc).
@@ -822,7 +889,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returning non-zero, we should end execution with the given
// result as return value.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
// String might have moved: Reload end of string from frame.
Erik Corry 2012/05/22 08:32:46 If this happens will r4 be updated with the correc
Yang 2012/05/22 14:48:03 r4 is a relative index from the end of string, the
__ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
@@ -859,7 +926,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
}
CodeDesc code_desc;
@@ -1307,8 +1374,8 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacterUnchecked(int cp_offset,
int characters) {
Register offset = current_input_offset();
if (cp_offset != 0) {
- __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
- offset = r0;
+ __ add(ip, current_input_offset(), Operand(cp_offset * char_size()));
+ offset = ip;
Erik Corry 2012/05/22 08:32:46 This is setting a trap for a future programmer. T
Yang 2012/05/22 14:48:03 Using r4 now since it's not being used at this poi
}
// The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
// and the operating system running on the target allow it.

Powered by Google App Engine
This is Rietveld 408576698