Chromium Code Reviews| Index: src/arm/regexp-macro-assembler-arm.cc |
| diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc |
| index a833624cebf1dd289ffcffb410ba7d3583d3c582..e2ffc19fc955e112a5da0a8b1dbd046e8c52fcb5 100644 |
| --- a/src/arm/regexp-macro-assembler-arm.cc |
| +++ b/src/arm/regexp-macro-assembler-arm.cc |
| @@ -1,4 +1,4 @@ |
| -// Copyright 2009 the V8 project authors. All rights reserved. |
| +// Copyright 2012 the V8 project authors. All rights reserved. |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| @@ -60,28 +60,30 @@ namespace internal { |
| * Each call to a public method should retain this convention. |
| * |
| * The stack will have the following structure: |
| - * - fp[52] Isolate* isolate (Address of the current isolate) |
| - * - fp[48] direct_call (if 1, direct call from JavaScript code, |
| - * if 0, call through the runtime system). |
| - * - fp[44] stack_area_base (High end of the memory area to use as |
| - * backtracking stack). |
| + * - fp[56] Isolate* isolate (Address of the current isolate) |
| + * - fp[52] direct_call (if 1, direct call from JavaScript code, |
| + * if 0, call through the runtime system). |
| + * - fp[48] stack_area_base (High end of the memory area to use as |
| + * backtracking stack). |
| + * - fp[44] capture array size (may fit multiple sets of matches) |
| * - fp[40] int* capture_array (int[num_saved_registers_], for output). |
| * - fp[36] secondary link/return address used by native call. |
| * --- sp when called --- |
| - * - fp[32] return address (lr). |
| - * - fp[28] old frame pointer (r11). |
| + * - fp[32] return address (lr). |
| + * - fp[28] old frame pointer (r11). |
| * - fp[0..24] backup of registers r4..r10. |
| * --- frame pointer ---- |
| * - fp[-4] end of input (Address of end of string). |
| * - fp[-8] start of input (Address of first character in string). |
| * - fp[-12] start index (character index of start). |
|
Erik Corry
2012/05/22 08:32:46
Inconsistent capitalization, not started by you.
Yang
2012/05/22 14:48:03
Done.
|
| * - fp[-16] void* input_string (location of a handle containing the string). |
| - * - fp[-20] Offset of location before start of input (effectively character |
| + * - fp[-20] success counter (only useful for global regexp to count matches) |
|
Erik Corry
2012/05/22 08:32:46
Does this lint?
Yang
2012/05/22 14:48:03
Done.
|
| + * - fp[-24] Offset of location before start of input (effectively character |
| * position -1). Used to initialize capture registers to a |
| * non-position. |
| - * - fp[-24] At start (if 1, we are starting at the start of the |
| + * - fp[-28] At start (if 1, we are starting at the start of the |
| * string, otherwise 0) |
| - * - fp[-28] register 0 (Only positions must be stored in the first |
| + * - fp[-32] register 0 (Only positions must be stored in the first |
| * - register 1 num_saved_registers_ registers) |
| * - ... |
| * - register num_registers-1 |
| @@ -655,6 +657,7 @@ void RegExpMacroAssemblerARM::Fail() { |
| Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| + Label return_r0, restart; |
| // Finalize code - write the entry point code now we know how many |
| // registers we need. |
| @@ -678,6 +681,8 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| // Set frame pointer in space for it if this is not a direct call |
| // from generated code. |
| __ add(frame_pointer(), sp, Operand(4 * kPointerSize)); |
| + __ mov(r0, Operand(0, RelocInfo::NONE)); |
| + __ push(r0); // Make room for success counter and initialize it to 0. |
| __ push(r0); // Make room for "position - 1" constant (value is irrelevant). |
| __ push(r0); // Make room for "at start" constant (value is irrelevant). |
| // Check if we have space on the stack for registers. |
| @@ -698,13 +703,13 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| // Exit with OutOfMemory exception. There is not enough space on the stack |
| // for our working registers. |
| __ mov(r0, Operand(EXCEPTION)); |
| - __ jmp(&exit_label_); |
| + __ jmp(&return_r0); |
| __ bind(&stack_limit_hit); |
| CallCheckStackGuardState(r0); |
| __ cmp(r0, Operand(0, RelocInfo::NONE)); |
| // If returned value is non-zero, we exit with the returned value as result. |
| - __ b(ne, &exit_label_); |
| + __ b(ne, &return_r0); |
| __ bind(&stack_ok); |
| @@ -725,41 +730,55 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| // position registers. |
| __ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne)); |
| + // Initialize code pointer register |
| + __ mov(code_pointer(), Operand(masm_->CodeObject())); |
| + // Load previous char as initial value of current character register. |
| + Label at_start, character_loaded; |
| // Determine whether the start index is zero, that is at the start of the |
| - // string, and store that value in a local variable. |
| - __ cmp(r1, Operand(0)); |
| + // string, and store that value in a local variable. Store the result to |
| + // local variable kAtStart. |
| + __ cmp(r1, Operand(0, RelocInfo::NONE)); |
| __ mov(r1, Operand(1), LeaveCC, eq); |
| __ mov(r1, Operand(0, RelocInfo::NONE), LeaveCC, ne); |
| __ str(r1, MemOperand(frame_pointer(), kAtStart)); |
| + // Load newline if index is at start. |
| + __ mov(current_character(), Operand('\n'), LeaveCC, eq); |
|
Erik Corry
2012/05/22 08:32:46
Having conditional instructions here after non-con
Yang
2012/05/22 14:48:03
Done.
|
| + __ b(eq, &character_loaded); |
| + LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
| + |
| + if (global()) { |
| + // Initializing current-character already done, so skip it. |
| + __ jmp(&character_loaded); |
| + // Restart matching from here in a global regexp. |
| + __ bind(&restart); |
| + // In a restarted pass, initialize current-character here. |
| + LoadCurrentCharacterUnchecked(-1, 1); |
|
Erik Corry
2012/05/22 08:32:46
In the global case the generated code isn't making
Yang
2012/05/22 14:48:03
Done.
|
| + } |
| + __ bind(&character_loaded); |
| + // Initialize on-stack registers. |
| if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. |
| // Fill saved registers with initial value = start offset - 1 |
| - |
| - // Address of register 0. |
| - __ add(r1, frame_pointer(), Operand(kRegisterZero)); |
| - __ mov(r2, Operand(num_saved_registers_)); |
| - Label init_loop; |
| - __ bind(&init_loop); |
| - __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex)); |
| - __ sub(r2, r2, Operand(1), SetCC); |
| - __ b(ne, &init_loop); |
| + if (num_saved_registers_ > 8) { |
| + // Address of register 0. |
| + __ add(r1, frame_pointer(), Operand(kRegisterZero)); |
| + __ mov(r2, Operand(num_saved_registers_)); |
| + Label init_loop; |
| + __ bind(&init_loop); |
| + __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex)); |
| + __ sub(r2, r2, Operand(1), SetCC); |
| + __ b(ne, &init_loop); |
| + } else { |
| + for (int i = 0; i < num_saved_registers_; i++) { |
| + __ str(r0, register_location(i)); |
| + } |
| + } |
| } |
| // Initialize backtrack stack pointer. |
| __ ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd)); |
| - // Initialize code pointer register |
| - __ mov(code_pointer(), Operand(masm_->CodeObject())); |
| - // Load previous char as initial value of current character register. |
| - Label at_start; |
| - __ ldr(r0, MemOperand(frame_pointer(), kAtStart)); |
| - __ cmp(r0, Operand(0, RelocInfo::NONE)); |
| - __ b(ne, &at_start); |
| - LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
| - __ jmp(&start_label_); |
| - __ bind(&at_start); |
| - __ mov(current_character(), Operand('\n')); |
| - __ jmp(&start_label_); |
| + __ jmp(&start_label_); |
| // Exit code: |
| if (success_label_.is_linked()) { |
| @@ -786,6 +805,10 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| for (int i = 0; i < num_saved_registers_; i += 2) { |
| __ ldr(r2, register_location(i)); |
| __ ldr(r3, register_location(i + 1)); |
| + if (global()) { |
| + // Keep capture start in r4 for the zero-length check later. |
| + __ mov(r4, r2); |
| + } |
| if (mode_ == UC16) { |
| __ add(r2, r1, Operand(r2, ASR, 1)); |
| __ add(r3, r1, Operand(r3, ASR, 1)); |
| @@ -797,10 +820,54 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| __ str(r3, MemOperand(r0, kPointerSize, PostIndex)); |
| } |
| } |
| - __ mov(r0, Operand(SUCCESS)); |
| + |
| + if (global()) { |
| + // Restart matching if the regular expression is flagged as global. |
| + __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); |
| + __ ldr(r1, MemOperand(frame_pointer(), kNumOutputRegisters)); |
| + __ ldr(r2, MemOperand(frame_pointer(), kRegisterOutput)); |
| + // Increment success counter. |
| + __ add(r0, r0, Operand(1)); |
| + __ str(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); |
| + // Capture results have been stored, so the number of remaining global |
| + // output registers is reduced by the number of stored captures. |
| + __ sub(r1, r1, Operand(num_saved_registers_)); |
| + // Check whether we have enough room for another set of capture results. |
| + __ cmp(r1, Operand(num_saved_registers_)); |
| + __ b(lt, &return_r0); |
| + |
| + __ str(r1, MemOperand(frame_pointer(), kNumOutputRegisters)); |
| + // Advance the location for output. |
| + __ add(r2, r2, Operand(num_saved_registers_ * kPointerSize)); |
| + __ str(r2, MemOperand(frame_pointer(), kRegisterOutput)); |
| + |
| + // Prepare r0 to initialize registers with its value in the next run. |
| + __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne)); |
| + // Special case for zero-length matches. |
| + // r4: capture start index |
| + __ cmp(current_input_offset(), r4); |
| + // Not a zero-length match, restart. |
| + __ b(ne, &restart); |
| + // Offset from the end is zero if we already reached the end. |
| + __ cmp(current_input_offset(), Operand(0)); |
| + __ b(eq, &exit_label_); |
| + // Advance current position after a zero-length match. |
| + __ add(current_input_offset(), |
| + current_input_offset(), |
| + Operand((mode_ == UC16) ? 2 : 1)); |
| + __ b(&restart); |
| + } else { |
| + __ mov(r0, Operand(SUCCESS)); |
| + } |
| } |
| + |
| // Exit and return r0 |
| __ bind(&exit_label_); |
| + if (global()) { |
| + __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); |
| + } |
| + |
| + __ bind(&return_r0); |
| // Skip sp past regexp registers and local variables.. |
| __ mov(sp, frame_pointer()); |
| // Restore registers r4..r11 and return (restoring lr to pc). |
| @@ -822,7 +889,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| __ cmp(r0, Operand(0, RelocInfo::NONE)); |
| // If returning non-zero, we should end execution with the given |
| // result as return value. |
| - __ b(ne, &exit_label_); |
| + __ b(ne, &return_r0); |
| // String might have moved: Reload end of string from frame. |
|
Erik Corry
2012/05/22 08:32:46
If this happens will r4 be updated with the correc
Yang
2012/05/22 14:48:03
r4 is a relative index from the end of string, the
|
| __ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); |
| @@ -859,7 +926,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { |
| __ bind(&exit_with_exception); |
| // Exit with Result EXCEPTION(-1) to signal thrown exception. |
| __ mov(r0, Operand(EXCEPTION)); |
| - __ jmp(&exit_label_); |
| + __ jmp(&return_r0); |
| } |
| CodeDesc code_desc; |
| @@ -1307,8 +1374,8 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacterUnchecked(int cp_offset, |
| int characters) { |
| Register offset = current_input_offset(); |
| if (cp_offset != 0) { |
| - __ add(r0, current_input_offset(), Operand(cp_offset * char_size())); |
| - offset = r0; |
| + __ add(ip, current_input_offset(), Operand(cp_offset * char_size())); |
| + offset = ip; |
|
Erik Corry
2012/05/22 08:32:46
This is setting a trap for a future programmer. T
Yang
2012/05/22 14:48:03
Using r4 now since it's not being used at this poi
|
| } |
| // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU |
| // and the operating system running on the target allow it. |