Chromium Code Reviews| Index: src/arm/codegen-arm.cc |
| diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc |
| index 5b2980aeb4d2bfb580eef2b9fe446efcc65e49fe..20d89dc0c3ead39c4eb683163d112a10e44b8250 100644 |
| --- a/src/arm/codegen-arm.cc |
| +++ b/src/arm/codegen-arm.cc |
| @@ -112,6 +112,251 @@ UnaryMathFunction CreateExpFunction() { |
| #endif |
| } |
| +#if defined(V8_HOST_ARCH_ARM) && defined (V8_HOST_CAN_READ_UNALIGNED) |
|
ulan
2013/06/28 15:07:43
It looks like V8_HOST_CAN_READ_UNALIGNED (which is
|
| +// Generates, at runtime, an ARM byte-copy routine and returns it as an |
| +// OS::MemCopyUint8Function. The generated code takes the destination in r0, |
| +// the source in r1 and the number of bytes in r2. |
| +// The caller-supplied stub is returned unchanged when building for the |
| +// simulator, when the serializer is enabled, or when no code buffer could |
| +// be allocated. |
| +OS::MemCopyUint8Function CreateMemCopyUint8Function( |
| + OS::MemCopyUint8Function stub) { |
| +#if defined(USE_SIMULATOR) |
| + return stub; |
| +#else |
| + if (Serializer::enabled()) { |
| + return stub; |
| + } |
| + size_t actual_size; |
| + byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); |
| + if (buffer == NULL) return stub; |
| + |
| + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); |
| + |
| + Register dest = r0; |
| + Register src = r1; |
| + Register chars = r2; |
| + Register temp1 = r3; |
| + Label less_4; |
| + |
| + if (CpuFeatures::IsSupported(NEON)) { |
| + Label loop, less_256, less_128, less_64, less_32, _16_or_less, _8_or_less; |
| + Label size_less_than_8; |
| + __ pld(MemOperand(src, 0)); |
| + |
| + // Dispatch on the size. Larger copies prefetch further ahead before |
| + // branching; with 32-byte cache lines an additional pld is emitted for the |
| + // second half of every 64-byte span. |
| + __ cmp(chars, Operand(8)); |
| + __ b(lt, &size_less_than_8); |
| + __ cmp(chars, Operand(32)); |
| + __ b(lt, &less_32); |
| + if (CpuFeatures::cache_line_size() == 32) { |
| + __ pld(MemOperand(src, 32)); |
| + } |
| + __ cmp(chars, Operand(64)); |
| + __ b(lt, &less_64); |
| + __ pld(MemOperand(src, 64)); |
| + if (CpuFeatures::cache_line_size() == 32) { |
| + __ pld(MemOperand(src, 96)); |
| + } |
| + __ cmp(chars, Operand(128)); |
| + __ b(lt, &less_128); |
| + __ pld(MemOperand(src, 128)); |
| + if (CpuFeatures::cache_line_size() == 32) { |
| + __ pld(MemOperand(src, 160)); |
| + } |
| + __ pld(MemOperand(src, 192)); |
| + if (CpuFeatures::cache_line_size() == 32) { |
| + __ pld(MemOperand(src, 224)); |
| + } |
| + __ cmp(chars, Operand(256)); |
| + __ b(lt, &less_256); |
| + // Bias the counter by -256 so that the flags set by the subtraction in the |
| + // loop tell whether at least 256 bytes were remaining. |
| + __ sub(chars, chars, Operand(256)); |
| + |
| + // Main loop: copies 64 bytes per iteration. |
| + __ bind(&loop); |
| + __ pld(MemOperand(src, 256)); |
| + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); |
| + if (CpuFeatures::cache_line_size() == 32) { |
| + // src has advanced by 32 bytes at this point, so this prefetches the |
| + // cache line that follows the one requested at the top of the loop. |
| + // Emitting it after the second load would instead hit the same line as |
| + // the next iteration's first pld and skip every other line. |
| + __ pld(MemOperand(src, 256)); |
| + } |
| + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); |
| + __ sub(chars, chars, Operand(64), SetCC); |
| + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); |
| + __ b(ge, &loop); |
| + // Undo the bias: chars is the real number of remaining bytes again. |
| + __ add(chars, chars, Operand(256)); |
| + |
| + // At least 128 bytes remain: copy 128 bytes. |
| + __ bind(&less_256); |
| + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); |
| + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); |
| + __ sub(chars, chars, Operand(128)); |
| + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); |
| + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); |
| + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); |
| + __ cmp(chars, Operand(64)); |
| + __ b(lt, &less_64); |
| + |
| + // At least 64 bytes remain: copy 64 bytes. |
| + __ bind(&less_128); |
| + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); |
| + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); |
| + __ sub(chars, chars, Operand(64)); |
| + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); |
| + |
| + // Copy 32 bytes if at least 32 remain. |
| + __ bind(&less_64); |
| + __ cmp(chars, Operand(32)); |
| + __ b(lt, &less_32); |
| + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); |
| + __ sub(chars, chars, Operand(32)); |
| + |
| + // Copy 16 bytes if more than 16 remain. |
| + __ bind(&less_32); |
| + __ cmp(chars, Operand(16)); |
| + __ b(le, &_16_or_less); |
| + __ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(src, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex)); |
| + __ sub(chars, chars, Operand(16)); |
| + |
| + // Copy 8 bytes if more than 8 remain. |
| + __ bind(&_16_or_less); |
| + __ cmp(chars, Operand(8)); |
| + __ b(le, &_8_or_less); |
| + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex)); |
| + __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest, PostIndex)); |
| + __ sub(chars, chars, Operand(8)); |
| + |
| + // Do a last copy which may overlap with the previous copy (up to 8 bytes). |
| + __ bind(&_8_or_less); |
| + // chars = 8 - remaining: how far to step both pointers back so that the |
| + // final 8-byte copy ends exactly at the end of the data. |
| + __ rsb(chars, chars, Operand(8)); |
| + __ sub(src, src, Operand(chars)); |
| + __ sub(dest, dest, Operand(chars)); |
| + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src)); |
| + __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest)); |
| + |
| + __ Ret(); |
| + |
| + // Fewer than 8 bytes in total: copy one word if there are at least 4 |
| + // bytes, then continue at less_4 for the remaining 0..3 bytes. |
| + __ bind(&size_less_than_8); |
| + |
| + __ bic(temp1, chars, Operand(0x3), SetCC); |
| + __ b(&less_4, eq); |
| + __ ldr(temp1, MemOperand(src, 4, PostIndex)); |
| + __ str(temp1, MemOperand(dest, 4, PostIndex)); |
| + } else { |
| + Register temp2 = ip; |
| + Label loop; |
| + |
| + // Without NEON: copy word by word until dest reaches |
| + // temp2 = dest + (chars rounded down to a multiple of 4). |
| + __ bic(temp2, chars, Operand(0x3), SetCC); |
| + __ b(&less_4, eq); |
| + __ add(temp2, dest, temp2); |
| + |
| + __ bind(&loop); |
| + __ ldr(temp1, MemOperand(src, 4, PostIndex)); |
| + __ str(temp1, MemOperand(dest, 4, PostIndex)); |
| + __ cmp(dest, temp2); |
| + __ b(&loop, ne); |
| + } |
| + |
| + // Copy the last 0..3 bytes: the two low bits of chars are moved into the |
| + // flags and predicate a halfword copy and a byte copy. |
| + __ bind(&less_4); |
| + __ mov(chars, Operand(chars, LSL, 31), SetCC); |
| + // bit0 => Z (ne), bit1 => C (cs) |
| + __ ldrh(temp1, MemOperand(src, 2, PostIndex), cs); |
| + __ strh(temp1, MemOperand(dest, 2, PostIndex), cs); |
| + __ ldrb(temp1, MemOperand(src), ne); |
| + __ strb(temp1, MemOperand(dest), ne); |
| + __ Ret(); |
| + |
| + CodeDesc desc; |
| + masm.GetCode(&desc); |
| + ASSERT(!RelocInfo::RequiresRelocation(desc)); |
| + |
| + // Flush the instruction cache and protect the buffer before handing it out |
| + // as a callable function. |
| + CPU::FlushICache(buffer, actual_size); |
| + OS::ProtectCode(buffer, actual_size); |
| + return FUNCTION_CAST<OS::MemCopyUint8Function>(buffer); |
| +#endif |
| +} |
| + |
| +// Convert 8 to 16. The number of characters to copy must be at least 8. |
| +// Generates, at runtime, an ARM routine that widens 8-bit characters read |
| +// from the source (r1) into 16-bit characters written to the destination |
| +// (r0); r2 holds the number of characters. Returned as an |
| +// OS::MemCopyUint16Uint8Function. |
| +// The caller-supplied stub is returned unchanged when building for the |
| +// simulator, when the serializer is enabled, or when no code buffer could |
| +// be allocated. |
| +OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function( |
| + OS::MemCopyUint16Uint8Function stub) { |
| +#if defined(USE_SIMULATOR) |
| + return stub; |
| +#else |
| + if (Serializer::enabled()) { |
| + return stub; |
| + } |
| + size_t actual_size; |
| + byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); |
| + if (buffer == NULL) return stub; |
| + |
| + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); |
| + |
| + Register dest = r0; |
| + Register src = r1; |
| + Register chars = r2; |
| + if (CpuFeatures::IsSupported(NEON)) { |
| + Register temp = r3; |
| + Label loop; |
| + |
| + // temp = chars rounded down to a multiple of 8; chars keeps the remainder. |
| + __ bic(temp, chars, Operand(0x7)); |
| + __ sub(chars, chars, Operand(temp)); |
| + // temp = destination address at which the loop stops (each character |
| + // occupies two bytes in the output). |
| + __ add(temp, dest, Operand(temp, LSL, 1)); |
| + |
| + // Widen 8 characters per iteration: load 8 bytes, zero-extend them to |
| + // 8 halfwords, store 16 bytes. |
| + __ bind(&loop); |
| + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex)); |
| + __ vmovl(NeonU8, q0, d0); |
| + __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex)); |
| + __ cmp(dest, temp); |
| + __ b(&loop, ne); |
| + |
| + // Do a last copy which will overlap with the previous copy (1 to 8 bytes). |
| + // chars = 8 - remainder: how many characters to step back so that the |
| + // final 8-character conversion ends exactly at the end of the data. |
| + __ rsb(chars, chars, Operand(8)); |
| + __ sub(src, src, Operand(chars)); |
| + __ sub(dest, dest, Operand(chars, LSL, 1)); |
| + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src)); |
| + __ vmovl(NeonU8, q0, d0); |
| + __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest)); |
| + __ Ret(); |
| + } else { |
| + Register temp1 = r3; |
| + Register temp2 = ip; |
| + Register temp3 = lr; |
| + Register temp4 = r4; |
| + Label loop; |
| + Label not_two; |
| + |
| + // lr and r4 serve as scratch registers below, so save them first. |
| + __ Push(lr, r4); |
| + // temp2 = destination address at which the 4-character loop stops. |
| + __ bic(temp2, chars, Operand(0x3)); |
| + __ add(temp2, dest, Operand(temp2, LSL, 1)); |
| + |
| + // Widen 4 characters per iteration. |
| + __ bind(&loop); |
| + __ ldr(temp1, MemOperand(src, 4, PostIndex)); |
| + // temp3 = bytes 0 and 2, temp4 = bytes 1 and 3, each zero-extended to a |
| + // halfword. |
| + __ uxtb16(temp3, Operand(temp1, ROR, 0)); |
| + __ uxtb16(temp4, Operand(temp1, ROR, 8)); |
| + // Characters 0 and 1. |
| + __ pkhbt(temp1, temp3, Operand(temp4, LSL, 16)); |
| + __ str(temp1, MemOperand(dest)); |
| + // Characters 2 and 3. |
| + __ pkhtb(temp1, temp4, Operand(temp3, ASR, 16)); |
| + __ str(temp1, MemOperand(dest, 4)); |
| + __ add(dest, dest, Operand(8)); |
| + __ cmp(dest, temp2); |
| + __ b(&loop, ne); |
| + |
| + // Handle the last 0..3 characters using the two low bits of chars. |
| + __ mov(chars, Operand(chars, LSL, 31), SetCC); // bit0 => ne, bit1 => cs |
| + __ b(&not_two, cc); |
| + // Bit 1 set: widen two characters into a single word. |
| + __ ldrh(temp1, MemOperand(src, 2, PostIndex)); |
| + __ uxtb(temp3, Operand(temp1, ROR, 8)); |
| + __ mov(temp3, Operand(temp3, LSL, 16)); |
| + __ uxtab(temp3, temp3, Operand(temp1, ROR, 0)); |
| + __ str(temp3, MemOperand(dest, 4, PostIndex)); |
| + __ bind(&not_two); |
| + // Bit 0 set: widen one final character. |
| + __ ldrb(temp1, MemOperand(src), ne); |
| + __ strh(temp1, MemOperand(dest), ne); |
| + // Restore r4 and return by popping the saved lr into pc. |
| + __ Pop(pc, r4); |
| + } |
| + |
| + CodeDesc desc; |
| + masm.GetCode(&desc); |
| + // The buffer is executed in place, so the code must not need relocation. |
| + ASSERT(!RelocInfo::RequiresRelocation(desc)); |
| + |
| + // Flush the instruction cache and protect the buffer before handing it out |
| + // as a callable function. |
| + CPU::FlushICache(buffer, actual_size); |
| + OS::ProtectCode(buffer, actual_size); |
| + |
| + return FUNCTION_CAST<OS::MemCopyUint16Uint8Function>(buffer); |
| +#endif |
| +} |
| +#endif |
| #undef __ |