Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Unified Diff: src/arm/assembler-arm.cc

Issue 17858002: ARM: Implement memcpy using NEON. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Remove "unaligned accesses" from C++ code Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/arm/assembler-arm.cc
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index 89c0a3b3cd936d111c4e354156d743c368684f7d..3aaf82ba3341d64bc71296d6bea59c34b9a87049 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -49,6 +49,7 @@ bool CpuFeatures::initialized_ = false;
#endif
unsigned CpuFeatures::supported_ = 0;
unsigned CpuFeatures::found_by_runtime_probing_only_ = 0;
+unsigned CpuFeatures::cache_line_size_ = 64;  // Probe() sets 32 on A9/A5.
ExternalReference ExternalReference::cpu_features() {
@@ -124,6 +125,9 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << VFP3 |
static_cast<uint64_t>(1) << ARMv7;
}
+ if (FLAG_enable_neon) {
+ supported_ |= 1u << NEON;
+ }
// For the simulator=arm build, use ARMv7 when FLAG_enable_armv7 is enabled
if (FLAG_enable_armv7) {
supported_ |= static_cast<uint64_t>(1) << ARMv7;
@@ -156,6 +160,10 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << ARMv7;
}
+ if (!IsSupported(NEON) && FLAG_enable_neon && OS::ArmCpuHasFeature(NEON)) {
+ found_by_runtime_probing_only_ |= 1u << NEON;
+ }
+
if (!IsSupported(ARMv7) && FLAG_enable_armv7 && OS::ArmCpuHasFeature(ARMv7)) {
found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << ARMv7;
}
@@ -170,12 +178,18 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << UNALIGNED_ACCESSES;
}
- if (OS::GetCpuImplementer() == QUALCOMM_IMPLEMENTER &&
+ CpuImplementer implementer = OS::GetCpuImplementer();
+ if (implementer == QUALCOMM_IMPLEMENTER &&
FLAG_enable_movw_movt && OS::ArmCpuHasFeature(ARMv7)) {
found_by_runtime_probing_only_ |=
static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;
}
+ CpuPart part = OS::GetCpuPart(implementer);
+ if ((part == CORTEX_A9) || (part == CORTEX_A5)) {
+ cache_line_size_ = 32;
+ }
+
if (!IsSupported(VFP32DREGS) && FLAG_enable_32dregs
&& OS::ArmCpuHasFeature(VFP32DREGS)) {
found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << VFP32DREGS;
@@ -246,11 +260,12 @@ void CpuFeatures::PrintTarget() {
void CpuFeatures::PrintFeatures() {
printf(
- "ARMv7=%d VFP3=%d VFP32DREGS=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
+ "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
"MOVW_MOVT_IMMEDIATE_LOADS=%d",
CpuFeatures::IsSupported(ARMv7),
CpuFeatures::IsSupported(VFP3),
CpuFeatures::IsSupported(VFP32DREGS),
+ CpuFeatures::IsSupported(NEON),
CpuFeatures::IsSupported(SUDIV),
CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
@@ -376,6 +391,78 @@ MemOperand::MemOperand(Register rn, Register rm,
}
+// Maps a requested NEON address alignment to the code kept in align_:
+//   0 -> 0, 64 -> 1, 128 -> 2, 256 -> 3.
+// (The inputs are presumably bit counts, as in ARM's @64/@128/@256 address
+// syntax - confirm against the header.)  Any other value trips UNREACHABLE();
+// 0 is returned afterwards so the function always yields a value.
+static int NeonAlignmentCode(int align) {
+  switch (align) {
ulan 2013/07/09 15:16:32 Extracting this switch into a function would avoid
vincent.belliard.fr 2013/07/10 15:30:38 Done.
+    case 0:
+      return 0;
+    case 64:
+      return 1;
+    case 128:
+      return 2;
+    case 256:
+      return 3;
+    default:
+      UNREACHABLE();
+      return 0;
+  }
+}
+
+
+// Builds a NEON memory operand addressed through |rn| with no explicit index
+// register.  |am| must be Offset or PostIndex; the mode is recorded in rm_ as
+// pc for Offset and sp for PostIndex.  (In the ARM element/structure
+// load/store encoding Rm == 0b1111 means no writeback and Rm == 0b1101 means
+// post-increment writeback - see the ARM ARM.)
+NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align) {
+  ASSERT((am == Offset) || (am == PostIndex));
+  rn_ = rn;
+  rm_ = (am == Offset) ? pc : sp;
+  align_ = NeonAlignmentCode(align);
+}
+
+
+// Builds a NEON memory operand addressed through |rn| with |rm| stored as the
+// index register.  |align| is validated and encoded by NeonAlignmentCode().
+NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align) {
+  rn_ = rn;
+  rm_ = rm;
+  align_ = NeonAlignmentCode(align);
+}
+
+
+// Builds a NEON register-list operand from its base register and the number
+// of registers in the list.  |registers_count| must be 1, 2, 3 or 4 and is
+// stored as the matching nlt_1..nlt_4 list type; any other count trips
+// UNREACHABLE() and falls back to nlt_1.
+NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
+  base_ = base;
+  if (registers_count == 1) {
+    type_ = nlt_1;
+  } else if (registers_count == 2) {
+    type_ = nlt_2;
+  } else if (registers_count == 3) {
+    type_ = nlt_3;
+  } else if (registers_count == 4) {
+    type_ = nlt_4;
+  } else {
+    // Unsupported list length.
+    UNREACHABLE();
+    type_ = nlt_1;
+  }
+}
+
+
// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.
@@ -1543,6 +1630,107 @@ void Assembler::bfi(Register dst,
}
+void Assembler::pkhbt(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // PKHBT encoding, see ARM DDI 0406C.b, A8.8.125:
+  //   cond(31-28) | 01101000(27-20) | Rn(19-16) | Rd(15-12) |
+  //   imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
+  // src2 has to be a register shifted left (LSL) by an immediate in [0, 31];
+  // pc is rejected for every register operand.
+  ASSERT(!dst.is(pc));
+  ASSERT(!src1.is(pc));
+  ASSERT(!src2.rm().is(pc));
+  ASSERT(!src2.rm().is(no_reg));
+  ASSERT(src2.rs().is(no_reg));
+  ASSERT((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
+  ASSERT(src2.shift_op() == LSL);
+  // Fixed opcode bits: 0x68 in bits 27-20 plus bit 4 (bit 6 stays clear).
+  const int opcode_bits = 0x68*B20 | B4;
+  // Rn, Rd and Rm fields.
+  const int register_bits =
+      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
+  // Shift amount goes into imm5 (bits 11-7).
+  const int shift_bits = src2.shift_imm_*B7;
+  emit(cond | opcode_bits | register_bits | shift_bits);
+}
+
+
+void Assembler::pkhtb(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // PKHTB encoding, see ARM DDI 0406C.b, A8.8.125:
+  //   cond(31-28) | 01101000(27-20) | Rn(19-16) | Rd(15-12) |
+  //   imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
+  // src2 has to be a register shifted right (ASR) by an immediate in [1, 32];
+  // pc is rejected for every register operand.
+  ASSERT(!dst.is(pc));
+  ASSERT(!src1.is(pc));
+  ASSERT(!src2.rm().is(pc));
+  ASSERT(!src2.rm().is(no_reg));
+  ASSERT(src2.rs().is(no_reg));
+  ASSERT((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
+  ASSERT(src2.shift_op() == ASR);
+  // A shift amount of 32 is written into the imm5 field as 0.
+  int imm5 = src2.shift_imm_;
+  if (imm5 == 32) {
+    imm5 = 0;
+  }
+  // Fixed opcode bits: 0x68 in bits 27-20 plus bits 6 and 4.
+  const int opcode_bits = 0x68*B20 | B6 | B4;
+  const int register_bits =
+      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
+  emit(cond | opcode_bits | register_bits | imm5*B7);
+}
+
+
+void Assembler::uxtb(Register dst,
+                     const Operand& src,
+                     Condition cond) {
+  // UXTB encoding, see ARM DDI 0406C.b, A8.8.274:
+  //   cond(31-28) | 01101110(27-20) | 1111(19-16) | Rd(15-12) |
+  //   rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  // src has to be a register rotated right (ROR) by 0, 8, 16 or 24.
+  ASSERT(!dst.is(pc));
+  ASSERT(!src.rm().is(pc));
+  ASSERT(!src.rm().is(no_reg));
+  ASSERT(src.rs().is(no_reg));
+  ASSERT((src.shift_imm_ == 0) ||
+         (src.shift_imm_ == 8) ||
+         (src.shift_imm_ == 16) ||
+         (src.shift_imm_ == 24));
+  ASSERT(src.shift_op() == ROR);
+  // For the permitted rotations, (imm >> 1) & 0xC is (imm / 8) << 2, so the
+  // multiplication by B8 lands imm / 8 in the rotate field (bits 11-10).
+  const int rotate_bits = ((src.shift_imm_ >> 1) & 0xC)*B8;
+  // Fixed opcode bits: 0x6E in bits 27-20, 1111 in bits 19-16, 0111 in 7-4.
+  const int opcode_bits = 0x6E*B20 | 0xF*B16 | 7*B4;
+  emit(cond | opcode_bits | dst.code()*B12 | rotate_bits | src.rm().code());
+}
+
+
+void Assembler::uxtab(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // UXTAB encoding, see ARM DDI 0406C.b, A8.8.271:
+  //   cond(31-28) | 01101110(27-20) | Rn(19-16) | Rd(15-12) |
+  //   rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  // src2 has to be a register rotated right (ROR) by 0, 8, 16 or 24; pc is
+  // rejected for every register operand.
+  ASSERT(!dst.is(pc));
+  ASSERT(!src1.is(pc));
+  ASSERT(!src2.rm().is(pc));
+  ASSERT(!src2.rm().is(no_reg));
+  ASSERT(src2.rs().is(no_reg));
+  ASSERT((src2.shift_imm_ == 0) ||
+         (src2.shift_imm_ == 8) ||
+         (src2.shift_imm_ == 16) ||
+         (src2.shift_imm_ == 24));
+  ASSERT(src2.shift_op() == ROR);
+  // For the permitted rotations, (imm >> 1) & 0xC is (imm / 8) << 2, so the
+  // multiplication by B8 lands imm / 8 in the rotate field (bits 11-10).
+  const int rotate_bits = ((src2.shift_imm_ >> 1) & 0xC)*B8;
+  // Fixed opcode bits: 0x6E in bits 27-20 and 0111 in bits 7-4.
+  const int opcode_bits = 0x6E*B20 | 7*B4;
+  const int register_bits =
+      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
+  emit(cond | opcode_bits | register_bits | rotate_bits);
+}
+
+
+void Assembler::uxtb16(Register dst,
+                       const Operand& src,
+                       Condition cond) {
+  // UXTB16 encoding, see ARM DDI 0406C.b, A8.8.275:
+  //   cond(31-28) | 01101100(27-20) | 1111(19-16) | Rd(15-12) |
+  //   rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  // src has to be a register rotated right (ROR) by 0, 8, 16 or 24.
+  ASSERT(!dst.is(pc));
+  ASSERT(!src.rm().is(pc));
+  ASSERT(!src.rm().is(no_reg));
+  ASSERT(src.rs().is(no_reg));
+  ASSERT((src.shift_imm_ == 0) ||
+         (src.shift_imm_ == 8) ||
+         (src.shift_imm_ == 16) ||
+         (src.shift_imm_ == 24));
+  ASSERT(src.shift_op() == ROR);
+  // For the permitted rotations, (imm >> 1) & 0xC is (imm / 8) << 2, so the
+  // multiplication by B8 lands imm / 8 in the rotate field (bits 11-10).
+  const int rotate_bits = ((src.shift_imm_ >> 1) & 0xC)*B8;
+  // Fixed opcode bits: 0x6C in bits 27-20, 1111 in bits 19-16, 0111 in 7-4.
+  const int opcode_bits = 0x6C*B20 | 0xF*B16 | 7*B4;
+  emit(cond | opcode_bits | dst.code()*B12 | rotate_bits | src.rm().code());
+}
+
+
// Status register access instructions.
void Assembler::mrs(Register dst, SRegister s, Condition cond) {
ASSERT(!dst.is(pc));
@@ -1640,6 +1828,22 @@ void Assembler::strd(Register src1, Register src2,
addrmod3(cond | B7 | B6 | B5 | B4, src1, dst);
}
+// Preload instructions.
+void Assembler::pld(const MemOperand& address) {
ulan 2013/07/09 15:16:32 Missing the description comment.
vincent.belliard.fr 2013/07/10 15:30:38 Done.
+  // Emits PLD with an immediate offset, laid out as:
+  //   kSpecialCondition(31-28) | 0101(27-24) | U(23) | 1(22) | 01(21-20) |
+  //   Rn(19-16) | 1111(15-12) | imm12(11-0)
+  // Only the plain base + immediate form is accepted: no index register and
+  // addressing mode Offset.
+  ASSERT(address.rm().is(no_reg));
+  ASSERT(address.am() == Offset);
+  // The instruction stores the offset magnitude; U (bit 23) is set when the
+  // offset is added to the base and cleared when it is subtracted.
+  int offset = address.offset();
+  const int U = (offset < 0) ? 0 : B23;
+  if (offset < 0) {
+    offset = -offset;
+  }
+  ASSERT(offset < 4096);  // Must fit in the 12-bit immediate field.
+  const int opcode_bits = B26 | B24 | B22 | B20 | 0xf*B12;
+  emit(kSpecialCondition | opcode_bits | U | address.rn().code()*B16 |
+       offset);
+}
+
+
// Load/Store multiple instructions.
void Assembler::ldm(BlockAddrMode am,
Register base,
@@ -2701,6 +2905,50 @@ void Assembler::vsqrt(const DwVfpRegister dst,
}
+// Support for NEON.
+
+void Assembler::vld1(NeonSize size,
+                     const NeonListOperand& dst,
+                     const NeonMemOperand& src) {
+  // VLD1 encoding, see ARM DDI 0406C.b, A8.8.320:
+  //   1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
+  //   Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
+  ASSERT(CpuFeatures::IsSupported(NEON));
+  // Split the list's base register code into the Vd field and the D bit.
+  int vd_bits;
+  int d_bit;
+  dst.base().split_code(&vd_bits, &d_bit);
+  // Fields taken from the register list being loaded.
+  const int list_bits = d_bit*B22 | vd_bits*B12 | dst.type()*B8;
+  // Fields taken from the memory operand: base, alignment and index.
+  const int memory_bits =
+      src.rn().code()*B16 | src.align()*B4 | src.rm().code();
+  emit(0xFU*B28 | 4*B24 | 2*B20 | list_bits | size*B6 | memory_bits);
+}
+
+
+void Assembler::vst1(NeonSize size,
+                     const NeonListOperand& src,
+                     const NeonMemOperand& dst) {
+  // VST1 encoding, see ARM DDI 0406C.b, A8.8.404:
+  //   1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
+  //   Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
+  ASSERT(CpuFeatures::IsSupported(NEON));
+  // Split the list's base register code into the Vd field and the D bit.
+  int vd_bits;
+  int d_bit;
+  src.base().split_code(&vd_bits, &d_bit);
+  // Fields taken from the register list being stored.
+  const int list_bits = d_bit*B22 | vd_bits*B12 | src.type()*B8;
+  // Fields taken from the memory operand: base, alignment and index.
+  const int memory_bits =
+      dst.rn().code()*B16 | dst.align()*B4 | dst.rm().code();
+  emit(0xFU*B28 | 4*B24 | list_bits | size*B6 | memory_bits);
+}
+
+
+void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
+  // VMOVL encoding, see ARM DDI 0406C.b, A8.8.346:
+  //   1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
+  //   000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
+  ASSERT(CpuFeatures::IsSupported(NEON));
+  // Destination register code -> Vd field and D bit.
+  int vd_bits;
+  int d_bit;
+  dst.split_code(&vd_bits, &d_bit);
+  // Source register code -> Vm field and M bit.
+  int vm_bits;
+  int m_bit;
+  src.split_code(&vm_bits, &m_bit);
+  // The U bit is taken from dt unshifted; the size part of dt is moved up to
+  // imm3 (bits 21-19).
+  const int type_bits =
+      (dt & NeonDataTypeUMask) | (dt & NeonDataTypeSizeMask)*B19;
+  const int destination_bits = d_bit*B22 | vd_bits*B12;
+  const int source_bits = m_bit*B5 | vm_bits;
+  emit(0xFU*B28 | B25 | B23 | 0xA*B8 | B4 | type_bits | destination_bits |
+       source_bits);
+}
+
+
// Pseudo instructions.
void Assembler::nop(int type) {
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/codegen-arm.cc » ('j') | src/arm/codegen-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698