| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index d9c57c25562296a287e29b304e7884425851d341..4a43442d7fe416e54a662b89759a7f141185b242 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -195,7 +195,6 @@ TargetARM32::TargetARM32(Cfg *Func) |
| "Duplicate alias for " #val); \ |
| RegisterAliases[RegARM32::val].set(RegAlias); \ |
| } \ |
| - RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ |
| assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ |
| ScratchRegs[RegARM32::val] = scratch; |
| REGARM32_TABLE; |
| @@ -217,6 +216,34 @@ TargetARM32::TargetARM32(Cfg *Func) |
| TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
| } |
| +namespace { |
| +void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
| + for (Variable *Var : Vars) { |
| + auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); |
| + if (!Var64) { |
| + // This is not the variable we are looking for. |
| + continue; |
| + } |
| + assert(Var64->hasReg() || !Var64->mustHaveReg()); |
| + if (!Var64->hasReg()) { |
| + continue; |
| + } |
| + SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum()); |
| + // This assumes little endian. |
| + Variable *Lo = Var64->getLo(); |
| + Variable *Hi = Var64->getHi(); |
| + assert(Lo->hasReg() == Hi->hasReg()); |
| + if (Lo->hasReg()) { |
| + continue; |
| + } |
| + Lo->setRegNum(FirstReg); |
| + Lo->setMustHaveReg(); |
| + Hi->setRegNum(FirstReg + 1); |
| + Hi->setMustHaveReg(); |
| + } |
| +} |
| +} // end of anonymous namespace |
| + |
| void TargetARM32::translateO2() { |
| TimerMarker T(TimerStack::TT_O2, Func); |
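A minimal sketch (a hypothetical checker, not part of this patch) of the invariant that copyRegAllocFromInfWeightVariable64On32 above establishes whenever a 64-bit variable was assigned a register pair: its lo/hi halves end up in consecutive GPRs, little endian, lo first. The accessors used are the same ones the patch itself calls.

    // Hypothetical sanity check, reusing only accessors that appear in this patch.
    void checkPairAssignment(const Variable64On32 *Var64) {
      if (!Var64->hasReg())
        return; // the pair was not assigned registers; its halves stay unassigned
      SizeT First = RegARM32::getI64PairFirstGPRNum(Var64->getRegNum());
      assert(Var64->getLo()->getRegNum() == static_cast<int32_t>(First));     // low word, e.g. r0
      assert(Var64->getHi()->getRegNum() == static_cast<int32_t>(First) + 1); // high word, e.g. r1
    }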
| @@ -284,6 +311,7 @@ void TargetARM32::translateO2() { |
| regAlloc(RAK_Global); |
| if (Func->hasError()) |
| return; |
| + copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
| Func->dump("After linear scan regalloc"); |
| if (Ctx->getFlags().getPhiEdgeSplit()) { |
| @@ -344,6 +372,7 @@ void TargetARM32::translateOm1() { |
| regAlloc(RAK_InfOnly); |
| if (Func->hasError()) |
| return; |
| + copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
| Func->dump("After regalloc of infinite-weight variables"); |
| Func->genFrame(); |
| @@ -616,7 +645,7 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| auto *Mem = OperandARM32Mem::create( |
| Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
| Ctx->getConstantInt32(Arg->getStackOffset()))); |
| - legalizeToReg(Mem, Arg->getRegNum()); |
| + _mov(Arg, legalizeToReg(Mem, Arg->getRegNum())); |
| // This argument-copying instruction uses an explicit OperandARM32Mem |
| // operand instead of a Variable, so its fill-from-stack operation has to |
| // be tracked separately for statistics. |
| @@ -716,6 +745,11 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| RegsUsed[RegARM32::Reg_lr] = true; |
| } |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| + if (RegARM32::isI64RegisterPair(i)) { |
| + // We don't save register pairs explicitly. Instead, we rely on the code |
| + // fake-defing/fake-using each register in the pair. |
| + continue; |
| + } |
| if (CalleeSaves[i] && RegsUsed[i]) { |
| // TODO(jvoung): do separate vpush for each floating point register |
| // segment and += 4, or 8 depending on type. |
| @@ -884,6 +918,10 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| // Pop registers in ascending order just like push (instead of in reverse |
| // order). |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| + if (RegARM32::isI64RegisterPair(i)) { |
| + continue; |
| + } |
| + |
| if (CalleeSaves[i] && RegsUsed[i]) { |
| GPRsToRestore.push_back(getPhysicalRegister(i)); |
| } |
| @@ -1739,6 +1777,7 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| Variable *T_Lo = makeReg(IceType_i32); |
| Variable *T_Hi = makeReg(IceType_i32); |
| + |
| _mov(T_Lo, Src0Lo); |
| _mov(DestLo, T_Lo); |
| _mov(T_Hi, Src0Hi); |
| @@ -2271,9 +2310,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
| configureBitcastTemporary(T); |
| Variable *Src0R = legalizeToReg(Src0); |
| _mov(T, Src0R); |
| - auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
| - lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo())); |
| - lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi())); |
| + lowerAssign(InstAssign::create(Func, Dest, T)); |
| break; |
| } |
| case IceType_f64: { |
| @@ -2282,11 +2319,11 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
| // vmov T2, T0, T1 |
| // Dest <- T2 |
| assert(Src0->getType() == IceType_i64); |
| + Variable *T = makeReg(DestType); |
| auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| Src64->initHiLo(Func); |
| configureBitcastTemporary(Src64); |
| lowerAssign(InstAssign::create(Func, Src64, Src0)); |
| - Variable *T = makeReg(IceType_f64); |
| _mov(T, Src64); |
| lowerAssign(InstAssign::create(Func, Dest, T)); |
| break; |
| @@ -2537,38 +2574,457 @@ void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| } |
| -void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| - switch (Instr->getIntrinsicInfo().ID) { |
| - case Intrinsics::AtomicCmpxchg: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| +namespace { |
| +inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| + if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| + return Integer->getValue(); |
| + return Intrinsics::MemoryOrderInvalid; |
| +} |
| +} // end of anonymous namespace |
| + |
| +void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| + Operand *Ptr, Operand *Val) { |
| + // retry: |
| + // ldrex contents, [addr] |
| + // op tmp, contents, operand |
| + // strex success, tmp, [addr] |
| + // jne retry |
| + // fake-use(addr, operand) @ prevents undesirable clobbering. |
Jim Stichnoth 2015/10/05 22:07:06: undesirable
John 2015/10/06 12:03:38: Done.
| + // mov dest, contents |
| + assert(Dest != nullptr); |
| + Type DestTy = Dest->getType(); |
| + (void)Ptr; |
| + (void)Val; |
| + |
| + OperandARM32Mem *Mem; |
| + Variable *PtrContentsReg; |
| + Variable *PtrContentsHiReg; |
| + Variable *PtrContentsLoReg; |
| + Variable *Value = Func->makeVariable(DestTy); |
| + Variable *ValueReg; |
| + Variable *ValueHiReg; |
| + Variable *ValueLoReg; |
| + Variable *Success = makeReg(IceType_i32); |
| + Variable *TmpReg; |
| + Variable *TmpHiReg; |
| + Variable *TmpLoReg; |
| + Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| + InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| + |
| + if (DestTy != IceType_i64) { |
Jim Stichnoth 2015/10/05 22:07:06: (here and below) I think instead of this: if (A
John 2015/10/06 12:03:38: Done here, and elsewhere.
| + PtrContentsReg = makeReg(DestTy); |
| + PtrContentsHiReg = nullptr; |
| + PtrContentsLoReg = PtrContentsReg; |
| + |
| + ValueReg = makeReg(DestTy); |
| + ValueHiReg = nullptr; |
| + ValueLoReg = ValueReg; |
| + |
| + TmpReg = makeReg(DestTy); |
| + TmpHiReg = nullptr; |
| + TmpLoReg = TmpReg; |
| + } else { |
| + Variable64On32 *PtrContentsReg64 = makeI64RegPair(); |
| + PtrContentsHiReg = PtrContentsReg64->getHi(); |
| + PtrContentsLoReg = PtrContentsReg64->getLo(); |
| + PtrContentsReg = PtrContentsReg64; |
| + |
| + llvm::cast<Variable64On32>(Value)->initHiLo(Func); |
| + Variable64On32 *ValueReg64 = makeI64RegPair(); |
| + ValueHiReg = ValueReg64->getHi(); |
| + ValueLoReg = ValueReg64->getLo(); |
| + ValueReg = ValueReg64; |
| + |
| + Variable64On32 *TmpReg64 = makeI64RegPair(); |
| + TmpHiReg = TmpReg64->getHi(); |
| + TmpLoReg = TmpReg64->getLo(); |
| + TmpReg = TmpReg64; |
| + } |
| + |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, Value)); |
| + } |
| + lowerAssign(InstAssign::create(Func, Value, Val)); |
| + |
| + Variable *PtrVar = Func->makeVariable(IceType_i32); |
| + lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); |
| + |
| + _dmb(); |
| + Context.insert(Retry); |
| + Mem = formMemoryOperand(PtrVar, DestTy); |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, ValueReg, Value)); |
| + } |
| + lowerAssign(InstAssign::create(Func, ValueReg, Value)); |
| + if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
| + _uxt(ValueReg, ValueReg); |
| + } |
| + _ldrex(PtrContentsReg, Mem); |
| + |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg)); |
| + } |
| + switch (Operation) { |
| + default: |
| + Func->setError("Unknown AtomicRMW operation"); |
| return; |
| + case Intrinsics::AtomicAdd: |
| + if (DestTy != IceType_i64) { |
| + _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + } else { |
| + _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| + } |
| + break; |
| + case Intrinsics::AtomicSub: |
| + if (DestTy != IceType_i64) { |
| + _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + } else { |
| + _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| + } |
| + break; |
| + case Intrinsics::AtomicOr: |
| + _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + if (DestTy == IceType_i64) { |
| + _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| + } |
| + break; |
| + case Intrinsics::AtomicAnd: |
| + _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + if (DestTy == IceType_i64) { |
| + _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| + } |
| + break; |
| + case Intrinsics::AtomicXor: |
| + _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| + if (DestTy == IceType_i64) { |
| + _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| + } |
| + break; |
| + case Intrinsics::AtomicExchange: |
| + _mov(TmpLoReg, ValueLoReg); |
| + if (DestTy == IceType_i64) { |
| + _mov(TmpHiReg, ValueHiReg); |
| + } |
| + break; |
| + } |
| + _strex(Success, TmpReg, Mem); |
| + _cmp(Success, _0); |
| + _br(Retry, CondARM32::NE); |
| + |
| + // The following fake-uses ensure that Subzero will not clobber them in the |
| + // load-linked/store-conditional loop above. We might have to spill them, but |
| + // spilling is preferable to incorrect behavior. |
| + Context.insert(InstFakeUse::create(Func, PtrVar)); |
| + if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { |
| + Context.insert(InstFakeUse::create(Func, Value64->getHi())); |
| + Context.insert(InstFakeUse::create(Func, Value64->getLo())); |
| + } else { |
| + Context.insert(InstFakeUse::create(Func, Value)); |
| + } |
| + _dmb(); |
| + if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
| + _uxt(PtrContentsReg, PtrContentsReg); |
| } |
| + |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeUse::create(Func, PtrContentsReg)); |
| + } |
| + lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); |
| + if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { |
| + Context.insert(InstFakeUse::create(Func, Dest64->getLo())); |
| + Context.insert(InstFakeUse::create(Func, Dest64->getHi())); |
| + } else { |
| + Context.insert(InstFakeUse::create(Func, Dest)); |
| + } |
| +} |
| + |
| +void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| + Variable *Dest = Instr->getDest(); |
| + Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; |
| + Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; |
| + switch (ID) { |
| case Intrinsics::AtomicFence: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - return; |
| case Intrinsics::AtomicFenceAll: |
| - // NOTE: FenceAll should prevent and load/store from being moved across the |
| - // fence (both atomic and non-atomic). The InstARM32Mfence instruction is |
| - // currently marked coarsely as "HasSideEffects". |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + assert(Dest == nullptr); |
| + _dmb(); |
| return; |
| case Intrinsics::AtomicIsLockFree: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + Operand *ByteSize = Instr->getArg(0); |
| + auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); |
| + if (CI == nullptr) { |
| + // The PNaCl ABI requires the byte size to be a compile-time constant. |
| + Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
| + return; |
| + } |
| + static constexpr int32_t NotLockFree = 0; |
| + static constexpr int32_t LockFree = 1; |
| + int32_t Result = NotLockFree; |
| + switch (CI->getValue()) { |
| + case 1: |
| + case 2: |
| + case 4: |
| + case 8: |
| + Result = LockFree; |
| + break; |
| + } |
| + _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); |
| return; |
| } |
| case Intrinsics::AtomicLoad: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + assert(isScalarIntegerType(DestTy)); |
| + // We require the memory address to be naturally aligned. Given that is the |
| + // case, then normal loads are atomic. |
| + if (!Intrinsics::isMemoryOrderValid( |
| + ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| + Func->setError("Unexpected memory ordering for AtomicLoad"); |
| + return; |
| + } |
| + Variable *T; |
| + |
| + if (DestTy == IceType_i64) { |
| + // ldrexd is the only ARM instruction that is guaranteed to load a 64-bit |
| + // integer atomically. Everything else works with a regular ldr. |
| + T = makeI64RegPair(); |
| + _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); |
| + } else { |
| + T = makeReg(DestTy); |
| + _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); |
| + } |
| + _dmb(); |
| + lowerAssign(InstAssign::create(Func, Dest, T)); |
| + // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| + // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
| + // the FakeUse on the last-inserted instruction's dest. |
| + Context.insert( |
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| return; |
| } |
| - case Intrinsics::AtomicRMW: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - return; |
| case Intrinsics::AtomicStore: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + // We require the memory address to be naturally aligned. Given that is the |
| + // case, then normal stores are atomic. |
| + if (!Intrinsics::isMemoryOrderValid( |
| + ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| + Func->setError("Unexpected memory ordering for AtomicStore"); |
| + return; |
| + } |
| + Operand *Value = Instr->getArg(0); |
| + Type ValueTy = Value->getType(); |
| + assert(isScalarIntegerType(ValueTy)); |
| + Operand *Addr = Instr->getArg(1); |
| + |
| + _dmb(); |
| + if (ValueTy != IceType_i64) { |
| + // non-64-bit stores are atomic as long as the address is aligned. |
| + // This is PNaCl, so addresses are aligned. |
| + Variable *T = makeReg(ValueTy); |
| + lowerAssign(InstAssign::create(Func, T, Value)); |
| + _str(T, formMemoryOperand(Addr, ValueTy)); |
| + } else { |
| + // Atomic 64-bit stores require a load-linked/store-conditional loop using |
| + // ldrexd and strexd. The lowered code is: |
| + // |
| + // retry: |
| + // ldrexd t.lo, t.hi, [addr] |
| + // strexd success, value.lo, value.hi, [addr] |
| + // cmp success, #0 |
| + // bne retry |
| + // fake-use(addr, value.lo, value.hi) |
| + // |
| + // The fake-use is needed to prevent those variables from being clobbered |
| + // in the loop (which will happen under register pressure.) |
| + Variable64On32 *Tmp = makeI64RegPair(); |
| + Variable64On32 *ValueVar = |
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| + Variable *AddrVar = makeReg(IceType_i32); |
| + Variable *Success = makeReg(IceType_i32); |
| + OperandARM32Mem *Mem; |
| + Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| + InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| + Variable64On32 *NewReg = makeI64RegPair(); |
| + ValueVar->initHiLo(Func); |
| + ValueVar->mustNotHaveReg(); |
| + |
| + lowerAssign(InstAssign::create(Func, ValueVar, Value)); |
| + lowerAssign(InstAssign::create(Func, AddrVar, Addr)); |
| + |
| + Context.insert(Retry); |
| + Context.insert(InstFakeDef::create(Func, NewReg)); |
| + lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); |
| + Mem = formMemoryOperand(AddrVar, IceType_i64); |
| + _ldrex(Tmp, Mem); |
| + // This fake-use prevents the ldrex from being dead-code eliminated, |
| + // while also keeping liveness happy about all defs being used. |
| + Context.insert( |
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| + _strex(Success, NewReg, Mem); |
| + _cmp(Success, _0); |
| + _br(Retry, CondARM32::NE); |
| + |
| + Context.insert(InstFakeUse::create(Func, ValueVar->getLo())); |
| + Context.insert(InstFakeUse::create(Func, ValueVar->getHi())); |
| + Context.insert(InstFakeUse::create(Func, AddrVar)); |
| + } |
| + _dmb(); |
| + return; |
| + } |
| + case Intrinsics::AtomicCmpxchg: { |
| + // The initial lowering for cmpxchg was: |
| + // |
| + // retry: |
| + // ldrex tmp, [addr] |
| + // cmp tmp, expected |
| + // mov expected, tmp |
| + // jne retry |
| + // strex success, new, [addr] |
| + // cmp success, #0 |
| + // bne retry |
| + // mov dest, expected |
| + // |
| + // Besides requiring two branches, that lowering could also potentially |
| + // write to memory (in mov expected, tmp) unless we were OK with increasing |
| + // the register pressure and requiring expected to be an infinite-weight |
| + // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through |
| + // careful rewriting, and thanks to predication, we now implement the |
| + // lowering as: |
| + // |
| + // retry: |
| + // ldrex tmp, [addr] |
| + // cmp tmp, expected |
| + // strexeq success, new, [addr] |
| + // movne expected, tmp |
| + // cmpeq success, #0 |
| + // bne retry |
| + // mov dest, expected |
| + // |
| + // Predication lets us move the strex ahead of the mov expected, tmp, which |
| + // allows tmp to be a non-infinite weight temporary. We wanted to avoid |
| + // writing to memory between ldrex and strex because, even though it would |
| + // usually cause no issues, if any intervening memory write aliased |
| + // [addr] then we would have undefined behavior. Undefined behavior isn't |
| + // cool, so we try to avoid it. See the "Synchronization and semaphores" |
| + // section of the "ARM Architecture Reference Manual." |
| + |
| + assert(isScalarIntegerType(DestTy)); |
| + // We require the memory address to be naturally aligned. Given that is the |
| + // case, then normal loads are atomic. |
| + if (!Intrinsics::isMemoryOrderValid( |
| + ID, getConstantMemoryOrder(Instr->getArg(3)), |
| + getConstantMemoryOrder(Instr->getArg(4)))) { |
| + Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| + return; |
| + } |
| + |
| + OperandARM32Mem *Mem; |
| + Variable *TmpReg; |
| + Variable *Expected, *ExpectedReg; |
| + Variable *New, *NewReg; |
| + Variable *Success = makeReg(IceType_i32); |
| + Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| + InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| + |
| + if (DestTy == IceType_i64) { |
| + Variable64On32 *TmpReg64 = makeI64RegPair(); |
| + Variable64On32 *New64 = |
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| + Variable64On32 *NewReg64 = makeI64RegPair(); |
| + Variable64On32 *Expected64 = |
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| + Variable64On32 *ExpectedReg64 = makeI64RegPair(); |
| + |
| + New64->initHiLo(Func); |
| + New64->mustNotHaveReg(); |
| + Expected64->initHiLo(Func); |
| + Expected64->mustNotHaveReg(); |
| + |
| + TmpReg = TmpReg64; |
| + New = New64; |
| + NewReg = NewReg64; |
| + Expected = Expected64; |
| + ExpectedReg = ExpectedReg64; |
| + } else { |
| + TmpReg = makeReg(DestTy); |
| + New = Func->makeVariable(DestTy); |
| + NewReg = makeReg(DestTy); |
| + Expected = Func->makeVariable(DestTy); |
| + ExpectedReg = makeReg(DestTy); |
| + } |
| + |
| + Mem = formMemoryOperand(Instr->getArg(0), DestTy); |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, Expected)); |
| + } |
| + lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, New)); |
| + } |
| + lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); |
| + _dmb(); |
| + |
| + Context.insert(Retry); |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected)); |
| + } |
| + lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); |
| + if (DestTy == IceType_i64) { |
| + Context.insert(InstFakeDef::create(Func, NewReg, New)); |
| + } |
| + lowerAssign(InstAssign::create(Func, NewReg, New)); |
| + |
| + _ldrex(TmpReg, Mem); |
| + Context.insert( |
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| + if (DestTy != IceType_i64) { |
| + _cmp(TmpReg, ExpectedReg); |
| + } else { |
| + auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
| + auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); |
| + // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's |
| + // keep liveness happy, shall we? |
| + Context.insert(InstFakeUse::create(Func, TmpReg)); |
| + Context.insert(InstFakeUse::create(Func, ExpectedReg)); |
| + _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); |
| + _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); |
| + } |
| + _strex(Success, NewReg, Mem, CondARM32::EQ); |
| + if (DestTy != IceType_i64) { |
| + _mov_redefined(Expected, TmpReg, CondARM32::NE); |
| + } else { |
| + auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
| + auto *Expected64 = llvm::cast<Variable64On32>(Expected); |
| + _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); |
| + _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); |
| + auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg); |
| + Context.insert(FakeDef); |
| + FakeDef->setDestRedefined(); |
| + } |
| + _cmp(Success, _0, CondARM32::EQ); |
| + _br(Retry, CondARM32::NE); |
| + _dmb(); |
| + lowerAssign(InstAssign::create(Func, Dest, Expected)); |
| + Context.insert(InstFakeUse::create(Func, Expected)); |
| + if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { |
| + Context.insert(InstFakeUse::create(Func, New64->getLo())); |
| + Context.insert(InstFakeUse::create(Func, New64->getHi())); |
| + } else { |
| + Context.insert(InstFakeUse::create(Func, New)); |
| + } |
| + return; |
| + } |
| + case Intrinsics::AtomicRMW: { |
| + if (!Intrinsics::isMemoryOrderValid( |
| + ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| + Func->setError("Unexpected memory ordering for AtomicRMW"); |
| + return; |
| + } |
| + lowerAtomicRMW( |
| + Dest, static_cast<uint32_t>( |
| + llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
| + Instr->getArg(1), Instr->getArg(2)); |
| return; |
| } |
| case Intrinsics::Bswap: { |
| - Variable *Dest = Instr->getDest(); |
| Operand *Val = Instr->getArg(0); |
| Type Ty = Val->getType(); |
| if (Ty == IceType_i64) { |
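The atomic lowerings in the hunks above are all built around ldrex/strex retry loops. As a rough source-level illustration only, the sketch below shows what a 32-bit AtomicRMW add boils down to; atomicAddSketch is a made-up name, and the __builtin_arm_ldrex/__builtin_arm_strex calls are Clang ACLE builtins, not Subzero code.

    #include <cstdint>

    // Sketch of the retry loop emitted for a 32-bit AtomicAdd; the real
    // lowering additionally wraps the loop in dmb barriers.
    uint32_t atomicAddSketch(volatile uint32_t *Addr, uint32_t Operand) {
      uint32_t Contents, Tmp;
      do {
        Contents = __builtin_arm_ldrex(Addr); // ldrex contents, [addr]
        Tmp = Contents + Operand;             // add   tmp, contents, operand
        // strex stores Tmp only if the exclusive monitor is still held; it
        // returns 0 on success and nonzero otherwise.
      } while (__builtin_arm_strex(Tmp, Addr) != 0); // strex success, tmp, [addr]
      return Contents; // mov dest, contents (the intrinsic returns the old value)
    }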
| @@ -2598,7 +3054,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| return; |
| } |
| case Intrinsics::Ctpop: { |
| - Variable *Dest = Instr->getDest(); |
| Operand *Val = Instr->getArg(0); |
| InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
| ? H_call_ctpop_i32 |
| @@ -2633,7 +3088,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| } else { |
| ValLoR = legalizeToReg(Val); |
| } |
| - lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
| + lowerCLZ(Dest, ValLoR, ValHiR); |
| return; |
| } |
| case Intrinsics::Cttz: { |
| @@ -2657,17 +3112,16 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| _rbit(T, ValLoR); |
| ValLoR = T; |
| } |
| - lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
| + lowerCLZ(Dest, ValLoR, ValHiR); |
| return; |
| } |
| case Intrinsics::Fabs: { |
| - Variable *Dest = Instr->getDest(); |
| Type DestTy = Dest->getType(); |
| Variable *T = makeReg(DestTy); |
| if (isVectorType(DestTy)) { |
| // Add a fake def to keep liveness consistent in the meantime. |
| Context.insert(InstFakeDef::create(Func, T)); |
| - _mov(Instr->getDest(), T); |
| + _mov(Dest, T); |
| UnimplementedError(Func->getContext()->getFlags()); |
| return; |
| } |
| @@ -2721,20 +3175,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| if (Ctx->getFlags().getUseSandboxing()) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| } else { |
| - InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); |
| + InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0); |
| lowerCall(Call); |
| } |
| return; |
| } |
| case Intrinsics::Setjmp: { |
| - InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); |
| + InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1); |
| Call->addArg(Instr->getArg(0)); |
| lowerCall(Call); |
| return; |
| } |
| case Intrinsics::Sqrt: { |
| Variable *Src = legalizeToReg(Instr->getArg(0)); |
| - Variable *Dest = Instr->getDest(); |
| Variable *T = makeReg(Dest->getType()); |
| _vsqrt(T, Src); |
| _mov(Dest, T); |
| @@ -2742,7 +3195,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| } |
| case Intrinsics::Stacksave: { |
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| - Variable *Dest = Instr->getDest(); |
| _mov(Dest, SP); |
| return; |
| } |
| @@ -3224,6 +3676,16 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { |
| llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
| } |
| +Variable64On32 *TargetARM32::makeI64RegPair() { |
| + Variable64On32 *Reg = |
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| + Reg->setMustHaveReg(); |
| + Reg->initHiLo(Func); |
| + Reg->getLo()->setMustNotHaveReg(); |
| + Reg->getHi()->setMustNotHaveReg(); |
| + return Reg; |
| +} |
| + |
| Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
| // There aren't any 64-bit integer registers for ARM32. |
| assert(Type != IceType_i64); |
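As a usage note for the new makeI64RegPair helper above, the pattern it supports elsewhere in this patch is sketched below, condensed from the AtomicLoad lowering; Addr stands in for the intrinsic's pointer argument. The pair itself must live in registers, its halves are excluded from independent allocation, and the trailing fake-use keeps the ldrexd from being dead-code eliminated.

    // Condensed from the AtomicLoad lowering earlier in this patch.
    Variable64On32 *T = makeI64RegPair();            // i64 that must live in a GPR pair
    _ldrex(T, formMemoryOperand(Addr, IceType_i64)); // ldrexd t.lo, t.hi, [addr]
    _dmb();
    lowerAssign(InstAssign::create(Func, Dest, T));  // copy the loaded pair into Dest
    // Keep the atomic load from being dead-code eliminated.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));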