Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1369333003: Subzero. Enable Atomics in ARM. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Comments; lit; undo's; final patch prior to review. Created 5 years, 2 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 177 matching lines...)
188 I64PairRegisters[RegARM32::val] = isI64Pair; \ 188 I64PairRegisters[RegARM32::val] = isI64Pair; \
189 Float32Registers[RegARM32::val] = isFP32; \ 189 Float32Registers[RegARM32::val] = isFP32; \
190 Float64Registers[RegARM32::val] = isFP64; \ 190 Float64Registers[RegARM32::val] = isFP64; \
191 VectorRegisters[RegARM32::val] = isVec128; \ 191 VectorRegisters[RegARM32::val] = isVec128; \
192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ 192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
193 for (SizeT RegAlias : alias_init) { \ 193 for (SizeT RegAlias : alias_init) { \
194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ 194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \
195 "Duplicate alias for " #val); \ 195 "Duplicate alias for " #val); \
196 RegisterAliases[RegARM32::val].set(RegAlias); \ 196 RegisterAliases[RegARM32::val].set(RegAlias); \
197 } \ 197 } \
198 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
199 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ 198 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \
200 ScratchRegs[RegARM32::val] = scratch; 199 ScratchRegs[RegARM32::val] = scratch;
201 REGARM32_TABLE; 200 REGARM32_TABLE;
202 #undef X 201 #undef X
203 TypeToRegisterSet[IceType_void] = InvalidRegisters; 202 TypeToRegisterSet[IceType_void] = InvalidRegisters;
204 TypeToRegisterSet[IceType_i1] = IntegerRegisters; 203 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
205 TypeToRegisterSet[IceType_i8] = IntegerRegisters; 204 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 205 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 206 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
208 TypeToRegisterSet[IceType_i64] = I64PairRegisters; 207 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
209 TypeToRegisterSet[IceType_f32] = Float32Registers; 208 TypeToRegisterSet[IceType_f32] = Float32Registers;
210 TypeToRegisterSet[IceType_f64] = Float64Registers; 209 TypeToRegisterSet[IceType_f64] = Float64Registers;
211 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 210 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
212 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 211 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
213 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; 212 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
214 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; 213 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
215 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; 214 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
216 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; 215 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
217 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 216 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
218 } 217 }
219 218
219 namespace {
220 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
221 for (Variable *Var : Vars) {
222 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
223 if (!Var64) {
224 // This is not the variable we are looking for.
225 continue;
226 }
227 assert(Var64->hasReg() || !Var64->mustHaveReg());
228 if (!Var64->hasReg()) {
229 continue;
230 }
231 SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum());
232 // This assumes little endian.
233 Variable *Lo = Var64->getLo();
234 Variable *Hi = Var64->getHi();
235 assert(Lo->hasReg() == Hi->hasReg());
236 if (Lo->hasReg()) {
237 continue;
238 }
239 Lo->setRegNum(FirstReg);
240 Lo->setMustHaveReg();
241 Hi->setRegNum(FirstReg + 1);
242 Hi->setMustHaveReg();
243 }
244 }
245 } // end of anonymous namespace
246
220 void TargetARM32::translateO2() { 247 void TargetARM32::translateO2() {
221 TimerMarker T(TimerStack::TT_O2, Func); 248 TimerMarker T(TimerStack::TT_O2, Func);
222 249
223 // TODO(stichnot): share passes with X86? 250 // TODO(stichnot): share passes with X86?
224 // https://code.google.com/p/nativeclient/issues/detail?id=4094 251 // https://code.google.com/p/nativeclient/issues/detail?id=4094
225 252
226 if (!Ctx->getFlags().getPhiEdgeSplit()) { 253 if (!Ctx->getFlags().getPhiEdgeSplit()) {
227 // Lower Phi instructions. 254 // Lower Phi instructions.
228 Func->placePhiLoads(); 255 Func->placePhiLoads();
229 if (Func->hasError()) 256 if (Func->hasError())
(...skipping 47 matching lines...)
277 // Validate the live range computations. The expensive validation call is 304 // Validate the live range computations. The expensive validation call is
278 // deliberately only made when assertions are enabled. 305 // deliberately only made when assertions are enabled.
279 assert(Func->validateLiveness()); 306 assert(Func->validateLiveness());
280 // The post-codegen dump is done here, after liveness analysis and associated 307 // The post-codegen dump is done here, after liveness analysis and associated
281 // cleanup, to make the dump cleaner and more useful. 308 // cleanup, to make the dump cleaner and more useful.
282 Func->dump("After initial ARM32 codegen"); 309 Func->dump("After initial ARM32 codegen");
283 Func->getVMetadata()->init(VMK_All); 310 Func->getVMetadata()->init(VMK_All);
284 regAlloc(RAK_Global); 311 regAlloc(RAK_Global);
285 if (Func->hasError()) 312 if (Func->hasError())
286 return; 313 return;
314 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
287 Func->dump("After linear scan regalloc"); 315 Func->dump("After linear scan regalloc");
288 316
289 if (Ctx->getFlags().getPhiEdgeSplit()) { 317 if (Ctx->getFlags().getPhiEdgeSplit()) {
290 Func->advancedPhiLowering(); 318 Func->advancedPhiLowering();
291 Func->dump("After advanced Phi lowering"); 319 Func->dump("After advanced Phi lowering");
292 } 320 }
293 321
294 // Stack frame mapping. 322 // Stack frame mapping.
295 Func->genFrame(); 323 Func->genFrame();
296 if (Func->hasError()) 324 if (Func->hasError())
(...skipping 40 matching lines...)
337 Func->doArgLowering(); 365 Func->doArgLowering();
338 366
339 Func->genCode(); 367 Func->genCode();
340 if (Func->hasError()) 368 if (Func->hasError())
341 return; 369 return;
342 Func->dump("After initial ARM32 codegen"); 370 Func->dump("After initial ARM32 codegen");
343 371
344 regAlloc(RAK_InfOnly); 372 regAlloc(RAK_InfOnly);
345 if (Func->hasError()) 373 if (Func->hasError())
346 return; 374 return;
375 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
347 Func->dump("After regalloc of infinite-weight variables"); 376 Func->dump("After regalloc of infinite-weight variables");
348 377
349 Func->genFrame(); 378 Func->genFrame();
350 if (Func->hasError()) 379 if (Func->hasError())
351 return; 380 return;
352 Func->dump("After stack frame mapping"); 381 Func->dump("After stack frame mapping");
353 382
354 legalizeStackSlots(); 383 legalizeStackSlots();
355 if (Func->hasError()) 384 if (Func->hasError())
356 return; 385 return;
(...skipping 252 matching lines...)
609 // value from the stack slot. 638 // value from the stack slot.
610 if (Arg->hasReg()) { 639 if (Arg->hasReg()) {
611 assert(Ty != IceType_i64); 640 assert(Ty != IceType_i64);
612 // This should be simple, just load the parameter off the stack using a nice 641 // This should be simple, just load the parameter off the stack using a nice
613 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for 642 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for
614 // fp types, cannot have an index register), so we legalize the memory 643 // fp types, cannot have an index register), so we legalize the memory
615 // operand instead. 644 // operand instead.
616 auto *Mem = OperandARM32Mem::create( 645 auto *Mem = OperandARM32Mem::create(
617 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( 646 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
618 Ctx->getConstantInt32(Arg->getStackOffset()))); 647 Ctx->getConstantInt32(Arg->getStackOffset())));
619 legalizeToReg(Mem, Arg->getRegNum()); 648 _mov(Arg, legalizeToReg(Mem, Arg->getRegNum()));
620 // This argument-copying instruction uses an explicit OperandARM32Mem 649 // This argument-copying instruction uses an explicit OperandARM32Mem
621 // operand instead of a Variable, so its fill-from-stack operation has to 650 // operand instead of a Variable, so its fill-from-stack operation has to
622 // be tracked separately for statistics. 651 // be tracked separately for statistics.
623 Ctx->statsUpdateFills(); 652 Ctx->statsUpdateFills();
624 } 653 }
625 } 654 }
626 655
627 Type TargetARM32::stackSlotType() { return IceType_i32; } 656 Type TargetARM32::stackSlotType() { return IceType_i32; }
628 657
629 void TargetARM32::addProlog(CfgNode *Node) { 658 void TargetARM32::addProlog(CfgNode *Node) {
(...skipping 79 matching lines...)
709 if (UsesFramePointer) { 738 if (UsesFramePointer) {
710 CalleeSaves[RegARM32::Reg_fp] = true; 739 CalleeSaves[RegARM32::Reg_fp] = true;
711 assert(RegsUsed[RegARM32::Reg_fp] == false); 740 assert(RegsUsed[RegARM32::Reg_fp] == false);
712 RegsUsed[RegARM32::Reg_fp] = true; 741 RegsUsed[RegARM32::Reg_fp] = true;
713 } 742 }
714 if (!MaybeLeafFunc) { 743 if (!MaybeLeafFunc) {
715 CalleeSaves[RegARM32::Reg_lr] = true; 744 CalleeSaves[RegARM32::Reg_lr] = true;
716 RegsUsed[RegARM32::Reg_lr] = true; 745 RegsUsed[RegARM32::Reg_lr] = true;
717 } 746 }
718 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 747 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
748 if (RegARM32::isI64RegisterPair(i)) {
749 // We don't save register pairs explicitly. Instead, we rely on the code
750 // fake-defing/fake-using each register in the pair.
751 continue;
752 }
719 if (CalleeSaves[i] && RegsUsed[i]) { 753 if (CalleeSaves[i] && RegsUsed[i]) {
720 // TODO(jvoung): do separate vpush for each floating point register 754 // TODO(jvoung): do separate vpush for each floating point register
721 // segment and += 4, or 8 depending on type. 755 // segment and += 4, or 8 depending on type.
722 ++NumCallee; 756 ++NumCallee;
723 PreservedRegsSizeBytes += 4; 757 PreservedRegsSizeBytes += 4;
724 GPRsToPreserve.push_back(getPhysicalRegister(i)); 758 GPRsToPreserve.push_back(getPhysicalRegister(i));
725 } 759 }
726 } 760 }
727 Ctx->statsUpdateRegistersSaved(NumCallee); 761 Ctx->statsUpdateRegistersSaved(NumCallee);
728 if (!GPRsToPreserve.empty()) 762 if (!GPRsToPreserve.empty())
(...skipping 148 matching lines...)
877 // Consider FP and LR as callee-save / used as needed. 911 // Consider FP and LR as callee-save / used as needed.
878 if (UsesFramePointer) { 912 if (UsesFramePointer) {
879 CalleeSaves[RegARM32::Reg_fp] = true; 913 CalleeSaves[RegARM32::Reg_fp] = true;
880 } 914 }
881 if (!MaybeLeafFunc) { 915 if (!MaybeLeafFunc) {
882 CalleeSaves[RegARM32::Reg_lr] = true; 916 CalleeSaves[RegARM32::Reg_lr] = true;
883 } 917 }
884 // Pop registers in ascending order just like push (instead of in reverse 918 // Pop registers in ascending order just like push (instead of in reverse
885 // order). 919 // order).
886 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 920 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
921 if (RegARM32::isI64RegisterPair(i)) {
922 continue;
923 }
924
887 if (CalleeSaves[i] && RegsUsed[i]) { 925 if (CalleeSaves[i] && RegsUsed[i]) {
888 GPRsToRestore.push_back(getPhysicalRegister(i)); 926 GPRsToRestore.push_back(getPhysicalRegister(i));
889 } 927 }
890 } 928 }
891 if (!GPRsToRestore.empty()) 929 if (!GPRsToRestore.empty())
892 _pop(GPRsToRestore); 930 _pop(GPRsToRestore);
893 931
894 if (!Ctx->getFlags().getUseSandboxing()) 932 if (!Ctx->getFlags().getUseSandboxing())
895 return; 933 return;
896 934
(...skipping 835 matching lines...)
1732 Operand *Src0 = Inst->getSrc(0); 1770 Operand *Src0 = Inst->getSrc(0);
1733 assert(Dest->getType() == Src0->getType()); 1771 assert(Dest->getType() == Src0->getType());
1734 if (Dest->getType() == IceType_i64) { 1772 if (Dest->getType() == IceType_i64) {
1735 Src0 = legalizeUndef(Src0); 1773 Src0 = legalizeUndef(Src0);
1736 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 1774 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1737 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 1775 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1738 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1776 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1739 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1777 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1740 Variable *T_Lo = makeReg(IceType_i32); 1778 Variable *T_Lo = makeReg(IceType_i32);
1741 Variable *T_Hi = makeReg(IceType_i32); 1779 Variable *T_Hi = makeReg(IceType_i32);
1780
1742 _mov(T_Lo, Src0Lo); 1781 _mov(T_Lo, Src0Lo);
1743 _mov(DestLo, T_Lo); 1782 _mov(DestLo, T_Lo);
1744 _mov(T_Hi, Src0Hi); 1783 _mov(T_Hi, Src0Hi);
1745 _mov(DestHi, T_Hi); 1784 _mov(DestHi, T_Hi);
1746 } else { 1785 } else {
1747 Operand *NewSrc; 1786 Operand *NewSrc;
1748 if (Dest->hasReg()) { 1787 if (Dest->hasReg()) {
1749 // If Dest already has a physical register, then legalize the Src operand 1788 // If Dest already has a physical register, then legalize the Src operand
1750 // into a Variable with the same register assignment. This especially 1789 // into a Variable with the same register assignment. This especially
1751 // helps allow the use of Flex operands. 1790 // helps allow the use of Flex operands.
(...skipping 512 matching lines...)
2264 case IceType_i64: { 2303 case IceType_i64: {
2265 // t0, t1 <- src0 2304 // t0, t1 <- src0
2266 // dest[31..0] = t0 2305 // dest[31..0] = t0
2267 // dest[63..32] = t1 2306 // dest[63..32] = t1
2268 assert(Src0->getType() == IceType_f64); 2307 assert(Src0->getType() == IceType_f64);
2269 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 2308 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2270 T->initHiLo(Func); 2309 T->initHiLo(Func);
2271 configureBitcastTemporary(T); 2310 configureBitcastTemporary(T);
2272 Variable *Src0R = legalizeToReg(Src0); 2311 Variable *Src0R = legalizeToReg(Src0);
2273 _mov(T, Src0R); 2312 _mov(T, Src0R);
2274 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); 2313 lowerAssign(InstAssign::create(Func, Dest, T));
2275 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo()));
2276 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi()));
2277 break; 2314 break;
2278 } 2315 }
2279 case IceType_f64: { 2316 case IceType_f64: {
2280 // T0 <- lo(src) 2317 // T0 <- lo(src)
2281 // T1 <- hi(src) 2318 // T1 <- hi(src)
2282 // vmov T2, T0, T1 2319 // vmov T2, T0, T1
2283 // Dest <- T2 2320 // Dest <- T2
2284 assert(Src0->getType() == IceType_i64); 2321 assert(Src0->getType() == IceType_i64);
2322 Variable *T = makeReg(DestType);
2285 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 2323 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2286 Src64->initHiLo(Func); 2324 Src64->initHiLo(Func);
2287 configureBitcastTemporary(Src64); 2325 configureBitcastTemporary(Src64);
2288 lowerAssign(InstAssign::create(Func, Src64, Src0)); 2326 lowerAssign(InstAssign::create(Func, Src64, Src0));
2289 Variable *T = makeReg(IceType_f64);
2290 _mov(T, Src64); 2327 _mov(T, Src64);
2291 lowerAssign(InstAssign::create(Func, Dest, T)); 2328 lowerAssign(InstAssign::create(Func, Dest, T));
2292 break; 2329 break;
2293 } 2330 }
2294 case IceType_v4i1: 2331 case IceType_v4i1:
2295 case IceType_v8i1: 2332 case IceType_v8i1:
2296 case IceType_v16i1: 2333 case IceType_v16i1:
2297 case IceType_v8i16: 2334 case IceType_v8i16:
2298 case IceType_v16i8: 2335 case IceType_v16i8:
2299 case IceType_v4f32: 2336 case IceType_v4f32:
(...skipping 230 matching lines...)
2530 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); 2567 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition()));
2531 _mov(Dest, T); 2568 _mov(Dest, T);
2532 return; 2569 return;
2533 } 2570 }
2534 2571
2535 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { 2572 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
2536 (void)Inst; 2573 (void)Inst;
2537 UnimplementedError(Func->getContext()->getFlags()); 2574 UnimplementedError(Func->getContext()->getFlags());
2538 } 2575 }
2539 2576
2577 namespace {
2578 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
2579 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
2580 return Integer->getValue();
2581 return Intrinsics::MemoryOrderInvalid;
2582 }
2583 } // end of anonymous namespace
2584
2585 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2586 Operand *Ptr, Operand *Val) {
2587 // retry:
2588 // ldrex contents, [addr]
2589 // op tmp, contents, operand
2590 // strex success, tmp, [addr]
2591 // jne retry
2592 // fake-use(addr, operand) @ prevents undesireable clobbering.
Jim Stichnoth 2015/10/05 22:07:06 undesirable
John 2015/10/06 12:03:38 Done.
2593 // mov dest, contents
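For reference, a minimal C-level sketch of the retry loop described above, shown for a 32-bit AtomicAdd. It assumes Clang's ARM exclusive-access builtins (__builtin_arm_ldrex, __builtin_arm_strex, __builtin_arm_dmb); the helper name is illustrative only and is not part of this patch.

#include <stdint.h>

// Illustrative sketch only: mirrors the ldrex / op / strex retry loop that the
// lowering below emits, with full-system barriers standing in for _dmb().
static uint32_t AtomicAddSketch(volatile uint32_t *Addr, uint32_t Operand) {
  uint32_t Contents;
  int Success;
  __builtin_arm_dmb(15); // dmb sy
  do {
    Contents = __builtin_arm_ldrex(Addr);     // ldrex contents, [addr]
    uint32_t Tmp = Contents + Operand;        // add tmp, contents, operand
    Success = __builtin_arm_strex(Tmp, Addr); // strex success, tmp, [addr]
  } while (Success != 0);                     // bne retry
  __builtin_arm_dmb(15); // dmb sy
  return Contents;                            // mov dest, contents
}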
2594 assert(Dest != nullptr);
2595 Type DestTy = Dest->getType();
2596 (void)Ptr;
2597 (void)Val;
2598
2599 OperandARM32Mem *Mem;
2600 Variable *PtrContentsReg;
2601 Variable *PtrContentsHiReg;
2602 Variable *PtrContentsLoReg;
2603 Variable *Value = Func->makeVariable(DestTy);
2604 Variable *ValueReg;
2605 Variable *ValueHiReg;
2606 Variable *ValueLoReg;
2607 Variable *Success = makeReg(IceType_i32);
2608 Variable *TmpReg;
2609 Variable *TmpHiReg;
2610 Variable *TmpLoReg;
2611 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2612 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2613
2614 if (DestTy != IceType_i64) {
Jim Stichnoth 2015/10/05 22:07:06 (here and below) I think instead of this: if (A
John 2015/10/06 12:03:38 Done here, and elsewhere.
2615 PtrContentsReg = makeReg(DestTy);
2616 PtrContentsHiReg = nullptr;
2617 PtrContentsLoReg = PtrContentsReg;
2618
2619 ValueReg = makeReg(DestTy);
2620 ValueHiReg = nullptr;
2621 ValueLoReg = ValueReg;
2622
2623 TmpReg = makeReg(DestTy);
2624 TmpHiReg = nullptr;
2625 TmpLoReg = TmpReg;
2626 } else {
2627 Variable64On32 *PtrContentsReg64 = makeI64RegPair();
2628 PtrContentsHiReg = PtrContentsReg64->getHi();
2629 PtrContentsLoReg = PtrContentsReg64->getLo();
2630 PtrContentsReg = PtrContentsReg64;
2631
2632 llvm::cast<Variable64On32>(Value)->initHiLo(Func);
2633 Variable64On32 *ValueReg64 = makeI64RegPair();
2634 ValueHiReg = ValueReg64->getHi();
2635 ValueLoReg = ValueReg64->getLo();
2636 ValueReg = ValueReg64;
2637
2638 Variable64On32 *TmpReg64 = makeI64RegPair();
2639 TmpHiReg = TmpReg64->getHi();
2640 TmpLoReg = TmpReg64->getLo();
2641 TmpReg = TmpReg64;
2642 }
2643
2644 if (DestTy == IceType_i64) {
2645 Context.insert(InstFakeDef::create(Func, Value));
2646 }
2647 lowerAssign(InstAssign::create(Func, Value, Val));
2648
2649 Variable *PtrVar = Func->makeVariable(IceType_i32);
2650 lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
2651
2652 _dmb();
2653 Context.insert(Retry);
2654 Mem = formMemoryOperand(PtrVar, DestTy);
2655 if (DestTy == IceType_i64) {
2656 Context.insert(InstFakeDef::create(Func, ValueReg, Value));
2657 }
2658 lowerAssign(InstAssign::create(Func, ValueReg, Value));
2659 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2660 _uxt(ValueReg, ValueReg);
2661 }
2662 _ldrex(PtrContentsReg, Mem);
2663
2664 if (DestTy == IceType_i64) {
2665 Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
2666 }
2667 switch (Operation) {
2668 default:
2669 Func->setError("Unknown AtomicRMW operation");
2670 return;
2671 case Intrinsics::AtomicAdd:
2672 if (DestTy != IceType_i64) {
2673 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2674 } else {
2675 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2676 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2677 }
2678 break;
2679 case Intrinsics::AtomicSub:
2680 if (DestTy != IceType_i64) {
2681 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2682 } else {
2683 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2684 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2685 }
2686 break;
2687 case Intrinsics::AtomicOr:
2688 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2689 if (DestTy == IceType_i64) {
2690 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2691 }
2692 break;
2693 case Intrinsics::AtomicAnd:
2694 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2695 if (DestTy == IceType_i64) {
2696 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2697 }
2698 break;
2699 case Intrinsics::AtomicXor:
2700 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2701 if (DestTy == IceType_i64) {
2702 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2703 }
2704 break;
2705 case Intrinsics::AtomicExchange:
2706 _mov(TmpLoReg, ValueLoReg);
2707 if (DestTy == IceType_i64) {
2708 _mov(TmpHiReg, ValueHiReg);
2709 }
2710 break;
2711 }
2712 _strex(Success, TmpReg, Mem);
2713 _cmp(Success, _0);
2714 _br(Retry, CondARM32::NE);
2715
2716 // The following fake-uses ensure that Subzero will not clobber them in the
2717 // load-linked/store-conditional loop above. We might have to spill them, but
2718 // spilling is preferable over incorrect behavior.
2719 Context.insert(InstFakeUse::create(Func, PtrVar));
2720 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
2721 Context.insert(InstFakeUse::create(Func, Value64->getHi()));
2722 Context.insert(InstFakeUse::create(Func, Value64->getLo()));
2723 } else {
2724 Context.insert(InstFakeUse::create(Func, Value));
2725 }
2726 _dmb();
2727 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2728 _uxt(PtrContentsReg, PtrContentsReg);
2729 }
2730
2731 if (DestTy == IceType_i64) {
2732 Context.insert(InstFakeUse::create(Func, PtrContentsReg));
2733 }
2734 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
2735 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
2736 Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
2737 Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
2738 } else {
2739 Context.insert(InstFakeUse::create(Func, Dest));
2740 }
2741 }
2742
2540 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2743 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2541 switch (Instr->getIntrinsicInfo().ID) { 2744 Variable *Dest = Instr->getDest();
2745 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
2746 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
2747 switch (ID) {
2748 case Intrinsics::AtomicFence:
2749 case Intrinsics::AtomicFenceAll:
2750 assert(Dest == nullptr);
2751 _dmb();
2752 return;
2753 case Intrinsics::AtomicIsLockFree: {
2754 Operand *ByteSize = Instr->getArg(0);
2755 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
2756 if (CI == nullptr) {
2757 // The PNaCl ABI requires the byte size to be a compile-time constant.
2758 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2759 return;
2760 }
2761 static constexpr int32_t NotLockFree = 0;
2762 static constexpr int32_t LockFree = 1;
2763 int32_t Result = NotLockFree;
2764 switch (CI->getValue()) {
2765 case 1:
2766 case 2:
2767 case 4:
2768 case 8:
2769 Result = LockFree;
2770 break;
2771 }
2772 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
2773 return;
2774 }
2775 case Intrinsics::AtomicLoad: {
2776 assert(isScalarIntegerType(DestTy));
2777 // We require the memory address to be naturally aligned. Given that is the
2778 // case, normal loads are atomic.
2779 if (!Intrinsics::isMemoryOrderValid(
2780 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2781 Func->setError("Unexpected memory ordering for AtomicLoad");
2782 return;
2783 }
2784 Variable *T;
2785
2786 if (DestTy == IceType_i64) {
2787 // ldrex is the only arm instruction that is guaranteed to load a 64-bit
2788 // integer atomically. Everything else works with a regular ldr.
2789 T = makeI64RegPair();
2790 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
2791 } else {
2792 T = makeReg(DestTy);
2793 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
2794 }
2795 _dmb();
2796 lowerAssign(InstAssign::create(Func, Dest, T));
2797 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2798 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
2799 // the FakeUse on the last-inserted instruction's dest.
2800 Context.insert(
2801 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2802 return;
2803 }
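As a rough illustration of the comment above, a C-level sketch of the 64-bit load path. It assumes Clang's __builtin_arm_ldrex accepts 64-bit operands (lowering to ldrexd) and __builtin_arm_dmb; the helper name is illustrative only.

#include <stdint.h>

// Illustrative sketch only: a plain 64-bit load is not guaranteed to be atomic
// on ARM32, so the lowering uses an exclusive load followed by a barrier.
static uint64_t AtomicLoad64Sketch(volatile uint64_t *Addr) {
  uint64_t T = __builtin_arm_ldrex(Addr); // ldrexd t.lo, t.hi, [addr]
  __builtin_arm_dmb(15);                  // dmb sy
  return T;
}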
2804 case Intrinsics::AtomicStore: {
2805 // We require the memory address to be naturally aligned. Given that is the
2806 // case, normal stores are atomic.
2807 if (!Intrinsics::isMemoryOrderValid(
2808 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2809 Func->setError("Unexpected memory ordering for AtomicStore");
2810 return;
2811 }
2812 Operand *Value = Instr->getArg(0);
2813 Type ValueTy = Value->getType();
2814 assert(isScalarIntegerType(ValueTy));
2815 Operand *Addr = Instr->getArg(1);
2816
2817 _dmb();
2818 if (ValueTy != IceType_i64) {
2819 // non-64-bit stores are atomic as long as the address is aligned.
2820 // This is PNaCl, so addresses are aligned.
2821 Variable *T = makeReg(ValueTy);
2822 lowerAssign(InstAssign::create(Func, T, Value));
2823 _str(T, formMemoryOperand(Addr, ValueTy));
2824 } else {
2825 // Atomic 64-bit stores require a load-linked/store-conditional loop using
2826 // ldrexd and strexd. The lowered code is:
2827 //
2828 // retry:
2829 // ldrexd t.lo, t.hi, [addr]
2830 // strexd success, value.lo, value.hi, [addr]
2831 // cmp success, #0
2832 // bne retry
2833 // fake-use(addr, value.lo, value.hi)
2834 //
2835 // The fake-use is needed to prevent those variables from being clobbered
2836 // in the loop (which will happen under register pressure.)
2837 Variable64On32 *Tmp = makeI64RegPair();
2838 Variable64On32 *ValueVar =
2839 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2840 Variable *AddrVar = makeReg(IceType_i32);
2841 Variable *Success = makeReg(IceType_i32);
2842 OperandARM32Mem *Mem;
2843 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2844 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2845 Variable64On32 *NewReg = makeI64RegPair();
2846 ValueVar->initHiLo(Func);
2847 ValueVar->mustNotHaveReg();
2848
2849 lowerAssign(InstAssign::create(Func, ValueVar, Value));
2850 lowerAssign(InstAssign::create(Func, AddrVar, Addr));
2851
2852 Context.insert(Retry);
2853 Context.insert(InstFakeDef::create(Func, NewReg));
2854 lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
2855 Mem = formMemoryOperand(AddrVar, IceType_i64);
2856 _ldrex(Tmp, Mem);
2857 // This fake-use prevents the ldrex from being dead-code eliminated, while
2858 // also keeping liveness happy about all defs being used.
2859 Context.insert(
2860 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2861 _strex(Success, NewReg, Mem);
2862 _cmp(Success, _0);
2863 _br(Retry, CondARM32::NE);
2864
2865 Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
2866 Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
2867 Context.insert(InstFakeUse::create(Func, AddrVar));
2868 }
2869 _dmb();
2870 return;
2871 }
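A matching C-level sketch of the 64-bit store loop described in the comment above, under the same assumptions about Clang's __builtin_arm_ldrex/__builtin_arm_strex (64-bit operands lowering to ldrexd/strexd) and __builtin_arm_dmb; the helper is illustrative only.

#include <stdint.h>

// Illustrative sketch only: the store is retried until the exclusive store
// succeeds, which is what makes the 64-bit write atomic.
static void AtomicStore64Sketch(volatile uint64_t *Addr, uint64_t Value) {
  int Success;
  __builtin_arm_dmb(15); // dmb sy
  do {
    (void)__builtin_arm_ldrex(Addr);            // ldrexd t.lo, t.hi, [addr]
    Success = __builtin_arm_strex(Value, Addr); // strexd success, value.lo, value.hi, [addr]
  } while (Success != 0);                       // bne retry
  __builtin_arm_dmb(15); // dmb sy
}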
2542 case Intrinsics::AtomicCmpxchg: { 2872 case Intrinsics::AtomicCmpxchg: {
2543 UnimplementedError(Func->getContext()->getFlags()); 2873 // The initial lowering for cmpxchg was:
2544 return; 2874 //
2545 } 2875 // retry:
2546 case Intrinsics::AtomicFence: 2876 // ldrex tmp, [addr]
2547 UnimplementedError(Func->getContext()->getFlags()); 2877 // cmp tmp, expected
2548 return; 2878 // mov expected, tmp
2549 case Intrinsics::AtomicFenceAll: 2879 // jne retry
2550 // NOTE: FenceAll should prevent any load/store from being moved across the 2880 // strex success, new, [addr]
2551 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is 2881 // cmp success, #0
2552 // currently marked coarsely as "HasSideEffects". 2882 // bne retry
2553 UnimplementedError(Func->getContext()->getFlags()); 2883 // mov dest, expected
2554 return; 2884 //
2555 case Intrinsics::AtomicIsLockFree: { 2885 // Besides requiring two branches, that lowering could also potentially
2556 UnimplementedError(Func->getContext()->getFlags()); 2886 // write to memory (in mov expected, tmp) unless we were OK with increasing
2557 return; 2887 // the register pressure and requiring expected to be an infinite-weight
2558 } 2888 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
2559 case Intrinsics::AtomicLoad: { 2889 // careful rewriting, and thanks to predication, we now implement the
2560 UnimplementedError(Func->getContext()->getFlags()); 2890 // lowering as:
2561 return; 2891 //
2562 } 2892 // retry:
2563 case Intrinsics::AtomicRMW: 2893 // ldrex tmp, [addr]
2564 UnimplementedError(Func->getContext()->getFlags()); 2894 // cmp tmp, expected
2565 return; 2895 // strexeq success, new, [addr]
2566 case Intrinsics::AtomicStore: { 2896 // movne expected, tmp
2567 UnimplementedError(Func->getContext()->getFlags()); 2897 // cmpeq success, #0
2898 // bne retry
2899 // mov dest, expected
2900 //
2901 // Predication lets us move the strex ahead of the mov expected, tmp, which
2902 // allows tmp to be a non-infinite weight temporary. We wanted to avoid
2903 // writing to memory between ldrex and strex because, even though most times
2904 // that would cause no issues, if any interleaving memory write aliased
2905 // [addr] then we would have undefined behavior. Undefined behavior isn't
2906 // cool, so we try to avoid it. See the "Synchronization and semaphores"
2907 // section of the "ARM Architecture Reference Manual."
2908
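For readers who prefer C to predicated assembly, a sketch of the compare-and-swap retry loop the intrinsic provides. It exits on a failed comparison instead of modeling the movne/cmpeq predication one-for-one, and it assumes the same Clang builtins as the sketches above; the helper name is illustrative only.

#include <stdint.h>

// Illustrative sketch only: returns the value observed in memory; the caller
// compares it against its original Expected to tell whether the swap happened.
static uint32_t Cmpxchg32Sketch(volatile uint32_t *Addr, uint32_t Expected,
                                uint32_t New) {
  uint32_t Found;
  __builtin_arm_dmb(15); // dmb sy
  do {
    Found = __builtin_arm_ldrex(Addr); // ldrex tmp, [addr]
    if (Found != Expected)             // cmp tmp, expected
      break;                           // comparison failed: no store
  } while (__builtin_arm_strex(New, Addr) != 0); // strex success, new, [addr]; retry on failure
  __builtin_arm_dmb(15); // dmb sy
  return Found;                        // mov dest, expected
}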
2909 assert(isScalarIntegerType(DestTy));
2910 // We require the memory address to be naturally aligned. Given that is the
2911 // case, normal loads are atomic.
2912 if (!Intrinsics::isMemoryOrderValid(
2913 ID, getConstantMemoryOrder(Instr->getArg(3)),
2914 getConstantMemoryOrder(Instr->getArg(4)))) {
2915 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
2916 return;
2917 }
2918
2919 OperandARM32Mem *Mem;
2920 Variable *TmpReg;
2921 Variable *Expected, *ExpectedReg;
2922 Variable *New, *NewReg;
2923 Variable *Success = makeReg(IceType_i32);
2924 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2925 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2926
2927 if (DestTy == IceType_i64) {
2928 Variable64On32 *TmpReg64 = makeI64RegPair();
2929 Variable64On32 *New64 =
2930 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2931 Variable64On32 *NewReg64 = makeI64RegPair();
2932 Variable64On32 *Expected64 =
2933 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2934 Variable64On32 *ExpectedReg64 = makeI64RegPair();
2935
2936 New64->initHiLo(Func);
2937 New64->mustNotHaveReg();
2938 Expected64->initHiLo(Func);
2939 Expected64->mustNotHaveReg();
2940
2941 TmpReg = TmpReg64;
2942 New = New64;
2943 NewReg = NewReg64;
2944 Expected = Expected64;
2945 ExpectedReg = ExpectedReg64;
2946 } else {
2947 TmpReg = makeReg(DestTy);
2948 New = Func->makeVariable(DestTy);
2949 NewReg = makeReg(DestTy);
2950 Expected = Func->makeVariable(DestTy);
2951 ExpectedReg = makeReg(DestTy);
2952 }
2953
2954 Mem = formMemoryOperand(Instr->getArg(0), DestTy);
2955 if (DestTy == IceType_i64) {
2956 Context.insert(InstFakeDef::create(Func, Expected));
2957 }
2958 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
2959 if (DestTy == IceType_i64) {
2960 Context.insert(InstFakeDef::create(Func, New));
2961 }
2962 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
2963 _dmb();
2964
2965 Context.insert(Retry);
2966 if (DestTy == IceType_i64) {
2967 Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
2968 }
2969 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
2970 if (DestTy == IceType_i64) {
2971 Context.insert(InstFakeDef::create(Func, NewReg, New));
2972 }
2973 lowerAssign(InstAssign::create(Func, NewReg, New));
2974
2975 _ldrex(TmpReg, Mem);
2976 Context.insert(
2977 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2978 if (DestTy != IceType_i64) {
2979 _cmp(TmpReg, ExpectedReg);
2980 } else {
2981 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2982 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
2983 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
2984 // keep liveness happy, shall we?
2985 Context.insert(InstFakeUse::create(Func, TmpReg));
2986 Context.insert(InstFakeUse::create(Func, ExpectedReg));
2987 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
2988 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
2989 }
2990 _strex(Success, NewReg, Mem, CondARM32::EQ);
2991 if (DestTy != IceType_i64) {
2992 _mov_redefined(Expected, TmpReg, CondARM32::NE);
2993 } else {
2994 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2995 auto *Expected64 = llvm::cast<Variable64On32>(Expected);
2996 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
2997 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
2998 auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
2999 Context.insert(FakeDef);
3000 FakeDef->setDestRedefined();
3001 }
3002 _cmp(Success, _0, CondARM32::EQ);
3003 _br(Retry, CondARM32::NE);
3004 _dmb();
3005 lowerAssign(InstAssign::create(Func, Dest, Expected));
3006 Context.insert(InstFakeUse::create(Func, Expected));
3007 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
3008 Context.insert(InstFakeUse::create(Func, New64->getLo()));
3009 Context.insert(InstFakeUse::create(Func, New64->getHi()));
3010 } else {
3011 Context.insert(InstFakeUse::create(Func, New));
3012 }
3013 return;
3014 }
3015 case Intrinsics::AtomicRMW: {
3016 if (!Intrinsics::isMemoryOrderValid(
3017 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3018 Func->setError("Unexpected memory ordering for AtomicRMW");
3019 return;
3020 }
3021 lowerAtomicRMW(
3022 Dest, static_cast<uint32_t>(
3023 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3024 Instr->getArg(1), Instr->getArg(2));
2568 return; 3025 return;
2569 } 3026 }
2570 case Intrinsics::Bswap: { 3027 case Intrinsics::Bswap: {
2571 Variable *Dest = Instr->getDest();
2572 Operand *Val = Instr->getArg(0); 3028 Operand *Val = Instr->getArg(0);
2573 Type Ty = Val->getType(); 3029 Type Ty = Val->getType();
2574 if (Ty == IceType_i64) { 3030 if (Ty == IceType_i64) {
2575 Val = legalizeUndef(Val); 3031 Val = legalizeUndef(Val);
2576 Variable *Val_Lo = legalizeToReg(loOperand(Val)); 3032 Variable *Val_Lo = legalizeToReg(loOperand(Val));
2577 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); 3033 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
2578 Variable *T_Lo = makeReg(IceType_i32); 3034 Variable *T_Lo = makeReg(IceType_i32);
2579 Variable *T_Hi = makeReg(IceType_i32); 3035 Variable *T_Hi = makeReg(IceType_i32);
2580 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3036 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2581 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3037 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2582 _rev(T_Lo, Val_Lo); 3038 _rev(T_Lo, Val_Lo);
2583 _rev(T_Hi, Val_Hi); 3039 _rev(T_Hi, Val_Hi);
2584 _mov(DestLo, T_Hi); 3040 _mov(DestLo, T_Hi);
2585 _mov(DestHi, T_Lo); 3041 _mov(DestHi, T_Lo);
2586 } else { 3042 } else {
2587 assert(Ty == IceType_i32 || Ty == IceType_i16); 3043 assert(Ty == IceType_i32 || Ty == IceType_i16);
2588 Variable *ValR = legalizeToReg(Val); 3044 Variable *ValR = legalizeToReg(Val);
2589 Variable *T = makeReg(Ty); 3045 Variable *T = makeReg(Ty);
2590 _rev(T, ValR); 3046 _rev(T, ValR);
2591 if (Val->getType() == IceType_i16) { 3047 if (Val->getType() == IceType_i16) {
2592 Operand *Sixteen = 3048 Operand *Sixteen =
2593 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); 3049 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
2594 _lsr(T, T, Sixteen); 3050 _lsr(T, T, Sixteen);
2595 } 3051 }
2596 _mov(Dest, T); 3052 _mov(Dest, T);
2597 } 3053 }
2598 return; 3054 return;
2599 } 3055 }
2600 case Intrinsics::Ctpop: { 3056 case Intrinsics::Ctpop: {
2601 Variable *Dest = Instr->getDest();
2602 Operand *Val = Instr->getArg(0); 3057 Operand *Val = Instr->getArg(0);
2603 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3058 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2604 ? H_call_ctpop_i32 3059 ? H_call_ctpop_i32
2605 : H_call_ctpop_i64, 3060 : H_call_ctpop_i64,
2606 Dest, 1); 3061 Dest, 1);
2607 Call->addArg(Val); 3062 Call->addArg(Val);
2608 lowerCall(Call); 3063 lowerCall(Call);
2609 // The popcount helpers always return 32-bit values, while the intrinsic's 3064 // The popcount helpers always return 32-bit values, while the intrinsic's
2610 // signature matches some 64-bit platform's native instructions and expect 3065 // signature matches some 64-bit platform's native instructions and expect
2611 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in 3066 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
(...skipping 14 matching lines...)
2626 Operand *Val = Instr->getArg(0); 3081 Operand *Val = Instr->getArg(0);
2627 Variable *ValLoR; 3082 Variable *ValLoR;
2628 Variable *ValHiR = nullptr; 3083 Variable *ValHiR = nullptr;
2629 if (Val->getType() == IceType_i64) { 3084 if (Val->getType() == IceType_i64) {
2630 Val = legalizeUndef(Val); 3085 Val = legalizeUndef(Val);
2631 ValLoR = legalizeToReg(loOperand(Val)); 3086 ValLoR = legalizeToReg(loOperand(Val));
2632 ValHiR = legalizeToReg(hiOperand(Val)); 3087 ValHiR = legalizeToReg(hiOperand(Val));
2633 } else { 3088 } else {
2634 ValLoR = legalizeToReg(Val); 3089 ValLoR = legalizeToReg(Val);
2635 } 3090 }
2636 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); 3091 lowerCLZ(Dest, ValLoR, ValHiR);
2637 return; 3092 return;
2638 } 3093 }
2639 case Intrinsics::Cttz: { 3094 case Intrinsics::Cttz: {
2640 // Essentially like Clz, but reverse the bits first. 3095 // Essentially like Clz, but reverse the bits first.
2641 Operand *Val = Instr->getArg(0); 3096 Operand *Val = Instr->getArg(0);
2642 Variable *ValLoR; 3097 Variable *ValLoR;
2643 Variable *ValHiR = nullptr; 3098 Variable *ValHiR = nullptr;
2644 if (Val->getType() == IceType_i64) { 3099 if (Val->getType() == IceType_i64) {
2645 Val = legalizeUndef(Val); 3100 Val = legalizeUndef(Val);
2646 ValLoR = legalizeToReg(loOperand(Val)); 3101 ValLoR = legalizeToReg(loOperand(Val));
2647 ValHiR = legalizeToReg(hiOperand(Val)); 3102 ValHiR = legalizeToReg(hiOperand(Val));
2648 Variable *TLo = makeReg(IceType_i32); 3103 Variable *TLo = makeReg(IceType_i32);
2649 Variable *THi = makeReg(IceType_i32); 3104 Variable *THi = makeReg(IceType_i32);
2650 _rbit(TLo, ValLoR); 3105 _rbit(TLo, ValLoR);
2651 _rbit(THi, ValHiR); 3106 _rbit(THi, ValHiR);
2652 ValLoR = THi; 3107 ValLoR = THi;
2653 ValHiR = TLo; 3108 ValHiR = TLo;
2654 } else { 3109 } else {
2655 ValLoR = legalizeToReg(Val); 3110 ValLoR = legalizeToReg(Val);
2656 Variable *T = makeReg(IceType_i32); 3111 Variable *T = makeReg(IceType_i32);
2657 _rbit(T, ValLoR); 3112 _rbit(T, ValLoR);
2658 ValLoR = T; 3113 ValLoR = T;
2659 } 3114 }
2660 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); 3115 lowerCLZ(Dest, ValLoR, ValHiR);
2661 return; 3116 return;
2662 } 3117 }
2663 case Intrinsics::Fabs: { 3118 case Intrinsics::Fabs: {
2664 Variable *Dest = Instr->getDest();
2665 Type DestTy = Dest->getType(); 3119 Type DestTy = Dest->getType();
2666 Variable *T = makeReg(DestTy); 3120 Variable *T = makeReg(DestTy);
2667 if (isVectorType(DestTy)) { 3121 if (isVectorType(DestTy)) {
2668 // Add a fake def to keep liveness consistent in the meantime. 3122 // Add a fake def to keep liveness consistent in the meantime.
2669 Context.insert(InstFakeDef::create(Func, T)); 3123 Context.insert(InstFakeDef::create(Func, T));
2670 _mov(Instr->getDest(), T); 3124 _mov(Dest, T);
2671 UnimplementedError(Func->getContext()->getFlags()); 3125 UnimplementedError(Func->getContext()->getFlags());
2672 return; 3126 return;
2673 } 3127 }
2674 _vabs(T, legalizeToReg(Instr->getArg(0))); 3128 _vabs(T, legalizeToReg(Instr->getArg(0)));
2675 _mov(Dest, T); 3129 _mov(Dest, T);
2676 return; 3130 return;
2677 } 3131 }
2678 case Intrinsics::Longjmp: { 3132 case Intrinsics::Longjmp: {
2679 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); 3133 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
2680 Call->addArg(Instr->getArg(0)); 3134 Call->addArg(Instr->getArg(0));
(...skipping 33 matching lines...)
2714 Call->addArg(Instr->getArg(0)); 3168 Call->addArg(Instr->getArg(0));
2715 Call->addArg(ValExt); 3169 Call->addArg(ValExt);
2716 Call->addArg(Instr->getArg(2)); 3170 Call->addArg(Instr->getArg(2));
2717 lowerCall(Call); 3171 lowerCall(Call);
2718 return; 3172 return;
2719 } 3173 }
2720 case Intrinsics::NaClReadTP: { 3174 case Intrinsics::NaClReadTP: {
2721 if (Ctx->getFlags().getUseSandboxing()) { 3175 if (Ctx->getFlags().getUseSandboxing()) {
2722 UnimplementedError(Func->getContext()->getFlags()); 3176 UnimplementedError(Func->getContext()->getFlags());
2723 } else { 3177 } else {
2724 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 3178 InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0);
2725 lowerCall(Call); 3179 lowerCall(Call);
2726 } 3180 }
2727 return; 3181 return;
2728 } 3182 }
2729 case Intrinsics::Setjmp: { 3183 case Intrinsics::Setjmp: {
2730 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); 3184 InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1);
2731 Call->addArg(Instr->getArg(0)); 3185 Call->addArg(Instr->getArg(0));
2732 lowerCall(Call); 3186 lowerCall(Call);
2733 return; 3187 return;
2734 } 3188 }
2735 case Intrinsics::Sqrt: { 3189 case Intrinsics::Sqrt: {
2736 Variable *Src = legalizeToReg(Instr->getArg(0)); 3190 Variable *Src = legalizeToReg(Instr->getArg(0));
2737 Variable *Dest = Instr->getDest();
2738 Variable *T = makeReg(Dest->getType()); 3191 Variable *T = makeReg(Dest->getType());
2739 _vsqrt(T, Src); 3192 _vsqrt(T, Src);
2740 _mov(Dest, T); 3193 _mov(Dest, T);
2741 return; 3194 return;
2742 } 3195 }
2743 case Intrinsics::Stacksave: { 3196 case Intrinsics::Stacksave: {
2744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 3197 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2745 Variable *Dest = Instr->getDest();
2746 _mov(Dest, SP); 3198 _mov(Dest, SP);
2747 return; 3199 return;
2748 } 3200 }
2749 case Intrinsics::Stackrestore: { 3201 case Intrinsics::Stackrestore: {
2750 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 3202 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2751 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); 3203 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
2752 _mov_redefined(SP, Val); 3204 _mov_redefined(SP, Val);
2753 return; 3205 return;
2754 } 3206 }
2755 case Intrinsics::Trap: 3207 case Intrinsics::Trap:
(...skipping 461 matching lines...)
3217 } 3669 }
3218 // If we didn't do address mode optimization, then we only have a base/offset 3670 // If we didn't do address mode optimization, then we only have a base/offset
3219 // to work with. ARM always requires a base register, so just use that to 3671 // to work with. ARM always requires a base register, so just use that to
3220 // hold the operand. 3672 // hold the operand.
3221 Variable *Base = legalizeToReg(Operand); 3673 Variable *Base = legalizeToReg(Operand);
3222 return OperandARM32Mem::create( 3674 return OperandARM32Mem::create(
3223 Func, Ty, Base, 3675 Func, Ty, Base,
3224 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 3676 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
3225 } 3677 }
3226 3678
3679 Variable64On32 *TargetARM32::makeI64RegPair() {
3680 Variable64On32 *Reg =
3681 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3682 Reg->setMustHaveReg();
3683 Reg->initHiLo(Func);
3684 Reg->getLo()->setMustNotHaveReg();
3685 Reg->getHi()->setMustNotHaveReg();
3686 return Reg;
3687 }
3688
3227 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { 3689 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
3228 // There aren't any 64-bit integer registers for ARM32. 3690 // There aren't any 64-bit integer registers for ARM32.
3229 assert(Type != IceType_i64); 3691 assert(Type != IceType_i64);
3230 Variable *Reg = Func->makeVariable(Type); 3692 Variable *Reg = Func->makeVariable(Type);
3231 if (RegNum == Variable::NoRegister) 3693 if (RegNum == Variable::NoRegister)
3232 Reg->setMustHaveReg(); 3694 Reg->setMustHaveReg();
3233 else 3695 else
3234 Reg->setRegNum(RegNum); 3696 Reg->setRegNum(RegNum);
3235 return Reg; 3697 return Reg;
3236 } 3698 }
(...skipping 242 matching lines...)
3479 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 3941 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
3480 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3942 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3481 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 3943 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
3482 } 3944 }
3483 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 3945 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
3484 // However, for compatibility with current NaCl LLVM, don't claim that. 3946 // However, for compatibility with current NaCl LLVM, don't claim that.
3485 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 3947 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
3486 } 3948 }
3487 3949
3488 } // end of namespace Ice 3950 } // end of namespace Ice