Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(488)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1419903002: Subzero: Refactor x86 register definitions to use the alias mechanism. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Add some comments Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after
580 Node->getInsts().insert(I3, RMW); 580 Node->getInsts().insert(I3, RMW);
581 } 581 }
582 } 582 }
583 if (Func->isVerbose(IceV_RMW)) 583 if (Func->isVerbose(IceV_RMW))
584 Func->getContext()->unlockStr(); 584 Func->getContext()->unlockStr();
585 } 585 }
586 586
587 // Converts a ConstantInteger32 operand into its constant value, or 587 // Converts a ConstantInteger32 operand into its constant value, or
588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
591 return Integer->getValue(); 591 return Integer->getValue();
592 return Intrinsics::MemoryOrderInvalid; 592 return Intrinsics::MemoryOrderInvalid;
593 } 593 }
594 594
595 /// Determines whether the dest of a Load instruction can be folded into one of 595 /// Determines whether the dest of a Load instruction can be folded into one of
596 /// the src operands of a 2-operand instruction. This is true as long as the 596 /// the src operands of a 2-operand instruction. This is true as long as the
597 /// load dest matches exactly one of the binary instruction's src operands. 597 /// load dest matches exactly one of the binary instruction's src operands.
598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.
599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
600 Operand *&Src0, Operand *&Src1) { 600 Operand *&Src0, Operand *&Src1) {
(...skipping 14 matching lines...) Expand all
615 while (!Context.atEnd()) { 615 while (!Context.atEnd()) {
616 Variable *LoadDest = nullptr; 616 Variable *LoadDest = nullptr;
617 Operand *LoadSrc = nullptr; 617 Operand *LoadSrc = nullptr;
618 Inst *CurInst = Context.getCur(); 618 Inst *CurInst = Context.getCur();
619 Inst *Next = Context.getNextInst(); 619 Inst *Next = Context.getNextInst();
620 // Determine whether the current instruction is a Load instruction or 620 // Determine whether the current instruction is a Load instruction or
621 // equivalent. 621 // equivalent.
622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
623 // An InstLoad always qualifies. 623 // An InstLoad always qualifies.
624 LoadDest = Load->getDest(); 624 LoadDest = Load->getDest();
625 const bool DoLegalize = false; 625 constexpr bool DoLegalize = false;
626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
627 LoadDest->getType(), DoLegalize); 627 LoadDest->getType(), DoLegalize);
628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory
630 // ordering, and can be implemented in a single instruction (i.e., not 630 // ordering, and can be implemented in a single instruction (i.e., not
631 // i64 on x86-32). 631 // i64 on x86-32).
632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
633 if (ID == Intrinsics::AtomicLoad && 633 if (ID == Intrinsics::AtomicLoad &&
634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
635 Intrinsics::isMemoryOrderValid( 635 Intrinsics::isMemoryOrderValid(
636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
637 LoadDest = Intrin->getDest(); 637 LoadDest = Intrin->getDest();
638 const bool DoLegalize = false; 638 constexpr bool DoLegalize = false;
639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
640 DoLegalize); 640 DoLegalize);
641 } 641 }
642 } 642 }
643 // A Load instruction can be folded into the following instruction only 643 // A Load instruction can be folded into the following instruction only
644 // if the following instruction ends the Load's Dest variable's live 644 // if the following instruction ends the Load's Dest variable's live
645 // range. 645 // range.
646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
647 assert(LoadSrc); 647 assert(LoadSrc);
648 Inst *NewInst = nullptr; 648 Inst *NewInst = nullptr;
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 // considered live upon function entry. Otherwise it's possible to get 726 // considered live upon function entry. Otherwise it's possible to get
727 // liveness validation errors for saving callee-save registers. 727 // liveness validation errors for saving callee-save registers.
728 Func->addImplicitArg(Reg); 728 Func->addImplicitArg(Reg);
729 // Don't bother tracking the live range of a named physical register. 729 // Don't bother tracking the live range of a named physical register.
730 Reg->setIgnoreLiveness(); 730 Reg->setIgnoreLiveness();
731 } 731 }
732 return Reg; 732 return Reg;
733 } 733 }
734 734
735 template <class Machine> 735 template <class Machine>
736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {
737 return Traits::getRegName(RegNum, Ty); 737 return Traits::getRegName(RegNum);
738 } 738 }
739 739
740 template <class Machine> 740 template <class Machine>
741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
742 if (!BuildDefs::dump()) 742 if (!BuildDefs::dump())
743 return; 743 return;
744 Ostream &Str = Ctx->getStrEmit(); 744 Ostream &Str = Ctx->getStrEmit();
745 if (Var->hasReg()) { 745 if (Var->hasReg()) {
746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
747 return; 747 return;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 if (Var->mustHaveReg()) { 790 if (Var->mustHaveReg()) {
791 llvm_unreachable("Infinite-weight Variable has no register assigned"); 791 llvm_unreachable("Infinite-weight Variable has no register assigned");
792 } 792 }
793 int32_t Offset = Var->getStackOffset(); 793 int32_t Offset = Var->getStackOffset();
794 int32_t BaseRegNum = Var->getBaseRegNum(); 794 int32_t BaseRegNum = Var->getBaseRegNum();
795 if (Var->getBaseRegNum() == Variable::NoRegister) { 795 if (Var->getBaseRegNum() == Variable::NoRegister) {
796 BaseRegNum = getFrameOrStackReg(); 796 BaseRegNum = getFrameOrStackReg();
797 if (!hasFramePointer()) 797 if (!hasFramePointer())
798 Offset += getStackAdjustment(); 798 Offset += getStackAdjustment();
799 } 799 }
800 return typename Traits::Address( 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset);
801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
802 } 801 }
803 802
804 /// Helper function for addProlog(). 803 /// Helper function for addProlog().
805 /// 804 ///
806 /// This assumes Arg is an argument passed on the stack. This sets the frame 805 /// This assumes Arg is an argument passed on the stack. This sets the frame
807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 806 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
808 /// I64 arg that has been split into Lo and Hi components, it calls itself 807 /// I64 arg that has been split into Lo and Hi components, it calls itself
809 /// recursively on the components, taking care to handle Lo first because of the 808 /// recursively on the components, taking care to handle Lo first because of the
810 /// little-endian architecture. Lastly, this function generates an instruction 809 /// little-endian architecture. Lastly, this function generates an instruction
811 /// to copy Arg into its assigned register if applicable. 810 /// to copy Arg into its assigned register if applicable.
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
1040 Src1 /= 2; 1039 Src1 /= 2;
1041 } else { 1040 } else {
1042 return false; 1041 return false;
1043 } 1042 }
1044 } 1043 }
1045 // Lea optimization only works for i16 and i32 types, not i8. 1044 // Lea optimization only works for i16 and i32 types, not i8.
1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1045 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1047 return false; 1046 return false;
1048 // Limit the number of lea/shl operations for a single multiply, to a 1047 // Limit the number of lea/shl operations for a single multiply, to a
1049 // somewhat arbitrary choice of 3. 1048 // somewhat arbitrary choice of 3.
1050 const uint32_t MaxOpsForOptimizedMul = 3; 1049 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1051 if (CountOps > MaxOpsForOptimizedMul) 1050 if (CountOps > MaxOpsForOptimizedMul)
1052 return false; 1051 return false;
1053 _mov(T, Src0); 1052 _mov(T, Src0);
1054 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1053 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1055 for (uint32_t i = 0; i < Count9; ++i) { 1054 for (uint32_t i = 0; i < Count9; ++i) {
1056 const uint16_t Shift = 3; // log2(9-1) 1055 constexpr uint16_t Shift = 3; // log2(9-1)
1057 _lea(T, 1056 _lea(T,
1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1057 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1059 } 1058 }
1060 for (uint32_t i = 0; i < Count5; ++i) { 1059 for (uint32_t i = 0; i < Count5; ++i) {
1061 const uint16_t Shift = 2; // log2(5-1) 1060 constexpr uint16_t Shift = 2; // log2(5-1)
1062 _lea(T, 1061 _lea(T,
1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1064 } 1063 }
1065 for (uint32_t i = 0; i < Count3; ++i) { 1064 for (uint32_t i = 0; i < Count3; ++i) {
1066 const uint16_t Shift = 1; // log2(3-1) 1065 constexpr uint16_t Shift = 1; // log2(3-1)
1067 _lea(T, 1066 _lea(T,
1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1069 } 1068 }
1070 if (Count2) { 1069 if (Count2) {
1071 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1070 _shl(T, Ctx->getConstantInt(Ty, Count2));
1072 } 1071 }
1073 if (Src1IsNegative) 1072 if (Src1IsNegative)
1074 _neg(T); 1073 _neg(T);
1075 _mov(Dest, T); 1074 _mov(Dest, T);
1076 return true; 1075 return true;
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
1208 } 1207 }
1209 } else { 1208 } else {
1210 // NON-CONSTANT CASES. 1209 // NON-CONSTANT CASES.
1211 Constant *BitTest = Ctx->getConstantInt32(0x20); 1210 Constant *BitTest = Ctx->getConstantInt32(0x20);
1212 typename Traits::Insts::Label *Label = 1211 typename Traits::Insts::Label *Label =
1213 Traits::Insts::Label::create(Func, this); 1212 Traits::Insts::Label::create(Func, this);
1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1213 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1215 // t1:ecx = c.lo & 0xff 1214 // t1:ecx = c.lo & 0xff
1216 // t2 = b.lo 1215 // t2 = b.lo
1217 // t3 = b.hi 1216 // t3 = b.hi
1218 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1217 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1218 _mov(T_1, Src1Lo);
1219 _mov(T_2, Src0Lo); 1219 _mov(T_2, Src0Lo);
1220 _mov(T_3, Src0Hi); 1220 _mov(T_3, Src0Hi);
1221 switch (Op) { 1221 switch (Op) {
1222 default: 1222 default:
1223 assert(0 && "non-shift op"); 1223 assert(0 && "non-shift op");
1224 break; 1224 break;
1225 case InstArithmetic::Shl: { 1225 case InstArithmetic::Shl: {
1226 // a=b<<c ==> 1226 // a=b<<c ==>
1227 // t3 = shld t3, t2, t1 1227 // t3 = shld t3, t2, t1
1228 // t2 = shl t2, t1 1228 // t2 = shl t2, t1
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
1316 } 1316 }
1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1318 // These x86-32 helper-call-involved instructions are lowered in this 1318 // These x86-32 helper-call-involved instructions are lowered in this
1319 // separate switch. This is because loOperand() and hiOperand() may insert 1319 // separate switch. This is because loOperand() and hiOperand() may insert
1320 // redundant instructions for constant blinding and pooling. Such redundant 1320 // redundant instructions for constant blinding and pooling. Such redundant
1321 // instructions will fail liveness analysis under -Om1 setting. And, 1321 // instructions will fail liveness analysis under -Om1 setting. And,
1322 // actually these arguments do not need to be processed with loOperand() 1322 // actually these arguments do not need to be processed with loOperand()
1323 // and hiOperand() to be used. 1323 // and hiOperand() to be used.
1324 switch (Inst->getOp()) { 1324 switch (Inst->getOp()) {
1325 case InstArithmetic::Udiv: { 1325 case InstArithmetic::Udiv: {
1326 const SizeT MaxSrcs = 2; 1326 constexpr SizeT MaxSrcs = 2;
1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1328 Call->addArg(Inst->getSrc(0)); 1328 Call->addArg(Inst->getSrc(0));
1329 Call->addArg(Inst->getSrc(1)); 1329 Call->addArg(Inst->getSrc(1));
1330 lowerCall(Call); 1330 lowerCall(Call);
1331 return; 1331 return;
1332 } 1332 }
1333 case InstArithmetic::Sdiv: { 1333 case InstArithmetic::Sdiv: {
1334 const SizeT MaxSrcs = 2; 1334 constexpr SizeT MaxSrcs = 2;
1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); 1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1336 Call->addArg(Inst->getSrc(0)); 1336 Call->addArg(Inst->getSrc(0));
1337 Call->addArg(Inst->getSrc(1)); 1337 Call->addArg(Inst->getSrc(1));
1338 lowerCall(Call); 1338 lowerCall(Call);
1339 return; 1339 return;
1340 } 1340 }
1341 case InstArithmetic::Urem: { 1341 case InstArithmetic::Urem: {
1342 const SizeT MaxSrcs = 2; 1342 constexpr SizeT MaxSrcs = 2;
1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); 1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1344 Call->addArg(Inst->getSrc(0)); 1344 Call->addArg(Inst->getSrc(0));
1345 Call->addArg(Inst->getSrc(1)); 1345 Call->addArg(Inst->getSrc(1));
1346 lowerCall(Call); 1346 lowerCall(Call);
1347 return; 1347 return;
1348 } 1348 }
1349 case InstArithmetic::Srem: { 1349 case InstArithmetic::Srem: {
1350 const SizeT MaxSrcs = 2; 1350 constexpr SizeT MaxSrcs = 2;
1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); 1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1352 Call->addArg(Inst->getSrc(0)); 1352 Call->addArg(Inst->getSrc(0));
1353 Call->addArg(Inst->getSrc(1)); 1353 Call->addArg(Inst->getSrc(1));
1354 lowerCall(Call); 1354 lowerCall(Call);
1355 return; 1355 return;
1356 } 1356 }
1357 default: 1357 default:
1358 break; 1358 break;
1359 } 1359 }
1360 1360
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
1521 // pmuludq T1, Src1 1521 // pmuludq T1, Src1
1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1523 // pmuludq T2, T3 1523 // pmuludq T2, T3
1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1525 // shufps T1, T2, {0,2,0,2} 1525 // shufps T1, T2, {0,2,0,2}
1526 // pshufd T4, T1, {0,2,1,3} 1526 // pshufd T4, T1, {0,2,1,3}
1527 // movups Dest, T4 1527 // movups Dest, T4
1528 1528
1529 // Mask that directs pshufd to create a vector with entries 1529 // Mask that directs pshufd to create a vector with entries
1530 // Src[1, 0, 3, 0] 1530 // Src[1, 0, 3, 0]
1531 const unsigned Constant1030 = 0x31; 1531 constexpr unsigned Constant1030 = 0x31;
1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); 1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1533 // Mask that directs shufps to create a vector with entries 1533 // Mask that directs shufps to create a vector with entries
1534 // Dest[0, 2], Src[0, 2] 1534 // Dest[0, 2], Src[0, 2]
1535 const unsigned Mask0202 = 0x88; 1535 constexpr unsigned Mask0202 = 0x88;
1536 // Mask that directs pshufd to create a vector with entries 1536 // Mask that directs pshufd to create a vector with entries
1537 // Src[0, 2, 1, 3] 1537 // Src[0, 2, 1, 3]
1538 const unsigned Mask0213 = 0xd8; 1538 constexpr unsigned Mask0213 = 0xd8;
1539 Variable *T1 = makeReg(IceType_v4i32); 1539 Variable *T1 = makeReg(IceType_v4i32);
1540 Variable *T2 = makeReg(IceType_v4i32); 1540 Variable *T2 = makeReg(IceType_v4i32);
1541 Variable *T3 = makeReg(IceType_v4i32); 1541 Variable *T3 = makeReg(IceType_v4i32);
1542 Variable *T4 = makeReg(IceType_v4i32); 1542 Variable *T4 = makeReg(IceType_v4i32);
1543 _movp(T1, Src0); 1543 _movp(T1, Src0);
1544 _pshufd(T2, Src0, Mask1030); 1544 _pshufd(T2, Src0, Mask1030);
1545 _pshufd(T3, Src1, Mask1030); 1545 _pshufd(T3, Src1, Mask1030);
1546 _pmuludq(T1, Src1); 1546 _pmuludq(T1, Src1);
1547 _pmuludq(T2, T3); 1547 _pmuludq(T2, T3);
1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
1623 _mov(T, Src0); 1623 _mov(T, Src0);
1624 _sub(T, Src1); 1624 _sub(T, Src1);
1625 _mov(Dest, T); 1625 _mov(Dest, T);
1626 break; 1626 break;
1627 case InstArithmetic::Mul: 1627 case InstArithmetic::Mul:
1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1629 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1630 return; 1630 return;
1631 } 1631 }
1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1633 // be in eax. 1633 // be in al.
1634 if (isByteSizedArithType(Dest->getType())) { 1634 if (isByteSizedArithType(Dest->getType())) {
1635 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1635 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1637 _imul(T, Src0 == Src1 ? T : Src1); 1637 _imul(T, Src0 == Src1 ? T : Src1);
1638 _mov(Dest, T); 1638 _mov(Dest, T);
1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1640 T = makeReg(Dest->getType()); 1640 T = makeReg(Dest->getType());
1641 _imul_imm(T, Src0, ImmConst); 1641 _imul_imm(T, Src0, ImmConst);
1642 _mov(Dest, T); 1642 _mov(Dest, T);
1643 } else { 1643 } else {
1644 _mov(T, Src0); 1644 _mov(T, Src0);
1645 _imul(T, Src0 == Src1 ? T : Src1); 1645 _imul(T, Src0 == Src1 ? T : Src1);
1646 _mov(Dest, T); 1646 _mov(Dest, T);
1647 } 1647 }
1648 break; 1648 break;
1649 case InstArithmetic::Shl: 1649 case InstArithmetic::Shl:
1650 _mov(T, Src0); 1650 _mov(T, Src0);
1651 if (!llvm::isa<ConstantInteger32>(Src1)) 1651 if (!llvm::isa<ConstantInteger32>(Src1)) {
1652 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1652 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1653 _mov(Cl, Src1);
1654 Src1 = Cl;
1655 }
1653 _shl(T, Src1); 1656 _shl(T, Src1);
1654 _mov(Dest, T); 1657 _mov(Dest, T);
1655 break; 1658 break;
1656 case InstArithmetic::Lshr: 1659 case InstArithmetic::Lshr:
1657 _mov(T, Src0); 1660 _mov(T, Src0);
1658 if (!llvm::isa<ConstantInteger32>(Src1)) 1661 if (!llvm::isa<ConstantInteger32>(Src1)) {
1659 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1662 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1663 _mov(Cl, Src1);
1664 Src1 = Cl;
1665 }
1660 _shr(T, Src1); 1666 _shr(T, Src1);
1661 _mov(Dest, T); 1667 _mov(Dest, T);
1662 break; 1668 break;
1663 case InstArithmetic::Ashr: 1669 case InstArithmetic::Ashr:
1664 _mov(T, Src0); 1670 _mov(T, Src0);
1665 if (!llvm::isa<ConstantInteger32>(Src1)) 1671 if (!llvm::isa<ConstantInteger32>(Src1)) {
1666 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1672 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1673 _mov(Cl, Src1);
1674 Src1 = Cl;
1675 }
1667 _sar(T, Src1); 1676 _sar(T, Src1);
1668 _mov(Dest, T); 1677 _mov(Dest, T);
1669 break; 1678 break;
1670 case InstArithmetic::Udiv: 1679 case InstArithmetic::Udiv:
1671 // div and idiv are the few arithmetic operators that do not allow 1680 // div and idiv are the few arithmetic operators that do not allow
1672 // immediates as the operand. 1681 // immediates as the operand.
1673 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1682 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1674 if (isByteSizedArithType(Dest->getType())) { 1683 if (isByteSizedArithType(Dest->getType())) {
1675 // For 8-bit unsigned division we need to zero-extend al into ah. A mov 1684 // For 8-bit unsigned division we need to zero-extend al into ah. A mov
1676 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 1685 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
1677 // assembler refuses to encode %ah (encoding %spl with a REX prefix 1686 // assembler refuses to encode %ah (encoding %spl with a REX prefix
1678 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah 1687 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
1679 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and 1688 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
 1680 // %d[lh], which means the X86 target lowering (and the register 1689 // %d[lh], which means the X86 target lowering (and the register
1681 // allocator) would have to be aware of this restriction. For now, we 1690 // allocator) would have to be aware of this restriction. For now, we
1682 // simply zero %eax completely, and move the dividend into %al. 1691 // simply zero %eax completely, and move the dividend into %al.
1683 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1692 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1684 Context.insert(InstFakeDef::create(Func, T_eax)); 1693 Context.insert(InstFakeDef::create(Func, T_eax));
1685 _xor(T_eax, T_eax); 1694 _xor(T_eax, T_eax);
1686 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1695 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1687 _div(T, Src1, T); 1696 _div(T, Src1, T);
1688 _mov(Dest, T); 1697 _mov(Dest, T);
1689 Context.insert(InstFakeUse::create(Func, T_eax)); 1698 Context.insert(InstFakeUse::create(Func, T_eax));
1690 } else { 1699 } else {
1691 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1700 Type Ty = Dest->getType();
1692 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1701 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1693 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1702 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1703 switch (Ty) {
1704 default:
1705 llvm_unreachable("Bad type for udiv");
1706 // fallthrough
1707 case IceType_i32:
1708 break;
1709 case IceType_i16:
1710 Eax = Traits::RegisterSet::Reg_ax;
1711 Edx = Traits::RegisterSet::Reg_dx;
1712 break;
1713 }
1714 Constant *Zero = Ctx->getConstantZero(Ty);
1715 _mov(T, Src0, Eax);
1716 _mov(T_edx, Zero, Edx);
1694 _div(T, Src1, T_edx); 1717 _div(T, Src1, T_edx);
1695 _mov(Dest, T); 1718 _mov(Dest, T);
1696 } 1719 }
1697 break; 1720 break;
1698 case InstArithmetic::Sdiv: 1721 case InstArithmetic::Sdiv:
1699 // TODO(stichnot): Enable this after doing better performance and cross 1722 // TODO(stichnot): Enable this after doing better performance and cross
1700 // testing. 1723 // testing.
1701 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1724 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1702 // Optimize division by constant power of 2, but not for Om1 or O0, just 1725 // Optimize division by constant power of 2, but not for Om1 or O0, just
1703 // to keep things simple there. 1726 // to keep things simple there.
(...skipping 21 matching lines...) Expand all
1725 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1748 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1726 _add(T, Src0); 1749 _add(T, Src0);
1727 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1750 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1728 } 1751 }
1729 _mov(Dest, T); 1752 _mov(Dest, T);
1730 return; 1753 return;
1731 } 1754 }
1732 } 1755 }
1733 } 1756 }
1734 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1757 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1735 if (isByteSizedArithType(Dest->getType())) { 1758 switch (Type Ty = Dest->getType()) {
1759 default:
1760 llvm_unreachable("Bad type for sdiv");
1761 // fallthrough
1762 case IceType_i32:
1763 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1764 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1737 _cbwdq(T, T); 1765 break;
1738 _idiv(T, Src1, T); 1766 case IceType_i16:
1739 _mov(Dest, T); 1767 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1740 } else { 1768 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1741 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1769 break;
1742 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1770 case IceType_i8:
1743 _cbwdq(T_edx, T); 1771 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1744 _idiv(T, Src1, T_edx); 1772 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1745 _mov(Dest, T); 1773 break;
1746 } 1774 }
1775 _cbwdq(T_edx, T);
1776 _idiv(T, Src1, T_edx);
1777 _mov(Dest, T);
1747 break; 1778 break;
1748 case InstArithmetic::Urem: 1779 case InstArithmetic::Urem:
1749 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1780 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1750 if (isByteSizedArithType(Dest->getType())) { 1781 if (isByteSizedArithType(Dest->getType())) {
1751 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1782 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1752 Context.insert(InstFakeDef::create(Func, T_eax)); 1783 Context.insert(InstFakeDef::create(Func, T_eax));
1753 _xor(T_eax, T_eax); 1784 _xor(T_eax, T_eax);
1754 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1785 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1755 _div(T, Src1, T); 1786 _div(T, Src1, T);
1756 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1787 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1757 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1788 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1758 // this ever becomes a problem we can introduce a pseudo rem instruction 1789 // this ever becomes a problem we can introduce a pseudo rem instruction
1759 // that returns the remainder in %al directly (and uses a mov for copying 1790 // that returns the remainder in %al directly (and uses a mov for copying
1760 // %ah to %al.) 1791 // %ah to %al.)
1761 static constexpr uint8_t AlSizeInBits = 8; 1792 static constexpr uint8_t AlSizeInBits = 8;
1762 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1793 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1763 _mov(Dest, T); 1794 _mov(Dest, T);
1764 Context.insert(InstFakeUse::create(Func, T_eax)); 1795 Context.insert(InstFakeUse::create(Func, T_eax));
1765 } else { 1796 } else {
1766 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1797 Type Ty = Dest->getType();
1767 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); 1798 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1799 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1800 switch (Ty) {
1801 default:
1802 llvm_unreachable("Bad type for urem");
1803 // fallthrough
1804 case IceType_i32:
1805 break;
1806 case IceType_i16:
1807 Eax = Traits::RegisterSet::Reg_ax;
1808 Edx = Traits::RegisterSet::Reg_dx;
1809 break;
1810 }
1811 Constant *Zero = Ctx->getConstantZero(Ty);
1812 T_edx = makeReg(Dest->getType(), Edx);
1768 _mov(T_edx, Zero); 1813 _mov(T_edx, Zero);
1769 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1814 _mov(T, Src0, Eax);
1770 _div(T_edx, Src1, T); 1815 _div(T_edx, Src1, T);
1771 _mov(Dest, T_edx); 1816 _mov(Dest, T_edx);
1772 } 1817 }
1773 break; 1818 break;
1774 case InstArithmetic::Srem: 1819 case InstArithmetic::Srem:
1775 // TODO(stichnot): Enable this after doing better performance and cross 1820 // TODO(stichnot): Enable this after doing better performance and cross
1776 // testing. 1821 // testing.
1777 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1822 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1778 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 1823 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1779 // keep things simple there. 1824 // keep things simple there.
(...skipping 26 matching lines...) Expand all
1806 _add(T, Src0); 1851 _add(T, Src0);
1807 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1808 _sub(T, Src0); 1853 _sub(T, Src0);
1809 _neg(T); 1854 _neg(T);
1810 _mov(Dest, T); 1855 _mov(Dest, T);
1811 return; 1856 return;
1812 } 1857 }
1813 } 1858 }
1814 } 1859 }
1815 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1816 if (isByteSizedArithType(Dest->getType())) { 1861 switch (Type Ty = Dest->getType()) {
1817 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1862 default:
1818 // T is %al. 1863 llvm_unreachable("Bad type for srem");
1819 _cbwdq(T, T); 1864 // fallthrough
1820 _idiv(T, Src1, T); 1865 case IceType_i32:
1821 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1866 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1822 Context.insert(InstFakeDef::create(Func, T_eax));
1823 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1824 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1825 // this ever becomes a problem we can introduce a pseudo rem instruction
1826 // that returns the remainder in %al directly (and uses a mov for copying
1827 // %ah to %al.)
1828 static constexpr uint8_t AlSizeInBits = 8;
1829 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1830 _mov(Dest, T);
1831 Context.insert(InstFakeUse::create(Func, T_eax));
1832 } else {
1833 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1834 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1867 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1835 _cbwdq(T_edx, T); 1868 _cbwdq(T_edx, T);
1836 _idiv(T_edx, Src1, T); 1869 _idiv(T_edx, Src1, T);
1837 _mov(Dest, T_edx); 1870 _mov(Dest, T_edx);
1871 break;
1872 case IceType_i16:
1873 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1874 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1875 _cbwdq(T_edx, T);
1876 _idiv(T_edx, Src1, T);
1877 _mov(Dest, T_edx);
1878 break;
1879 case IceType_i8:
1880 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1881 // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
1882 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
1883 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1884 _cbwdq(T_edx, T);
1885 _idiv(T_edx, Src1, T);
1886 static constexpr uint8_t AlSizeInBits = 8;
1887 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
1888 _mov(Dest, T_edx);
1889 break;
1838 } 1890 }
1839 break; 1891 break;
1840 case InstArithmetic::Fadd: 1892 case InstArithmetic::Fadd:
1841 _mov(T, Src0); 1893 _mov(T, Src0);
1842 _addss(T, Src1); 1894 _addss(T, Src1);
1843 _mov(Dest, T); 1895 _mov(Dest, T);
1844 break; 1896 break;
1845 case InstArithmetic::Fsub: 1897 case InstArithmetic::Fsub:
1846 _mov(T, Src0); 1898 _mov(T, Src0);
1847 _subss(T, Src1); 1899 _subss(T, Src1);
1848 _mov(Dest, T); 1900 _mov(Dest, T);
1849 break; 1901 break;
1850 case InstArithmetic::Fmul: 1902 case InstArithmetic::Fmul:
1851 _mov(T, Src0); 1903 _mov(T, Src0);
1852 _mulss(T, Src0 == Src1 ? T : Src1); 1904 _mulss(T, Src0 == Src1 ? T : Src1);
1853 _mov(Dest, T); 1905 _mov(Dest, T);
1854 break; 1906 break;
1855 case InstArithmetic::Fdiv: 1907 case InstArithmetic::Fdiv:
1856 _mov(T, Src0); 1908 _mov(T, Src0);
1857 _divss(T, Src1); 1909 _divss(T, Src1);
1858 _mov(Dest, T); 1910 _mov(Dest, T);
1859 break; 1911 break;
1860 case InstArithmetic::Frem: { 1912 case InstArithmetic::Frem: {
1861 const SizeT MaxSrcs = 2; 1913 constexpr SizeT MaxSrcs = 2;
1862 Type Ty = Dest->getType(); 1914 Type Ty = Dest->getType();
1863 InstCall *Call = makeHelperCall( 1915 InstCall *Call = makeHelperCall(
1864 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 1916 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1865 Call->addArg(Src0); 1917 Call->addArg(Src0);
1866 Call->addArg(Src1); 1918 Call->addArg(Src1);
1867 return lowerCall(Call); 1919 return lowerCall(Call);
1868 } 1920 }
1869 } 1921 }
1870 } 1922 }
1871 1923
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
2106 if (isVectorType(Dest->getType())) { 2158 if (isVectorType(Dest->getType())) {
2107 assert(Dest->getType() == IceType_v4i32 && 2159 assert(Dest->getType() == IceType_v4i32 &&
2108 Inst->getSrc(0)->getType() == IceType_v4f32); 2160 Inst->getSrc(0)->getType() == IceType_v4f32);
2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2161 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2110 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2162 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2111 Src0RM = legalizeToReg(Src0RM); 2163 Src0RM = legalizeToReg(Src0RM);
2112 Variable *T = makeReg(Dest->getType()); 2164 Variable *T = makeReg(Dest->getType());
2113 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2165 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2114 _movp(Dest, T); 2166 _movp(Dest, T);
2115 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2167 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2116 const SizeT MaxSrcs = 1; 2168 constexpr SizeT MaxSrcs = 1;
2117 Type SrcType = Inst->getSrc(0)->getType(); 2169 Type SrcType = Inst->getSrc(0)->getType();
2118 InstCall *Call = 2170 InstCall *Call =
2119 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2171 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2120 : H_fptosi_f64_i64, 2172 : H_fptosi_f64_i64,
2121 Dest, MaxSrcs); 2173 Dest, MaxSrcs);
2122 Call->addArg(Inst->getSrc(0)); 2174 Call->addArg(Inst->getSrc(0));
2123 lowerCall(Call); 2175 lowerCall(Call);
2124 } else { 2176 } else {
2125 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2177 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2126 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2178 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
(...skipping 10 matching lines...) Expand all
2137 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2189 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2138 if (Dest->getType() == IceType_i1) 2190 if (Dest->getType() == IceType_i1)
2139 _and(T_2, Ctx->getConstantInt1(1)); 2191 _and(T_2, Ctx->getConstantInt1(1));
2140 _mov(Dest, T_2); 2192 _mov(Dest, T_2);
2141 } 2193 }
2142 break; 2194 break;
2143 case InstCast::Fptoui: 2195 case InstCast::Fptoui:
2144 if (isVectorType(Dest->getType())) { 2196 if (isVectorType(Dest->getType())) {
2145 assert(Dest->getType() == IceType_v4i32 && 2197 assert(Dest->getType() == IceType_v4i32 &&
2146 Inst->getSrc(0)->getType() == IceType_v4f32); 2198 Inst->getSrc(0)->getType() == IceType_v4f32);
2147 const SizeT MaxSrcs = 1; 2199 constexpr SizeT MaxSrcs = 1;
2148 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2200 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2149 Call->addArg(Inst->getSrc(0)); 2201 Call->addArg(Inst->getSrc(0));
2150 lowerCall(Call); 2202 lowerCall(Call);
2151 } else if (Dest->getType() == IceType_i64 || 2203 } else if (Dest->getType() == IceType_i64 ||
2152 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { 2204 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2153 // Use a helper for both x86-32 and x86-64. 2205 // Use a helper for both x86-32 and x86-64.
2154 const SizeT MaxSrcs = 1; 2206 constexpr SizeT MaxSrcs = 1;
2155 Type DestType = Dest->getType(); 2207 Type DestType = Dest->getType();
2156 Type SrcType = Inst->getSrc(0)->getType(); 2208 Type SrcType = Inst->getSrc(0)->getType();
2157 IceString TargetString; 2209 IceString TargetString;
2158 if (Traits::Is64Bit) { 2210 if (Traits::Is64Bit) {
2159 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2160 : H_fptoui_f64_i64; 2212 : H_fptoui_f64_i64;
2161 } else if (isInt32Asserting32Or64(DestType)) { 2213 } else if (isInt32Asserting32Or64(DestType)) {
2162 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2163 : H_fptoui_f64_i32; 2215 : H_fptoui_f64_i32;
2164 } else { 2216 } else {
(...skipping 28 matching lines...) Expand all
2193 assert(Dest->getType() == IceType_v4f32 && 2245 assert(Dest->getType() == IceType_v4f32 &&
2194 Inst->getSrc(0)->getType() == IceType_v4i32); 2246 Inst->getSrc(0)->getType() == IceType_v4i32);
2195 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2247 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2196 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2248 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2197 Src0RM = legalizeToReg(Src0RM); 2249 Src0RM = legalizeToReg(Src0RM);
2198 Variable *T = makeReg(Dest->getType()); 2250 Variable *T = makeReg(Dest->getType());
2199 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2251 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2200 _movp(Dest, T); 2252 _movp(Dest, T);
2201 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2253 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2202 // Use a helper for x86-32. 2254 // Use a helper for x86-32.
2203 const SizeT MaxSrcs = 1; 2255 constexpr SizeT MaxSrcs = 1;
2204 Type DestType = Dest->getType(); 2256 Type DestType = Dest->getType();
2205 InstCall *Call = 2257 InstCall *Call =
2206 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2258 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2207 : H_sitofp_i64_f64, 2259 : H_sitofp_i64_f64,
2208 Dest, MaxSrcs); 2260 Dest, MaxSrcs);
2209 // TODO: Call the correct compiler-rt helper function. 2261 // TODO: Call the correct compiler-rt helper function.
2210 Call->addArg(Inst->getSrc(0)); 2262 Call->addArg(Inst->getSrc(0));
2211 lowerCall(Call); 2263 lowerCall(Call);
2212 return; 2264 return;
2213 } else { 2265 } else {
(...skipping 14 matching lines...) Expand all
2228 _movsx(T_1, Src0RM); 2280 _movsx(T_1, Src0RM);
2229 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2281 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2230 _mov(Dest, T_2); 2282 _mov(Dest, T_2);
2231 } 2283 }
2232 break; 2284 break;
2233 case InstCast::Uitofp: { 2285 case InstCast::Uitofp: {
2234 Operand *Src0 = Inst->getSrc(0); 2286 Operand *Src0 = Inst->getSrc(0);
2235 if (isVectorType(Src0->getType())) { 2287 if (isVectorType(Src0->getType())) {
2236 assert(Dest->getType() == IceType_v4f32 && 2288 assert(Dest->getType() == IceType_v4f32 &&
2237 Src0->getType() == IceType_v4i32); 2289 Src0->getType() == IceType_v4i32);
2238 const SizeT MaxSrcs = 1; 2290 constexpr SizeT MaxSrcs = 1;
2239 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2291 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2240 Call->addArg(Src0); 2292 Call->addArg(Src0);
2241 lowerCall(Call); 2293 lowerCall(Call);
2242 } else if (Src0->getType() == IceType_i64 || 2294 } else if (Src0->getType() == IceType_i64 ||
2243 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2295 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2244 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2296 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2245 // x86-32. 2297 // x86-32.
2246 const SizeT MaxSrcs = 1; 2298 constexpr SizeT MaxSrcs = 1;
2247 Type DestType = Dest->getType(); 2299 Type DestType = Dest->getType();
2248 IceString TargetString; 2300 IceString TargetString;
2249 if (isInt32Asserting32Or64(Src0->getType())) { 2301 if (isInt32Asserting32Or64(Src0->getType())) {
2250 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2302 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2251 : H_uitofp_i32_f64; 2303 : H_uitofp_i32_f64;
2252 } else { 2304 } else {
2253 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2305 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2254 : H_uitofp_i64_f64; 2306 : H_uitofp_i64_f64;
2255 } 2307 }
2256 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2308 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
2452 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2504 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2453 ConstantInteger32 *ElementIndex = 2505 ConstantInteger32 *ElementIndex =
2454 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2506 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2455 // Only constant indices are allowed in PNaCl IR. 2507 // Only constant indices are allowed in PNaCl IR.
2456 assert(ElementIndex); 2508 assert(ElementIndex);
2457 2509
2458 unsigned Index = ElementIndex->getValue(); 2510 unsigned Index = ElementIndex->getValue();
2459 Type Ty = SourceVectNotLegalized->getType(); 2511 Type Ty = SourceVectNotLegalized->getType();
2460 Type ElementTy = typeElementType(Ty); 2512 Type ElementTy = typeElementType(Ty);
2461 Type InVectorElementTy = Traits::getInVectorElementType(Ty); 2513 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2462 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2463 2514
2464 // TODO(wala): Determine the best lowering sequences for each type. 2515 // TODO(wala): Determine the best lowering sequences for each type.
2465 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2516 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2466 InstructionSet >= Traits::SSE4_1; 2517 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
2467 if (CanUsePextr && Ty != IceType_v4f32) { 2518 Variable *ExtractedElementR =
2468 // Use pextrb, pextrw, or pextrd. 2519 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
2520 if (CanUsePextr) {
2521 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
2522 // bits of the destination register, so we represent this by always
2523 // extracting into an i32 register. The _mov into Dest below will do
2524 // truncation as necessary.
2469 Constant *Mask = Ctx->getConstantInt32(Index); 2525 Constant *Mask = Ctx->getConstantInt32(Index);
2470 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); 2526 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
2471 _pextr(ExtractedElementR, SourceVectR, Mask); 2527 _pextr(ExtractedElementR, SourceVectR, Mask);
2472 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2528 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2473 // Use pshufd and movd/movss. 2529 // Use pshufd and movd/movss.
2474 Variable *T = nullptr; 2530 Variable *T = nullptr;
2475 if (Index) { 2531 if (Index) {
2476 // The shuffle only needs to occur if the element to be extracted is not 2532 // The shuffle only needs to occur if the element to be extracted is not
2477 // at the lowest index. 2533 // at the lowest index.
2478 Constant *Mask = Ctx->getConstantInt32(Index); 2534 Constant *Mask = Ctx->getConstantInt32(Index);
(...skipping 492 matching lines...) Expand 10 before | Expand all | Expand 10 after
2971 // Use insertps, pinsrb, pinsrw, or pinsrd. 3027 // Use insertps, pinsrb, pinsrw, or pinsrd.
2972 Operand *ElementRM = 3028 Operand *ElementRM =
2973 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3029 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2974 Operand *SourceVectRM = 3030 Operand *SourceVectRM =
2975 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3031 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2976 Variable *T = makeReg(Ty); 3032 Variable *T = makeReg(Ty);
2977 _movp(T, SourceVectRM); 3033 _movp(T, SourceVectRM);
2978 if (Ty == IceType_v4f32) 3034 if (Ty == IceType_v4f32)
2979 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3035 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
2980 else 3036 else
3037 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
3038 // operand is a register, it must be a full r32 register like eax, and not
3039 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
3040 // for the use of r16 and r8 by converting them through getBaseReg(),
3041 // while emitIAS() validates that the original and base register encodings
3042 // are the same. But for an "interior" register like ah, it should
3043 // probably be copied into an r32 via movzx so that the types work out.
2981 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3044 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
2982 _movp(Inst->getDest(), T); 3045 _movp(Inst->getDest(), T);
2983 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 3046 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2984 // Use shufps or movss. 3047 // Use shufps or movss.
2985 Variable *ElementR = nullptr; 3048 Variable *ElementR = nullptr;
2986 Operand *SourceVectRM = 3049 Operand *SourceVectRM =
2987 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3050 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2988 3051
2989 if (InVectorElementTy == IceType_f32) { 3052 if (InVectorElementTy == IceType_f32) {
2990 // ElementR will be in an XMM register since it is floating point. 3053 // ElementR will be in an XMM register since it is floating point.
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after
3305 // well-defined value. 3368 // well-defined value.
3306 Operand *Val = legalize(Instr->getArg(0)); 3369 Operand *Val = legalize(Instr->getArg(0));
3307 Operand *FirstVal; 3370 Operand *FirstVal;
3308 Operand *SecondVal = nullptr; 3371 Operand *SecondVal = nullptr;
3309 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3372 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3310 FirstVal = loOperand(Val); 3373 FirstVal = loOperand(Val);
3311 SecondVal = hiOperand(Val); 3374 SecondVal = hiOperand(Val);
3312 } else { 3375 } else {
3313 FirstVal = Val; 3376 FirstVal = Val;
3314 } 3377 }
3315 const bool IsCttz = false; 3378 constexpr bool IsCttz = false;
3316 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3379 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3317 SecondVal); 3380 SecondVal);
3318 return; 3381 return;
3319 } 3382 }
3320 case Intrinsics::Cttz: { 3383 case Intrinsics::Cttz: {
3321 // The "is zero undef" parameter is ignored and we always return a 3384 // The "is zero undef" parameter is ignored and we always return a
3322 // well-defined value. 3385 // well-defined value.
3323 Operand *Val = legalize(Instr->getArg(0)); 3386 Operand *Val = legalize(Instr->getArg(0));
3324 Operand *FirstVal; 3387 Operand *FirstVal;
3325 Operand *SecondVal = nullptr; 3388 Operand *SecondVal = nullptr;
3326 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3389 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3327 FirstVal = hiOperand(Val); 3390 FirstVal = hiOperand(Val);
3328 SecondVal = loOperand(Val); 3391 SecondVal = loOperand(Val);
3329 } else { 3392 } else {
3330 FirstVal = Val; 3393 FirstVal = Val;
3331 } 3394 }
3332 const bool IsCttz = true; 3395 constexpr bool IsCttz = true;
3333 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3396 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3334 SecondVal); 3397 SecondVal);
3335 return; 3398 return;
3336 } 3399 }
3337 case Intrinsics::Fabs: { 3400 case Intrinsics::Fabs: {
3338 Operand *Src = legalize(Instr->getArg(0)); 3401 Operand *Src = legalize(Instr->getArg(0));
3339 Type Ty = Src->getType(); 3402 Type Ty = Src->getType();
3340 Variable *Dest = Instr->getDest(); 3403 Variable *Dest = Instr->getDest();
3341 Variable *T = makeVectorOfFabsMask(Ty); 3404 Variable *T = makeVectorOfFabsMask(Ty);
3342 // The pand instruction operates on an m128 memory operand, so if Src is an 3405 // The pand instruction operates on an m128 memory operand, so if Src is an
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
3420 Func->setError("Should not be lowering UnknownIntrinsic"); 3483 Func->setError("Should not be lowering UnknownIntrinsic");
3421 return; 3484 return;
3422 } 3485 }
3423 return; 3486 return;
3424 } 3487 }
3425 3488
3426 template <class Machine> 3489 template <class Machine>
3427 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3490 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3428 Operand *Ptr, Operand *Expected, 3491 Operand *Ptr, Operand *Expected,
3429 Operand *Desired) { 3492 Operand *Desired) {
3430 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { 3493 Type Ty = Expected->getType();
3494 if (!Traits::Is64Bit && Ty == IceType_i64) {
3431 // Reserve the pre-colored registers first, before adding any more 3495 // Reserve the pre-colored registers first, before adding any more
3432 // infinite-weight variables from formMemoryOperand's legalization. 3496 // infinite-weight variables from formMemoryOperand's legalization.
3433 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3497 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3434 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3498 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3435 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3499 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3436 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3500 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3437 _mov(T_eax, loOperand(Expected)); 3501 _mov(T_eax, loOperand(Expected));
3438 _mov(T_edx, hiOperand(Expected)); 3502 _mov(T_edx, hiOperand(Expected));
3439 _mov(T_ebx, loOperand(Desired)); 3503 _mov(T_ebx, loOperand(Desired));
3440 _mov(T_ecx, hiOperand(Desired)); 3504 _mov(T_ecx, hiOperand(Desired));
3441 typename Traits::X86OperandMem *Addr = 3505 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3442 formMemoryOperand(Ptr, Expected->getType()); 3506 constexpr bool Locked = true;
3443 const bool Locked = true;
3444 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3507 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3445 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3508 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3446 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3509 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3447 _mov(DestLo, T_eax); 3510 _mov(DestLo, T_eax);
3448 _mov(DestHi, T_edx); 3511 _mov(DestHi, T_edx);
3449 return; 3512 return;
3450 } 3513 }
3451 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); 3514 int32_t Eax;
3515 switch (Ty) {
3516 default:
3517 llvm_unreachable("Bad type for cmpxchg");
3518 // fallthrough
3519 case IceType_i32:
3520 Eax = Traits::RegisterSet::Reg_eax;
3521 break;
3522 case IceType_i16:
3523 Eax = Traits::RegisterSet::Reg_ax;
3524 break;
3525 case IceType_i8:
3526 Eax = Traits::RegisterSet::Reg_al;
3527 break;
3528 }
3529 Variable *T_eax = makeReg(Ty, Eax);
3452 _mov(T_eax, Expected); 3530 _mov(T_eax, Expected);
3453 typename Traits::X86OperandMem *Addr = 3531 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3454 formMemoryOperand(Ptr, Expected->getType());
3455 Variable *DesiredReg = legalizeToReg(Desired); 3532 Variable *DesiredReg = legalizeToReg(Desired);
3456 const bool Locked = true; 3533 constexpr bool Locked = true;
3457 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3534 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3458 _mov(DestPrev, T_eax); 3535 _mov(DestPrev, T_eax);
3459 } 3536 }
3460 3537
3461 template <class Machine> 3538 template <class Machine>
3462 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3539 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3463 Operand *PtrToMem, 3540 Operand *PtrToMem,
3464 Operand *Expected, 3541 Operand *Expected,
3465 Operand *Desired) { 3542 Operand *Desired) {
3466 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3543 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3548 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3625 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3549 // All the fall-through paths must set this to true, but use this 3626 // All the fall-through paths must set this to true, but use this
3550 // for asserting. 3627 // for asserting.
3551 NeedsCmpxchg = true; 3628 NeedsCmpxchg = true;
3552 Op_Lo = &TargetX86Base<Machine>::_add; 3629 Op_Lo = &TargetX86Base<Machine>::_add;
3553 Op_Hi = &TargetX86Base<Machine>::_adc; 3630 Op_Hi = &TargetX86Base<Machine>::_adc;
3554 break; 3631 break;
3555 } 3632 }
3556 typename Traits::X86OperandMem *Addr = 3633 typename Traits::X86OperandMem *Addr =
3557 formMemoryOperand(Ptr, Dest->getType()); 3634 formMemoryOperand(Ptr, Dest->getType());
3558 const bool Locked = true; 3635 constexpr bool Locked = true;
3559 Variable *T = nullptr; 3636 Variable *T = nullptr;
3560 _mov(T, Val); 3637 _mov(T, Val);
3561 _xadd(Addr, T, Locked); 3638 _xadd(Addr, T, Locked);
3562 _mov(Dest, T); 3639 _mov(Dest, T);
3563 return; 3640 return;
3564 } 3641 }
3565 case Intrinsics::AtomicSub: { 3642 case Intrinsics::AtomicSub: {
3566 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3643 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3567 NeedsCmpxchg = true; 3644 NeedsCmpxchg = true;
3568 Op_Lo = &TargetX86Base<Machine>::_sub; 3645 Op_Lo = &TargetX86Base<Machine>::_sub;
3569 Op_Hi = &TargetX86Base<Machine>::_sbb; 3646 Op_Hi = &TargetX86Base<Machine>::_sbb;
3570 break; 3647 break;
3571 } 3648 }
3572 typename Traits::X86OperandMem *Addr = 3649 typename Traits::X86OperandMem *Addr =
3573 formMemoryOperand(Ptr, Dest->getType()); 3650 formMemoryOperand(Ptr, Dest->getType());
3574 const bool Locked = true; 3651 constexpr bool Locked = true;
3575 Variable *T = nullptr; 3652 Variable *T = nullptr;
3576 _mov(T, Val); 3653 _mov(T, Val);
3577 _neg(T); 3654 _neg(T);
3578 _xadd(Addr, T, Locked); 3655 _xadd(Addr, T, Locked);
3579 _mov(Dest, T); 3656 _mov(Dest, T);
3580 return; 3657 return;
3581 } 3658 }
3582 case Intrinsics::AtomicOr: 3659 case Intrinsics::AtomicOr:
3583 // TODO(jvoung): If Dest is null or dead, then some of these 3660 // TODO(jvoung): If Dest is null or dead, then some of these
3584 // operations do not need an "exchange", but just a locked op. 3661 // operations do not need an "exchange", but just a locked op.
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
3672 _mov(T_ecx, T_edx); 3749 _mov(T_ecx, T_edx);
3673 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3750 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3674 } else { 3751 } else {
3675 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3752 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3676 // It just needs the Val loaded into ebx and ecx. 3753 // It just needs the Val loaded into ebx and ecx.
3677 // That can also be done before the loop. 3754 // That can also be done before the loop.
3678 _mov(T_ebx, loOperand(Val)); 3755 _mov(T_ebx, loOperand(Val));
3679 _mov(T_ecx, hiOperand(Val)); 3756 _mov(T_ecx, hiOperand(Val));
3680 Context.insert(Label); 3757 Context.insert(Label);
3681 } 3758 }
3682 const bool Locked = true; 3759 constexpr bool Locked = true;
3683 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3760 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3684 _br(Traits::Cond::Br_ne, Label); 3761 _br(Traits::Cond::Br_ne, Label);
3685 if (!IsXchg8b) { 3762 if (!IsXchg8b) {
3686 // If Val is a variable, model the extended live range of Val through 3763 // If Val is a variable, model the extended live range of Val through
3687 // the end of the loop, since it will be re-used by the loop. 3764 // the end of the loop, since it will be re-used by the loop.
3688 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3765 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3689 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); 3766 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3690 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); 3767 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3691 Context.insert(InstFakeUse::create(Func, ValLo)); 3768 Context.insert(InstFakeUse::create(Func, ValLo));
3692 Context.insert(InstFakeUse::create(Func, ValHi)); 3769 Context.insert(InstFakeUse::create(Func, ValHi));
3693 } 3770 }
3694 } else { 3771 } else {
3695 // For xchg, the loop is slightly smaller and ebx/ecx are used. 3772 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3696 Context.insert(InstFakeUse::create(Func, T_ebx)); 3773 Context.insert(InstFakeUse::create(Func, T_ebx));
3697 Context.insert(InstFakeUse::create(Func, T_ecx)); 3774 Context.insert(InstFakeUse::create(Func, T_ecx));
3698 } 3775 }
3699 // The address base (if any) is also reused in the loop. 3776 // The address base (if any) is also reused in the loop.
3700 if (Variable *Base = Addr->getBase()) 3777 if (Variable *Base = Addr->getBase())
3701 Context.insert(InstFakeUse::create(Func, Base)); 3778 Context.insert(InstFakeUse::create(Func, Base));
3702 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3779 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3780 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3704 _mov(DestLo, T_eax); 3781 _mov(DestLo, T_eax);
3705 _mov(DestHi, T_edx); 3782 _mov(DestHi, T_edx);
3706 return; 3783 return;
3707 } 3784 }
3708 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3785 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3709 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); 3786 int32_t Eax;
3787 switch (Ty) {
3788 default:
3789 llvm_unreachable("Bad type for atomicRMW");
3790 // fallthrough
3791 case IceType_i32:
3792 Eax = Traits::RegisterSet::Reg_eax;
3793 break;
3794 case IceType_i16:
3795 Eax = Traits::RegisterSet::Reg_ax;
3796 break;
3797 case IceType_i8:
3798 Eax = Traits::RegisterSet::Reg_al;
3799 break;
3800 }
3801 Variable *T_eax = makeReg(Ty, Eax);
3710 _mov(T_eax, Addr); 3802 _mov(T_eax, Addr);
3711 typename Traits::Insts::Label *Label = 3803 typename Traits::Insts::Label *Label =
3712 Traits::Insts::Label::create(Func, this); 3804 Traits::Insts::Label::create(Func, this);
3713 Context.insert(Label); 3805 Context.insert(Label);
3714 // We want to pick a different register for T than Eax, so don't use 3806 // We want to pick a different register for T than Eax, so don't use
3715 // _mov(T == nullptr, T_eax). 3807 // _mov(T == nullptr, T_eax).
3716 Variable *T = makeReg(Ty); 3808 Variable *T = makeReg(Ty);
3717 _mov(T, T_eax); 3809 _mov(T, T_eax);
3718 (this->*Op_Lo)(T, Val); 3810 (this->*Op_Lo)(T, Val);
3719 const bool Locked = true; 3811 constexpr bool Locked = true;
3720 _cmpxchg(Addr, T_eax, T, Locked); 3812 _cmpxchg(Addr, T_eax, T, Locked);
3721 _br(Traits::Cond::Br_ne, Label); 3813 _br(Traits::Cond::Br_ne, Label);
3722 // If Val is a variable, model the extended live range of Val through 3814 // If Val is a variable, model the extended live range of Val through
3723 // the end of the loop, since it will be re-used by the loop. 3815 // the end of the loop, since it will be re-used by the loop.
3724 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3816 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3725 Context.insert(InstFakeUse::create(Func, ValVar)); 3817 Context.insert(InstFakeUse::create(Func, ValVar));
3726 } 3818 }
3727 // The address base (if any) is also reused in the loop. 3819 // The address base (if any) is also reused in the loop.
3728 if (Variable *Base = Addr->getBase()) 3820 if (Variable *Base = Addr->getBase())
3729 Context.insert(InstFakeUse::create(Func, Base)); 3821 Context.insert(InstFakeUse::create(Func, Base));
(...skipping 1357 matching lines...) Expand 10 before | Expand all | Expand 10 after
5087 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 5179 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5088 Ty == IceType_v16i8); 5180 Ty == IceType_v16i8);
5089 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 5181 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5090 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 5182 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5091 SizeT Shift = 5183 SizeT Shift =
5092 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 5184 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5093 _psll(Reg, Ctx->getConstantInt8(Shift)); 5185 _psll(Reg, Ctx->getConstantInt8(Shift));
5094 return Reg; 5186 return Reg;
5095 } else { 5187 } else {
5096 // SSE has no left shift operation for vectors of 8 bit integers. 5188 // SSE has no left shift operation for vectors of 8 bit integers.
5097 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 5189 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5098 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 5190 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5099 Variable *Reg = makeReg(Ty, RegNum); 5191 Variable *Reg = makeReg(Ty, RegNum);
5100 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 5192 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5101 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 5193 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5102 return Reg; 5194 return Reg;
5103 } 5195 }
5104 } 5196 }
5105 5197
5106 /// Construct a mask in a register that can be and'ed with a floating-point 5198 /// Construct a mask in a register that can be and'ed with a floating-point
5107 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 5199 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
(...skipping 13 matching lines...) Expand all
5121 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5213 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5122 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 5214 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
5123 uint32_t Offset) { 5215 uint32_t Offset) {
5124 // Ensure that Loc is a stack slot. 5216 // Ensure that Loc is a stack slot.
5125 assert(Slot->mustNotHaveReg()); 5217 assert(Slot->mustNotHaveReg());
5126 assert(Slot->getRegNum() == Variable::NoRegister); 5218 assert(Slot->getRegNum() == Variable::NoRegister);
5127 // Compute the location of Loc in memory. 5219 // Compute the location of Loc in memory.
5128 // TODO(wala,stichnot): lea should not 5220 // TODO(wala,stichnot): lea should not
5129 // be required. The address of the stack slot is known at compile time 5221 // be required. The address of the stack slot is known at compile time
5130 // (although not until after addProlog()). 5222 // (although not until after addProlog()).
5131 const Type PointerType = IceType_i32; 5223 constexpr Type PointerType = IceType_i32;
5132 Variable *Loc = makeReg(PointerType); 5224 Variable *Loc = makeReg(PointerType);
5133 _lea(Loc, Slot); 5225 _lea(Loc, Slot);
5134 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5226 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5135 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5227 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5136 } 5228 }
5137 5229
5138 /// Helper for legalize() to emit the right code to lower an operand to a 5230 /// Helper for legalize() to emit the right code to lower an operand to a
5139 /// register of the appropriate type. 5231 /// register of the appropriate type.
5140 template <class Machine> 5232 template <class Machine>
5141 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5233 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
5174 if (Subst->mustHaveReg() && !Subst->hasReg()) { 5266 if (Subst->mustHaveReg() && !Subst->hasReg()) {
5175 // At this point we know the substitution will have a register. 5267 // At this point we know the substitution will have a register.
5176 if (From->getType() == Subst->getType()) { 5268 if (From->getType() == Subst->getType()) {
5177 // At this point we know the substitution's register is compatible. 5269 // At this point we know the substitution's register is compatible.
5178 return Subst; 5270 return Subst;
5179 } 5271 }
5180 } 5272 }
5181 } 5273 }
5182 } 5274 }
5183 5275
5184 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 5276 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5185 // Before doing anything with a Mem operand, we need to ensure that the 5277 // Before doing anything with a Mem operand, we need to ensure that the
5186 // Base and Index components are in physical registers. 5278 // Base and Index components are in physical registers.
5187 Variable *Base = Mem->getBase(); 5279 Variable *Base = Mem->getBase();
5188 Variable *Index = Mem->getIndex(); 5280 Variable *Index = Mem->getIndex();
5189 Variable *RegBase = nullptr; 5281 Variable *RegBase = nullptr;
5190 Variable *RegIndex = nullptr; 5282 Variable *RegIndex = nullptr;
5191 if (Base) { 5283 if (Base) {
5192 RegBase = legalizeToReg(Base); 5284 RegBase = legalizeToReg(Base);
5193 } 5285 }
5194 if (Index) { 5286 if (Index) {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
5252 // Immediate specifically not allowed 5344 // Immediate specifically not allowed
5253 NeedsReg = true; 5345 NeedsReg = true;
5254 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5346 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5255 // On x86, FP constants are lowered to mem operands. 5347 // On x86, FP constants are lowered to mem operands.
5256 NeedsReg = true; 5348 NeedsReg = true;
5257 if (NeedsReg) { 5349 if (NeedsReg) {
5258 From = copyToReg(From, RegNum); 5350 From = copyToReg(From, RegNum);
5259 } 5351 }
5260 return From; 5352 return From;
5261 } 5353 }
5262 if (auto Var = llvm::dyn_cast<Variable>(From)) { 5354 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5263 // Check if the variable is guaranteed a physical register. This can happen 5355 // Check if the variable is guaranteed a physical register. This can happen
5264 // either when the variable is pre-colored or when it is assigned infinite 5356 // either when the variable is pre-colored or when it is assigned infinite
5265 // weight. 5357 // weight.
5266 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5358 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5267 // We need a new physical register for the operand if: 5359 // We need a new physical register for the operand if:
5268 // Mem is not allowed and Var isn't guaranteed a physical 5360 // Mem is not allowed and Var isn't guaranteed a physical
5269 // register, or 5361 // register, or
5270 // RegNum is required and Var->getRegNum() doesn't match. 5362 // RegNum is required and Var->getRegNum() doesn't match.
5271 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5363 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5272 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5364 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
5507 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); 5599 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
5508 Immediate->setShouldBePooled(true); 5600 Immediate->setShouldBePooled(true);
5509 // if we have already assigned a phy register, we must come from 5601 // if we have already assigned a phy register, we must come from
5510 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the 5602 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
5511 // assigned register as this assignment is that start of its use-def 5603 // assigned register as this assignment is that start of its use-def
5512 // chain. So we add RegNum argument here. 5604 // chain. So we add RegNum argument here.
5513 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5605 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5514 IceString Label; 5606 IceString Label;
5515 llvm::raw_string_ostream Label_stream(Label); 5607 llvm::raw_string_ostream Label_stream(Label);
5516 Immediate->emitPoolLabel(Label_stream, Ctx); 5608 Immediate->emitPoolLabel(Label_stream, Ctx);
5517 const RelocOffsetT Offset = 0; 5609 constexpr RelocOffsetT Offset = 0;
5518 const bool SuppressMangling = true; 5610 constexpr bool SuppressMangling = true;
5519 Constant *Symbol = 5611 Constant *Symbol =
5520 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5612 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5521 typename Traits::X86OperandMem *MemOperand = 5613 typename Traits::X86OperandMem *MemOperand =
5522 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, 5614 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
5523 Symbol); 5615 Symbol);
5524 _mov(Reg, MemOperand); 5616 _mov(Reg, MemOperand);
5525 return Reg; 5617 return Reg;
5526 } 5618 }
5527 assert("Unsupported -randomize-pool-immediates option" && false); 5619 assert("Unsupported -randomize-pool-immediates option" && false);
5528 } 5620 }
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
5604 // phi lowering, we should not ask for new physical registers in 5696 // phi lowering, we should not ask for new physical registers in
5605 // general. However, if we do meet Memory Operand during phi lowering, 5697 // general. However, if we do meet Memory Operand during phi lowering,
5606 // we should not blind or pool the immediates for now. 5698 // we should not blind or pool the immediates for now.
5607 if (RegNum != Variable::NoRegister) 5699 if (RegNum != Variable::NoRegister)
5608 return MemOperand; 5700 return MemOperand;
5609 Variable *RegTemp = makeReg(IceType_i32); 5701 Variable *RegTemp = makeReg(IceType_i32);
5610 IceString Label; 5702 IceString Label;
5611 llvm::raw_string_ostream Label_stream(Label); 5703 llvm::raw_string_ostream Label_stream(Label);
5612 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); 5704 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
5613 MemOperand->getOffset()->setShouldBePooled(true); 5705 MemOperand->getOffset()->setShouldBePooled(true);
5614 const RelocOffsetT SymOffset = 0; 5706 constexpr RelocOffsetT SymOffset = 0;
5615 bool SuppressMangling = true; 5707 constexpr bool SuppressMangling = true;
5616 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5708 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
5617 SuppressMangling); 5709 SuppressMangling);
5618 typename Traits::X86OperandMem *SymbolOperand = 5710 typename Traits::X86OperandMem *SymbolOperand =
5619 Traits::X86OperandMem::create( 5711 Traits::X86OperandMem::create(
5620 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); 5712 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
5621 _mov(RegTemp, SymbolOperand); 5713 _mov(RegTemp, SymbolOperand);
5622 // If we have a base variable here, we should add the lea instruction 5714 // If we have a base variable here, we should add the lea instruction
5623 // to add the value of the base variable to RegTemp. If there is no 5715 // to add the value of the base variable to RegTemp. If there is no
5624 // base variable, we won't need this lea instruction. 5716 // base variable, we won't need this lea instruction.
5625 if (MemOperand->getBase()) { 5717 if (MemOperand->getBase()) {
(...skipping 15 matching lines...) Expand all
5641 } 5733 }
5642 // the offset is not eligible for blinding or pooling, return the original 5734 // the offset is not eligible for blinding or pooling, return the original
5643 // mem operand 5735 // mem operand
5644 return MemOperand; 5736 return MemOperand;
5645 } 5737 }
5646 5738
5647 } // end of namespace X86Internal 5739 } // end of namespace X86Internal
5648 } // end of namespace Ice 5740 } // end of namespace Ice
5649 5741
5650 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5742 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698