Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(488)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1419903002: Subzero: Refactor x86 register definitions to use the alias mechanism. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Add some comments Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after
580 Node->getInsts().insert(I3, RMW); 580 Node->getInsts().insert(I3, RMW);
581 } 581 }
582 } 582 }
583 if (Func->isVerbose(IceV_RMW)) 583 if (Func->isVerbose(IceV_RMW))
584 Func->getContext()->unlockStr(); 584 Func->getContext()->unlockStr();
585 } 585 }
586 586
587 // Converts a ConstantInteger32 operand into its constant value, or 587 // Converts a ConstantInteger32 operand into its constant value, or
588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
591 return Integer->getValue(); 591 return Integer->getValue();
592 return Intrinsics::MemoryOrderInvalid; 592 return Intrinsics::MemoryOrderInvalid;
593 } 593 }
594 594
595 /// Determines whether the dest of a Load instruction can be folded into one of 595 /// Determines whether the dest of a Load instruction can be folded into one of
596 /// the src operands of a 2-operand instruction. This is true as long as the 596 /// the src operands of a 2-operand instruction. This is true as long as the
597 /// load dest matches exactly one of the binary instruction's src operands. 597 /// load dest matches exactly one of the binary instruction's src operands.
598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.
599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
600 Operand *&Src0, Operand *&Src1) { 600 Operand *&Src0, Operand *&Src1) {
(...skipping 14 matching lines...) Expand all
615 while (!Context.atEnd()) { 615 while (!Context.atEnd()) {
616 Variable *LoadDest = nullptr; 616 Variable *LoadDest = nullptr;
617 Operand *LoadSrc = nullptr; 617 Operand *LoadSrc = nullptr;
618 Inst *CurInst = Context.getCur(); 618 Inst *CurInst = Context.getCur();
619 Inst *Next = Context.getNextInst(); 619 Inst *Next = Context.getNextInst();
620 // Determine whether the current instruction is a Load instruction or 620 // Determine whether the current instruction is a Load instruction or
621 // equivalent. 621 // equivalent.
622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
623 // An InstLoad always qualifies. 623 // An InstLoad always qualifies.
624 LoadDest = Load->getDest(); 624 LoadDest = Load->getDest();
625 const bool DoLegalize = false; 625 constexpr bool DoLegalize = false;
626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
627 LoadDest->getType(), DoLegalize); 627 LoadDest->getType(), DoLegalize);
628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory
630 // ordering, and can be implemented in a single instruction (i.e., not 630 // ordering, and can be implemented in a single instruction (i.e., not
631 // i64 on x86-32). 631 // i64 on x86-32).
632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
633 if (ID == Intrinsics::AtomicLoad && 633 if (ID == Intrinsics::AtomicLoad &&
634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
635 Intrinsics::isMemoryOrderValid( 635 Intrinsics::isMemoryOrderValid(
636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
637 LoadDest = Intrin->getDest(); 637 LoadDest = Intrin->getDest();
638 const bool DoLegalize = false; 638 constexpr bool DoLegalize = false;
639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
640 DoLegalize); 640 DoLegalize);
641 } 641 }
642 } 642 }
643 // A Load instruction can be folded into the following instruction only 643 // A Load instruction can be folded into the following instruction only
644 // if the following instruction ends the Load's Dest variable's live 644 // if the following instruction ends the Load's Dest variable's live
645 // range. 645 // range.
646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
647 assert(LoadSrc); 647 assert(LoadSrc);
648 Inst *NewInst = nullptr; 648 Inst *NewInst = nullptr;
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 // considered live upon function entry. Otherwise it's possible to get 726 // considered live upon function entry. Otherwise it's possible to get
727 // liveness validation errors for saving callee-save registers. 727 // liveness validation errors for saving callee-save registers.
728 Func->addImplicitArg(Reg); 728 Func->addImplicitArg(Reg);
729 // Don't bother tracking the live range of a named physical register. 729 // Don't bother tracking the live range of a named physical register.
730 Reg->setIgnoreLiveness(); 730 Reg->setIgnoreLiveness();
731 } 731 }
732 return Reg; 732 return Reg;
733 } 733 }
734 734
735 template <class Machine> 735 template <class Machine>
736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {
737 return Traits::getRegName(RegNum, Ty); 737 return Traits::getRegName(RegNum);
738 } 738 }
739 739
740 template <class Machine> 740 template <class Machine>
741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
742 if (!BuildDefs::dump()) 742 if (!BuildDefs::dump())
743 return; 743 return;
744 Ostream &Str = Ctx->getStrEmit(); 744 Ostream &Str = Ctx->getStrEmit();
745 if (Var->hasReg()) { 745 if (Var->hasReg()) {
746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
747 return; 747 return;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 if (Var->mustHaveReg()) { 790 if (Var->mustHaveReg()) {
791 llvm_unreachable("Infinite-weight Variable has no register assigned"); 791 llvm_unreachable("Infinite-weight Variable has no register assigned");
792 } 792 }
793 int32_t Offset = Var->getStackOffset(); 793 int32_t Offset = Var->getStackOffset();
794 int32_t BaseRegNum = Var->getBaseRegNum(); 794 int32_t BaseRegNum = Var->getBaseRegNum();
795 if (Var->getBaseRegNum() == Variable::NoRegister) { 795 if (Var->getBaseRegNum() == Variable::NoRegister) {
796 BaseRegNum = getFrameOrStackReg(); 796 BaseRegNum = getFrameOrStackReg();
797 if (!hasFramePointer()) 797 if (!hasFramePointer())
798 Offset += getStackAdjustment(); 798 Offset += getStackAdjustment();
799 } 799 }
800 return typename Traits::Address( 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset);
801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
802 } 801 }
803 802
804 /// Helper function for addProlog(). 803 /// Helper function for addProlog().
805 /// 804 ///
806 /// This assumes Arg is an argument passed on the stack. This sets the frame 805 /// This assumes Arg is an argument passed on the stack. This sets the frame
807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 806 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
808 /// I64 arg that has been split into Lo and Hi components, it calls itself 807 /// I64 arg that has been split into Lo and Hi components, it calls itself
809 /// recursively on the components, taking care to handle Lo first because of the 808 /// recursively on the components, taking care to handle Lo first because of the
810 /// little-endian architecture. Lastly, this function generates an instruction 809 /// little-endian architecture. Lastly, this function generates an instruction
811 /// to copy Arg into its assigned register if applicable. 810 /// to copy Arg into its assigned register if applicable.
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
1040 Src1 /= 2; 1039 Src1 /= 2;
1041 } else { 1040 } else {
1042 return false; 1041 return false;
1043 } 1042 }
1044 } 1043 }
1045 // Lea optimization only works for i16 and i32 types, not i8. 1044 // Lea optimization only works for i16 and i32 types, not i8.
1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1045 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1047 return false; 1046 return false;
1048 // Limit the number of lea/shl operations for a single multiply, to a 1047 // Limit the number of lea/shl operations for a single multiply, to a
1049 // somewhat arbitrary choice of 3. 1048 // somewhat arbitrary choice of 3.
1050 const uint32_t MaxOpsForOptimizedMul = 3; 1049 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1051 if (CountOps > MaxOpsForOptimizedMul) 1050 if (CountOps > MaxOpsForOptimizedMul)
1052 return false; 1051 return false;
1053 _mov(T, Src0); 1052 _mov(T, Src0);
1054 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1053 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1055 for (uint32_t i = 0; i < Count9; ++i) { 1054 for (uint32_t i = 0; i < Count9; ++i) {
1056 const uint16_t Shift = 3; // log2(9-1) 1055 constexpr uint16_t Shift = 3; // log2(9-1)
1057 _lea(T, 1056 _lea(T,
1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1057 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1059 } 1058 }
1060 for (uint32_t i = 0; i < Count5; ++i) { 1059 for (uint32_t i = 0; i < Count5; ++i) {
1061 const uint16_t Shift = 2; // log2(5-1) 1060 constexpr uint16_t Shift = 2; // log2(5-1)
1062 _lea(T, 1061 _lea(T,
1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1064 } 1063 }
1065 for (uint32_t i = 0; i < Count3; ++i) { 1064 for (uint32_t i = 0; i < Count3; ++i) {
1066 const uint16_t Shift = 1; // log2(3-1) 1065 constexpr uint16_t Shift = 1; // log2(3-1)
1067 _lea(T, 1066 _lea(T,
1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1069 } 1068 }
1070 if (Count2) { 1069 if (Count2) {
1071 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1070 _shl(T, Ctx->getConstantInt(Ty, Count2));
1072 } 1071 }
1073 if (Src1IsNegative) 1072 if (Src1IsNegative)
1074 _neg(T); 1073 _neg(T);
1075 _mov(Dest, T); 1074 _mov(Dest, T);
1076 return true; 1075 return true;
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
1208 } 1207 }
1209 } else { 1208 } else {
1210 // NON-CONSTANT CASES. 1209 // NON-CONSTANT CASES.
1211 Constant *BitTest = Ctx->getConstantInt32(0x20); 1210 Constant *BitTest = Ctx->getConstantInt32(0x20);
1212 typename Traits::Insts::Label *Label = 1211 typename Traits::Insts::Label *Label =
1213 Traits::Insts::Label::create(Func, this); 1212 Traits::Insts::Label::create(Func, this);
1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1213 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1215 // t1:ecx = c.lo & 0xff 1214 // t1:ecx = c.lo & 0xff
1216 // t2 = b.lo 1215 // t2 = b.lo
1217 // t3 = b.hi 1216 // t3 = b.hi
1218 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1217 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1218 _mov(T_1, Src1Lo);
1219 _mov(T_2, Src0Lo); 1219 _mov(T_2, Src0Lo);
1220 _mov(T_3, Src0Hi); 1220 _mov(T_3, Src0Hi);
1221 switch (Op) { 1221 switch (Op) {
1222 default: 1222 default:
1223 assert(0 && "non-shift op"); 1223 assert(0 && "non-shift op");
1224 break; 1224 break;
1225 case InstArithmetic::Shl: { 1225 case InstArithmetic::Shl: {
1226 // a=b<<c ==> 1226 // a=b<<c ==>
1227 // t3 = shld t3, t2, t1 1227 // t3 = shld t3, t2, t1
1228 // t2 = shl t2, t1 1228 // t2 = shl t2, t1
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
1316 } 1316 }
1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1318 // These x86-32 helper-call-involved instructions are lowered in this 1318 // These x86-32 helper-call-involved instructions are lowered in this
1319 // separate switch. This is because loOperand() and hiOperand() may insert 1319 // separate switch. This is because loOperand() and hiOperand() may insert
1320 // redundant instructions for constant blinding and pooling. Such redundant 1320 // redundant instructions for constant blinding and pooling. Such redundant
1321 // instructions will fail liveness analysis under -Om1 setting. And, 1321 // instructions will fail liveness analysis under -Om1 setting. And,
1322 // actually these arguments do not need to be processed with loOperand() 1322 // actually these arguments do not need to be processed with loOperand()
1323 // and hiOperand() to be used. 1323 // and hiOperand() to be used.
1324 switch (Inst->getOp()) { 1324 switch (Inst->getOp()) {
1325 case InstArithmetic::Udiv: { 1325 case InstArithmetic::Udiv: {
1326 const SizeT MaxSrcs = 2; 1326 constexpr SizeT MaxSrcs = 2;
1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1328 Call->addArg(Inst->getSrc(0)); 1328 Call->addArg(Inst->getSrc(0));
1329 Call->addArg(Inst->getSrc(1)); 1329 Call->addArg(Inst->getSrc(1));
1330 lowerCall(Call); 1330 lowerCall(Call);
1331 return; 1331 return;
1332 } 1332 }
1333 case InstArithmetic::Sdiv: { 1333 case InstArithmetic::Sdiv: {
1334 const SizeT MaxSrcs = 2; 1334 constexpr SizeT MaxSrcs = 2;
1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); 1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1336 Call->addArg(Inst->getSrc(0)); 1336 Call->addArg(Inst->getSrc(0));
1337 Call->addArg(Inst->getSrc(1)); 1337 Call->addArg(Inst->getSrc(1));
1338 lowerCall(Call); 1338 lowerCall(Call);
1339 return; 1339 return;
1340 } 1340 }
1341 case InstArithmetic::Urem: { 1341 case InstArithmetic::Urem: {
1342 const SizeT MaxSrcs = 2; 1342 constexpr SizeT MaxSrcs = 2;
1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); 1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1344 Call->addArg(Inst->getSrc(0)); 1344 Call->addArg(Inst->getSrc(0));
1345 Call->addArg(Inst->getSrc(1)); 1345 Call->addArg(Inst->getSrc(1));
1346 lowerCall(Call); 1346 lowerCall(Call);
1347 return; 1347 return;
1348 } 1348 }
1349 case InstArithmetic::Srem: { 1349 case InstArithmetic::Srem: {
1350 const SizeT MaxSrcs = 2; 1350 constexpr SizeT MaxSrcs = 2;
1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); 1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1352 Call->addArg(Inst->getSrc(0)); 1352 Call->addArg(Inst->getSrc(0));
1353 Call->addArg(Inst->getSrc(1)); 1353 Call->addArg(Inst->getSrc(1));
1354 lowerCall(Call); 1354 lowerCall(Call);
1355 return; 1355 return;
1356 } 1356 }
1357 default: 1357 default:
1358 break; 1358 break;
1359 } 1359 }
1360 1360
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
1521 // pmuludq T1, Src1 1521 // pmuludq T1, Src1
1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1523 // pmuludq T2, T3 1523 // pmuludq T2, T3
1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1525 // shufps T1, T2, {0,2,0,2} 1525 // shufps T1, T2, {0,2,0,2}
1526 // pshufd T4, T1, {0,2,1,3} 1526 // pshufd T4, T1, {0,2,1,3}
1527 // movups Dest, T4 1527 // movups Dest, T4
1528 1528
1529 // Mask that directs pshufd to create a vector with entries 1529 // Mask that directs pshufd to create a vector with entries
1530 // Src[1, 0, 3, 0] 1530 // Src[1, 0, 3, 0]
1531 const unsigned Constant1030 = 0x31; 1531 constexpr unsigned Constant1030 = 0x31;
1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); 1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1533 // Mask that directs shufps to create a vector with entries 1533 // Mask that directs shufps to create a vector with entries
1534 // Dest[0, 2], Src[0, 2] 1534 // Dest[0, 2], Src[0, 2]
1535 const unsigned Mask0202 = 0x88; 1535 constexpr unsigned Mask0202 = 0x88;
1536 // Mask that directs pshufd to create a vector with entries 1536 // Mask that directs pshufd to create a vector with entries
1537 // Src[0, 2, 1, 3] 1537 // Src[0, 2, 1, 3]
1538 const unsigned Mask0213 = 0xd8; 1538 constexpr unsigned Mask0213 = 0xd8;
1539 Variable *T1 = makeReg(IceType_v4i32); 1539 Variable *T1 = makeReg(IceType_v4i32);
1540 Variable *T2 = makeReg(IceType_v4i32); 1540 Variable *T2 = makeReg(IceType_v4i32);
1541 Variable *T3 = makeReg(IceType_v4i32); 1541 Variable *T3 = makeReg(IceType_v4i32);
1542 Variable *T4 = makeReg(IceType_v4i32); 1542 Variable *T4 = makeReg(IceType_v4i32);
1543 _movp(T1, Src0); 1543 _movp(T1, Src0);
1544 _pshufd(T2, Src0, Mask1030); 1544 _pshufd(T2, Src0, Mask1030);
1545 _pshufd(T3, Src1, Mask1030); 1545 _pshufd(T3, Src1, Mask1030);
1546 _pmuludq(T1, Src1); 1546 _pmuludq(T1, Src1);
1547 _pmuludq(T2, T3); 1547 _pmuludq(T2, T3);
1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
1623 _mov(T, Src0); 1623 _mov(T, Src0);
1624 _sub(T, Src1); 1624 _sub(T, Src1);
1625 _mov(Dest, T); 1625 _mov(Dest, T);
1626 break; 1626 break;
1627 case InstArithmetic::Mul: 1627 case InstArithmetic::Mul:
1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1629 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1630 return; 1630 return;
1631 } 1631 }
1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1633 // be in eax. 1633 // be in al.
1634 if (isByteSizedArithType(Dest->getType())) { 1634 if (isByteSizedArithType(Dest->getType())) {
1635 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1635 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1637 _imul(T, Src0 == Src1 ? T : Src1); 1637 _imul(T, Src0 == Src1 ? T : Src1);
1638 _mov(Dest, T); 1638 _mov(Dest, T);
1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1640 T = makeReg(Dest->getType()); 1640 T = makeReg(Dest->getType());
1641 _imul_imm(T, Src0, ImmConst); 1641 _imul_imm(T, Src0, ImmConst);
1642 _mov(Dest, T); 1642 _mov(Dest, T);
1643 } else { 1643 } else {
1644 _mov(T, Src0); 1644 _mov(T, Src0);
1645 _imul(T, Src0 == Src1 ? T : Src1); 1645 _imul(T, Src0 == Src1 ? T : Src1);
1646 _mov(Dest, T); 1646 _mov(Dest, T);
1647 } 1647 }
1648 break; 1648 break;
1649 case InstArithmetic::Shl: 1649 case InstArithmetic::Shl:
1650 _mov(T, Src0); 1650 _mov(T, Src0);
1651 if (!llvm::isa<ConstantInteger32>(Src1)) 1651 if (!llvm::isa<ConstantInteger32>(Src1)) {
1652 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1652 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1653 _mov(Cl, Src1);
1654 Src1 = Cl;
1655 }
1653 _shl(T, Src1); 1656 _shl(T, Src1);
1654 _mov(Dest, T); 1657 _mov(Dest, T);
1655 break; 1658 break;
1656 case InstArithmetic::Lshr: 1659 case InstArithmetic::Lshr:
1657 _mov(T, Src0); 1660 _mov(T, Src0);
1658 if (!llvm::isa<ConstantInteger32>(Src1)) 1661 if (!llvm::isa<ConstantInteger32>(Src1)) {
1659 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1662 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1663 _mov(Cl, Src1);
1664 Src1 = Cl;
1665 }
1660 _shr(T, Src1); 1666 _shr(T, Src1);
1661 _mov(Dest, T); 1667 _mov(Dest, T);
1662 break; 1668 break;
1663 case InstArithmetic::Ashr: 1669 case InstArithmetic::Ashr:
1664 _mov(T, Src0); 1670 _mov(T, Src0);
1665 if (!llvm::isa<ConstantInteger32>(Src1)) 1671 if (!llvm::isa<ConstantInteger32>(Src1)) {
1666 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1672 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1673 _mov(Cl, Src1);
1674 Src1 = Cl;
1675 }
1667 _sar(T, Src1); 1676 _sar(T, Src1);
1668 _mov(Dest, T); 1677 _mov(Dest, T);
1669 break; 1678 break;
1670 case InstArithmetic::Udiv: 1679 case InstArithmetic::Udiv:
1671 // div and idiv are the few arithmetic operators that do not allow 1680 // div and idiv are the few arithmetic operators that do not allow
1672 // immediates as the operand. 1681 // immediates as the operand.
1673 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1682 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1674 if (isByteSizedArithType(Dest->getType())) { 1683 if (isByteSizedArithType(Dest->getType())) {
1675 // For 8-bit unsigned division we need to zero-extend al into ah. A mov 1684 // For 8-bit unsigned division we need to zero-extend al into ah. A mov
1676 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 1685 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
1677 // assembler refuses to encode %ah (encoding %spl with a REX prefix 1686 // assembler refuses to encode %ah (encoding %spl with a REX prefix
1678 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah 1687 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
1679 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and 1688 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
 1680 // %d[lh], which means the X86 target lowering (and the register 1689 // %d[lh], which means the X86 target lowering (and the register
1681 // allocator) would have to be aware of this restriction. For now, we 1690 // allocator) would have to be aware of this restriction. For now, we
1682 // simply zero %eax completely, and move the dividend into %al. 1691 // simply zero %eax completely, and move the dividend into %al.
1683 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1692 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1684 Context.insert(InstFakeDef::create(Func, T_eax)); 1693 Context.insert(InstFakeDef::create(Func, T_eax));
1685 _xor(T_eax, T_eax); 1694 _xor(T_eax, T_eax);
1686 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1695 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1687 _div(T, Src1, T); 1696 _div(T, Src1, T);
1688 _mov(Dest, T); 1697 _mov(Dest, T);
1689 Context.insert(InstFakeUse::create(Func, T_eax)); 1698 Context.insert(InstFakeUse::create(Func, T_eax));
1690 } else { 1699 } else {
1691 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1700 Type Ty = Dest->getType();
1692 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1701 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1693 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1702 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1703 switch (Ty) {
1704 default:
1705 llvm_unreachable("Bad type for udiv");
1706 // fallthrough
1707 case IceType_i32:
1708 break;
1709 case IceType_i16:
1710 Eax = Traits::RegisterSet::Reg_ax;
1711 Edx = Traits::RegisterSet::Reg_dx;
1712 break;
1713 }
1714 Constant *Zero = Ctx->getConstantZero(Ty);
1715 _mov(T, Src0, Eax);
1716 _mov(T_edx, Zero, Edx);
1694 _div(T, Src1, T_edx); 1717 _div(T, Src1, T_edx);
1695 _mov(Dest, T); 1718 _mov(Dest, T);
1696 } 1719 }
1697 break; 1720 break;
1698 case InstArithmetic::Sdiv: 1721 case InstArithmetic::Sdiv:
1699 // TODO(stichnot): Enable this after doing better performance and cross 1722 // TODO(stichnot): Enable this after doing better performance and cross
1700 // testing. 1723 // testing.
1701 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1724 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1702 // Optimize division by constant power of 2, but not for Om1 or O0, just 1725 // Optimize division by constant power of 2, but not for Om1 or O0, just
1703 // to keep things simple there. 1726 // to keep things simple there.
(...skipping 21 matching lines...) Expand all
1725 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1748 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1726 _add(T, Src0); 1749 _add(T, Src0);
1727 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1750 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1728 } 1751 }
1729 _mov(Dest, T); 1752 _mov(Dest, T);
1730 return; 1753 return;
1731 } 1754 }
1732 } 1755 }
1733 } 1756 }
1734 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1757 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1735 if (isByteSizedArithType(Dest->getType())) { 1758 switch (Type Ty = Dest->getType()) {
1759 default:
1760 llvm_unreachable("Bad type for sdiv");
1761 // fallthrough
1762 case IceType_i32:
1763 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1764 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1737 _cbwdq(T, T); 1765 break;
1738 _idiv(T, Src1, T); 1766 case IceType_i16:
1739 _mov(Dest, T); 1767 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1740 } else { 1768 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1741 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1769 break;
1742 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1770 case IceType_i8:
1743 _cbwdq(T_edx, T); 1771 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1744 _idiv(T, Src1, T_edx); 1772 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1745 _mov(Dest, T); 1773 break;
1746 } 1774 }
1775 _cbwdq(T_edx, T);
1776 _idiv(T, Src1, T_edx);
1777 _mov(Dest, T);
1747 break; 1778 break;
1748 case InstArithmetic::Urem: 1779 case InstArithmetic::Urem:
1749 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1780 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1750 if (isByteSizedArithType(Dest->getType())) { 1781 if (isByteSizedArithType(Dest->getType())) {
1751 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1782 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1752 Context.insert(InstFakeDef::create(Func, T_eax)); 1783 Context.insert(InstFakeDef::create(Func, T_eax));
1753 _xor(T_eax, T_eax); 1784 _xor(T_eax, T_eax);
1754 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1785 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1755 _div(T, Src1, T); 1786 _div(T, Src1, T);
1756 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1787 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1757 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1788 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1758 // this ever becomes a problem we can introduce a pseudo rem instruction 1789 // this ever becomes a problem we can introduce a pseudo rem instruction
1759 // that returns the remainder in %al directly (and uses a mov for copying 1790 // that returns the remainder in %al directly (and uses a mov for copying
1760 // %ah to %al.) 1791 // %ah to %al.)
1761 static constexpr uint8_t AlSizeInBits = 8; 1792 static constexpr uint8_t AlSizeInBits = 8;
1762 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1793 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1763 _mov(Dest, T); 1794 _mov(Dest, T);
1764 Context.insert(InstFakeUse::create(Func, T_eax)); 1795 Context.insert(InstFakeUse::create(Func, T_eax));
1765 } else { 1796 } else {
1766 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1797 Type Ty = Dest->getType();
1767 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); 1798 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1799 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1800 switch (Ty) {
1801 default:
1802 llvm_unreachable("Bad type for urem");
1803 // fallthrough
1804 case IceType_i32:
1805 break;
1806 case IceType_i16:
1807 Eax = Traits::RegisterSet::Reg_ax;
1808 Edx = Traits::RegisterSet::Reg_dx;
1809 break;
1810 }
1811 Constant *Zero = Ctx->getConstantZero(Ty);
1812 T_edx = makeReg(Dest->getType(), Edx);
1768 _mov(T_edx, Zero); 1813 _mov(T_edx, Zero);
1769 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1814 _mov(T, Src0, Eax);
1770 _div(T_edx, Src1, T); 1815 _div(T_edx, Src1, T);
1771 _mov(Dest, T_edx); 1816 _mov(Dest, T_edx);
1772 } 1817 }
1773 break; 1818 break;
1774 case InstArithmetic::Srem: 1819 case InstArithmetic::Srem:
1775 // TODO(stichnot): Enable this after doing better performance and cross 1820 // TODO(stichnot): Enable this after doing better performance and cross
1776 // testing. 1821 // testing.
1777 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1822 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1778 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 1823 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1779 // keep things simple there. 1824 // keep things simple there.
(...skipping 26 matching lines...) Expand all
1806 _add(T, Src0); 1851 _add(T, Src0);
1807 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1808 _sub(T, Src0); 1853 _sub(T, Src0);
1809 _neg(T); 1854 _neg(T);
1810 _mov(Dest, T); 1855 _mov(Dest, T);
1811 return; 1856 return;
1812 } 1857 }
1813 } 1858 }
1814 } 1859 }
1815 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1816 if (isByteSizedArithType(Dest->getType())) { 1861 switch (Type Ty = Dest->getType()) {
1817 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1862 default:
1818 // T is %al. 1863 llvm_unreachable("Bad type for srem");
1819 _cbwdq(T, T); 1864 // fallthrough
1820 _idiv(T, Src1, T); 1865 case IceType_i32:
1821 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1866 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1822 Context.insert(InstFakeDef::create(Func, T_eax));
1823 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1824 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1825 // this ever becomes a problem we can introduce a pseudo rem instruction
1826 // that returns the remainder in %al directly (and uses a mov for copying
1827 // %ah to %al.)
1828 static constexpr uint8_t AlSizeInBits = 8;
1829 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1830 _mov(Dest, T);
1831 Context.insert(InstFakeUse::create(Func, T_eax));
1832 } else {
1833 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1834 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1867 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1835 _cbwdq(T_edx, T); 1868 _cbwdq(T_edx, T);
1836 _idiv(T_edx, Src1, T); 1869 _idiv(T_edx, Src1, T);
1837 _mov(Dest, T_edx); 1870 _mov(Dest, T_edx);
1871 break;
1872 case IceType_i16:
1873 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1874 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1875 _cbwdq(T_edx, T);
1876 _idiv(T_edx, Src1, T);
1877 _mov(Dest, T_edx);
1878 break;
1879 case IceType_i8:
1880 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1881 // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
1882 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
1883 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1884 _cbwdq(T_edx, T);
1885 _idiv(T_edx, Src1, T);
1886 static constexpr uint8_t AlSizeInBits = 8;
1887 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
1888 _mov(Dest, T_edx);
1889 break;
1838 } 1890 }
1839 break; 1891 break;
1840 case InstArithmetic::Fadd: 1892 case InstArithmetic::Fadd:
1841 _mov(T, Src0); 1893 _mov(T, Src0);
1842 _addss(T, Src1); 1894 _addss(T, Src1);
1843 _mov(Dest, T); 1895 _mov(Dest, T);
1844 break; 1896 break;
1845 case InstArithmetic::Fsub: 1897 case InstArithmetic::Fsub:
1846 _mov(T, Src0); 1898 _mov(T, Src0);
1847 _subss(T, Src1); 1899 _subss(T, Src1);
1848 _mov(Dest, T); 1900 _mov(Dest, T);
1849 break; 1901 break;
1850 case InstArithmetic::Fmul: 1902 case InstArithmetic::Fmul:
1851 _mov(T, Src0); 1903 _mov(T, Src0);
1852 _mulss(T, Src0 == Src1 ? T : Src1); 1904 _mulss(T, Src0 == Src1 ? T : Src1);
1853 _mov(Dest, T); 1905 _mov(Dest, T);
1854 break; 1906 break;
1855 case InstArithmetic::Fdiv: 1907 case InstArithmetic::Fdiv:
1856 _mov(T, Src0); 1908 _mov(T, Src0);
1857 _divss(T, Src1); 1909 _divss(T, Src1);
1858 _mov(Dest, T); 1910 _mov(Dest, T);
1859 break; 1911 break;
1860 case InstArithmetic::Frem: { 1912 case InstArithmetic::Frem: {
1861 const SizeT MaxSrcs = 2; 1913 constexpr SizeT MaxSrcs = 2;
1862 Type Ty = Dest->getType(); 1914 Type Ty = Dest->getType();
1863 InstCall *Call = makeHelperCall( 1915 InstCall *Call = makeHelperCall(
1864 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 1916 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1865 Call->addArg(Src0); 1917 Call->addArg(Src0);
1866 Call->addArg(Src1); 1918 Call->addArg(Src1);
1867 return lowerCall(Call); 1919 return lowerCall(Call);
1868 } 1920 }
1869 } 1921 }
1870 } 1922 }
1871 1923
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
2106 if (isVectorType(Dest->getType())) { 2158 if (isVectorType(Dest->getType())) {
2107 assert(Dest->getType() == IceType_v4i32 && 2159 assert(Dest->getType() == IceType_v4i32 &&
2108 Inst->getSrc(0)->getType() == IceType_v4f32); 2160 Inst->getSrc(0)->getType() == IceType_v4f32);
2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2161 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2110 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2162 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2111 Src0RM = legalizeToReg(Src0RM); 2163 Src0RM = legalizeToReg(Src0RM);
2112 Variable *T = makeReg(Dest->getType()); 2164 Variable *T = makeReg(Dest->getType());
2113 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2165 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2114 _movp(Dest, T); 2166 _movp(Dest, T);
2115 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2167 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2116 const SizeT MaxSrcs = 1; 2168 constexpr SizeT MaxSrcs = 1;
2117 Type SrcType = Inst->getSrc(0)->getType(); 2169 Type SrcType = Inst->getSrc(0)->getType();
2118 InstCall *Call = 2170 InstCall *Call =
2119 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2171 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2120 : H_fptosi_f64_i64, 2172 : H_fptosi_f64_i64,
2121 Dest, MaxSrcs); 2173 Dest, MaxSrcs);
2122 Call->addArg(Inst->getSrc(0)); 2174 Call->addArg(Inst->getSrc(0));
2123 lowerCall(Call); 2175 lowerCall(Call);
2124 } else { 2176 } else {
2125 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2177 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2126 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2178 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
(...skipping 10 matching lines...) Expand all
2137 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2189 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2138 if (Dest->getType() == IceType_i1) 2190 if (Dest->getType() == IceType_i1)
2139 _and(T_2, Ctx->getConstantInt1(1)); 2191 _and(T_2, Ctx->getConstantInt1(1));
2140 _mov(Dest, T_2); 2192 _mov(Dest, T_2);
2141 } 2193 }
2142 break; 2194 break;
2143 case InstCast::Fptoui: 2195 case InstCast::Fptoui:
2144 if (isVectorType(Dest->getType())) { 2196 if (isVectorType(Dest->getType())) {
2145 assert(Dest->getType() == IceType_v4i32 && 2197 assert(Dest->getType() == IceType_v4i32 &&
2146 Inst->getSrc(0)->getType() == IceType_v4f32); 2198 Inst->getSrc(0)->getType() == IceType_v4f32);
2147 const SizeT MaxSrcs = 1; 2199 constexpr SizeT MaxSrcs = 1;
2148 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2200 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2149 Call->addArg(Inst->getSrc(0)); 2201 Call->addArg(Inst->getSrc(0));
2150 lowerCall(Call); 2202 lowerCall(Call);
2151 } else if (Dest->getType() == IceType_i64 || 2203 } else if (Dest->getType() == IceType_i64 ||
2152 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { 2204 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2153 // Use a helper for both x86-32 and x86-64. 2205 // Use a helper for both x86-32 and x86-64.
2154 const SizeT MaxSrcs = 1; 2206 constexpr SizeT MaxSrcs = 1;
2155 Type DestType = Dest->getType(); 2207 Type DestType = Dest->getType();
2156 Type SrcType = Inst->getSrc(0)->getType(); 2208 Type SrcType = Inst->getSrc(0)->getType();
2157 IceString TargetString; 2209 IceString TargetString;
2158 if (Traits::Is64Bit) { 2210 if (Traits::Is64Bit) {
2159 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2160 : H_fptoui_f64_i64; 2212 : H_fptoui_f64_i64;
2161 } else if (isInt32Asserting32Or64(DestType)) { 2213 } else if (isInt32Asserting32Or64(DestType)) {
2162 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2163 : H_fptoui_f64_i32; 2215 : H_fptoui_f64_i32;
2164 } else { 2216 } else {
(...skipping 28 matching lines...) Expand all
2193 assert(Dest->getType() == IceType_v4f32 && 2245 assert(Dest->getType() == IceType_v4f32 &&
2194 Inst->getSrc(0)->getType() == IceType_v4i32); 2246 Inst->getSrc(0)->getType() == IceType_v4i32);
2195 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2247 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2196 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2248 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2197 Src0RM = legalizeToReg(Src0RM); 2249 Src0RM = legalizeToReg(Src0RM);
2198 Variable *T = makeReg(Dest->getType()); 2250 Variable *T = makeReg(Dest->getType());
2199 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2251 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2200 _movp(Dest, T); 2252 _movp(Dest, T);
2201 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2253 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2202 // Use a helper for x86-32. 2254 // Use a helper for x86-32.
2203 const SizeT MaxSrcs = 1; 2255 constexpr SizeT MaxSrcs = 1;
2204 Type DestType = Dest->getType(); 2256 Type DestType = Dest->getType();
2205 InstCall *Call = 2257 InstCall *Call =
2206 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2258 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2207 : H_sitofp_i64_f64, 2259 : H_sitofp_i64_f64,
2208 Dest, MaxSrcs); 2260 Dest, MaxSrcs);
2209 // TODO: Call the correct compiler-rt helper function. 2261 // TODO: Call the correct compiler-rt helper function.
2210 Call->addArg(Inst->getSrc(0)); 2262 Call->addArg(Inst->getSrc(0));
2211 lowerCall(Call); 2263 lowerCall(Call);
2212 return; 2264 return;
2213 } else { 2265 } else {
(...skipping 14 matching lines...) Expand all
2228 _movsx(T_1, Src0RM); 2280 _movsx(T_1, Src0RM);
2229 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2281 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2230 _mov(Dest, T_2); 2282 _mov(Dest, T_2);
2231 } 2283 }
2232 break; 2284 break;
2233 case InstCast::Uitofp: { 2285 case InstCast::Uitofp: {
2234 Operand *Src0 = Inst->getSrc(0); 2286 Operand *Src0 = Inst->getSrc(0);
2235 if (isVectorType(Src0->getType())) { 2287 if (isVectorType(Src0->getType())) {
2236 assert(Dest->getType() == IceType_v4f32 && 2288 assert(Dest->getType() == IceType_v4f32 &&
2237 Src0->getType() == IceType_v4i32); 2289 Src0->getType() == IceType_v4i32);
2238 const SizeT MaxSrcs = 1; 2290 constexpr SizeT MaxSrcs = 1;
2239 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2291 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2240 Call->addArg(Src0); 2292 Call->addArg(Src0);
2241 lowerCall(Call); 2293 lowerCall(Call);
2242 } else if (Src0->getType() == IceType_i64 || 2294 } else if (Src0->getType() == IceType_i64 ||
2243 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2295 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2244 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2296 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2245 // x86-32. 2297 // x86-32.
2246 const SizeT MaxSrcs = 1; 2298 constexpr SizeT MaxSrcs = 1;
2247 Type DestType = Dest->getType(); 2299 Type DestType = Dest->getType();
2248 IceString TargetString; 2300 IceString TargetString;
2249 if (isInt32Asserting32Or64(Src0->getType())) { 2301 if (isInt32Asserting32Or64(Src0->getType())) {
2250 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2302 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2251 : H_uitofp_i32_f64; 2303 : H_uitofp_i32_f64;
2252 } else { 2304 } else {
2253 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2305 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2254 : H_uitofp_i64_f64; 2306 : H_uitofp_i64_f64;
2255 } 2307 }
2256 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2308 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
2452 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2504 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2453 ConstantInteger32 *ElementIndex = 2505 ConstantInteger32 *ElementIndex =
2454 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2506 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2455 // Only constant indices are allowed in PNaCl IR. 2507 // Only constant indices are allowed in PNaCl IR.
2456 assert(ElementIndex); 2508 assert(ElementIndex);
2457 2509
2458 unsigned Index = ElementIndex->getValue(); 2510 unsigned Index = ElementIndex->getValue();
2459 Type Ty = SourceVectNotLegalized->getType(); 2511 Type Ty = SourceVectNotLegalized->getType();
2460 Type ElementTy = typeElementType(Ty); 2512 Type ElementTy = typeElementType(Ty);
2461 Type InVectorElementTy = Traits::getInVectorElementType(Ty); 2513 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2462 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2463 2514
2464 // TODO(wala): Determine the best lowering sequences for each type. 2515 // TODO(wala): Determine the best lowering sequences for each type.
2465 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2516 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2466 InstructionSet >= Traits::SSE4_1; 2517 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
2467 if (CanUsePextr && Ty != IceType_v4f32) { 2518 Variable *ExtractedElementR =
2468 // Use pextrb, pextrw, or pextrd. 2519 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
2520 if (CanUsePextr) {
2521 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
2522 // bits of the destination register, so we represent this by always
2523 // extracting into an i32 register. The _mov into Dest below will do
2524 // truncation as necessary.
2469 Constant *Mask = Ctx->getConstantInt32(Index); 2525 Constant *Mask = Ctx->getConstantInt32(Index);
2470 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); 2526 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
2471 _pextr(ExtractedElementR, SourceVectR, Mask); 2527 _pextr(ExtractedElementR, SourceVectR, Mask);
2472 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2528 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2473 // Use pshufd and movd/movss. 2529 // Use pshufd and movd/movss.
2474 Variable *T = nullptr; 2530 Variable *T = nullptr;
2475 if (Index) { 2531 if (Index) {
2476 // The shuffle only needs to occur if the element to be extracted is not 2532 // The shuffle only needs to occur if the element to be extracted is not
2477 // at the lowest index. 2533 // at the lowest index.
2478 Constant *Mask = Ctx->getConstantInt32(Index); 2534 Constant *Mask = Ctx->getConstantInt32(Index);
(...skipping 492 matching lines...) Expand 10 before | Expand all | Expand 10 after
2971 // Use insertps, pinsrb, pinsrw, or pinsrd. 3027 // Use insertps, pinsrb, pinsrw, or pinsrd.
2972 Operand *ElementRM = 3028 Operand *ElementRM =
2973 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3029 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2974 Operand *SourceVectRM = 3030 Operand *SourceVectRM =
2975 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3031 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2976 Variable *T = makeReg(Ty); 3032 Variable *T = makeReg(Ty);
2977 _movp(T, SourceVectRM); 3033 _movp(T, SourceVectRM);
2978 if (Ty == IceType_v4f32) 3034 if (Ty == IceType_v4f32)
2979 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3035 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
2980 else 3036 else
3037 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
3038 // operand is a register, it must be a full r32 register like eax, and not
3039 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
3040 // for the use of r16 and r8 by converting them through getBaseReg(),
3041 // while emitIAS() validates that the original and base register encodings
3042 // are the same. But for an "interior" register like ah, it should
3043 // probably be copied into an r32 via movzx so that the types work out.
2981 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3044 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
2982 _movp(Inst->getDest(), T); 3045 _movp(Inst->getDest(), T);
2983 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 3046 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2984 // Use shufps or movss. 3047 // Use shufps or movss.
2985 Variable *ElementR = nullptr; 3048 Variable *ElementR = nullptr;
2986 Operand *SourceVectRM = 3049 Operand *SourceVectRM =
2987 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3050 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2988 3051
2989 if (InVectorElementTy == IceType_f32) { 3052 if (InVectorElementTy == IceType_f32) {
2990 // ElementR will be in an XMM register since it is floating point. 3053 // ElementR will be in an XMM register since it is floating point.
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after
3305 // well-defined value. 3368 // well-defined value.
3306 Operand *Val = legalize(Instr->getArg(0)); 3369 Operand *Val = legalize(Instr->getArg(0));
3307 Operand *FirstVal; 3370 Operand *FirstVal;
3308 Operand *SecondVal = nullptr; 3371 Operand *SecondVal = nullptr;
3309 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3372 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3310 FirstVal = loOperand(Val); 3373 FirstVal = loOperand(Val);
3311 SecondVal = hiOperand(Val); 3374 SecondVal = hiOperand(Val);
3312 } else { 3375 } else {
3313 FirstVal = Val; 3376 FirstVal = Val;
3314 } 3377 }
3315 const bool IsCttz = false; 3378 constexpr bool IsCttz = false;
3316 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3379 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3317 SecondVal); 3380 SecondVal);
3318 return; 3381 return;
3319 } 3382 }
3320 case Intrinsics::Cttz: { 3383 case Intrinsics::Cttz: {
3321 // The "is zero undef" parameter is ignored and we always return a 3384 // The "is zero undef" parameter is ignored and we always return a
3322 // well-defined value. 3385 // well-defined value.
3323 Operand *Val = legalize(Instr->getArg(0)); 3386 Operand *Val = legalize(Instr->getArg(0));
3324 Operand *FirstVal; 3387 Operand *FirstVal;
3325 Operand *SecondVal = nullptr; 3388 Operand *SecondVal = nullptr;
3326 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3389 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3327 FirstVal = hiOperand(Val); 3390 FirstVal = hiOperand(Val);
3328 SecondVal = loOperand(Val); 3391 SecondVal = loOperand(Val);
3329 } else { 3392 } else {
3330 FirstVal = Val; 3393 FirstVal = Val;
3331 } 3394 }
3332 const bool IsCttz = true; 3395 constexpr bool IsCttz = true;
3333 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3396 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3334 SecondVal); 3397 SecondVal);
3335 return; 3398 return;
3336 } 3399 }
3337 case Intrinsics::Fabs: { 3400 case Intrinsics::Fabs: {
3338 Operand *Src = legalize(Instr->getArg(0)); 3401 Operand *Src = legalize(Instr->getArg(0));
3339 Type Ty = Src->getType(); 3402 Type Ty = Src->getType();
3340 Variable *Dest = Instr->getDest(); 3403 Variable *Dest = Instr->getDest();
3341 Variable *T = makeVectorOfFabsMask(Ty); 3404 Variable *T = makeVectorOfFabsMask(Ty);
3342 // The pand instruction operates on an m128 memory operand, so if Src is an 3405 // The pand instruction operates on an m128 memory operand, so if Src is an
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
3420 Func->setError("Should not be lowering UnknownIntrinsic"); 3483 Func->setError("Should not be lowering UnknownIntrinsic");
3421 return; 3484 return;
3422 } 3485 }
3423 return; 3486 return;
3424 } 3487 }
3425 3488
3426 template <class Machine> 3489 template <class Machine>
3427 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3490 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3428 Operand *Ptr, Operand *Expected, 3491 Operand *Ptr, Operand *Expected,
3429 Operand *Desired) { 3492 Operand *Desired) {
3430 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { 3493 Type Ty = Expected->getType();
3494 if (!Traits::Is64Bit && Ty == IceType_i64) {
3431 // Reserve the pre-colored registers first, before adding any more 3495 // Reserve the pre-colored registers first, before adding any more
3432 // infinite-weight variables from formMemoryOperand's legalization. 3496 // infinite-weight variables from formMemoryOperand's legalization.
3433 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3497 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3434 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3498 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3435 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3499 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3436 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3500 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3437 _mov(T_eax, loOperand(Expected)); 3501 _mov(T_eax, loOperand(Expected));
3438 _mov(T_edx, hiOperand(Expected)); 3502 _mov(T_edx, hiOperand(Expected));
3439 _mov(T_ebx, loOperand(Desired)); 3503 _mov(T_ebx, loOperand(Desired));
3440 _mov(T_ecx, hiOperand(Desired)); 3504 _mov(T_ecx, hiOperand(Desired));
3441 typename Traits::X86OperandMem *Addr = 3505 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3442 formMemoryOperand(Ptr, Expected->getType()); 3506 constexpr bool Locked = true;
3443 const bool Locked = true;
3444 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3507 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3445 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3508 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3446 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3509 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3447 _mov(DestLo, T_eax); 3510 _mov(DestLo, T_eax);
3448 _mov(DestHi, T_edx); 3511 _mov(DestHi, T_edx);
3449 return; 3512 return;
3450 } 3513 }
3451 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); 3514 int32_t Eax;
3515 switch (Ty) {
3516 default:
3517 llvm_unreachable("Bad type for cmpxchg");
3518 // fallthrough
3519 case IceType_i32:
3520 Eax = Traits::RegisterSet::Reg_eax;
3521 break;
3522 case IceType_i16:
3523 Eax = Traits::RegisterSet::Reg_ax;
3524 break;
3525 case IceType_i8:
3526 Eax = Traits::RegisterSet::Reg_al;
3527 break;
3528 }
3529 Variable *T_eax = makeReg(Ty, Eax);
3452 _mov(T_eax, Expected); 3530 _mov(T_eax, Expected);
3453 typename Traits::X86OperandMem *Addr = 3531 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3454 formMemoryOperand(Ptr, Expected->getType());
3455 Variable *DesiredReg = legalizeToReg(Desired); 3532 Variable *DesiredReg = legalizeToReg(Desired);
3456 const bool Locked = true; 3533 constexpr bool Locked = true;
3457 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3534 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3458 _mov(DestPrev, T_eax); 3535 _mov(DestPrev, T_eax);
3459 } 3536 }
3460 3537
3461 template <class Machine> 3538 template <class Machine>
3462 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3539 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3463 Operand *PtrToMem, 3540 Operand *PtrToMem,
3464 Operand *Expected, 3541 Operand *Expected,
3465 Operand *Desired) { 3542 Operand *Desired) {
3466 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3543 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3548 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3625 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3549 // All the fall-through paths must set this to true, but use this 3626 // All the fall-through paths must set this to true, but use this
3550 // for asserting. 3627 // for asserting.
3551 NeedsCmpxchg = true; 3628 NeedsCmpxchg = true;
3552 Op_Lo = &TargetX86Base<Machine>::_add; 3629 Op_Lo = &TargetX86Base<Machine>::_add;
3553 Op_Hi = &TargetX86Base<Machine>::_adc; 3630 Op_Hi = &TargetX86Base<Machine>::_adc;
3554 break; 3631 break;
3555 } 3632 }
3556 typename Traits::X86OperandMem *Addr = 3633 typename Traits::X86OperandMem *Addr =
3557 formMemoryOperand(Ptr, Dest->getType()); 3634 formMemoryOperand(Ptr, Dest->getType());
3558 const bool Locked = true; 3635 constexpr bool Locked = true;
3559 Variable *T = nullptr; 3636 Variable *T = nullptr;
3560 _mov(T, Val); 3637 _mov(T, Val);
3561 _xadd(Addr, T, Locked); 3638 _xadd(Addr, T, Locked);
3562 _mov(Dest, T); 3639 _mov(Dest, T);
3563 return; 3640 return;
3564 } 3641 }
3565 case Intrinsics::AtomicSub: { 3642 case Intrinsics::AtomicSub: {
3566 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3643 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3567 NeedsCmpxchg = true; 3644 NeedsCmpxchg = true;
3568 Op_Lo = &TargetX86Base<Machine>::_sub; 3645 Op_Lo = &TargetX86Base<Machine>::_sub;
3569 Op_Hi = &TargetX86Base<Machine>::_sbb; 3646 Op_Hi = &TargetX86Base<Machine>::_sbb;
3570 break; 3647 break;
3571 } 3648 }
3572 typename Traits::X86OperandMem *Addr = 3649 typename Traits::X86OperandMem *Addr =
3573 formMemoryOperand(Ptr, Dest->getType()); 3650 formMemoryOperand(Ptr, Dest->getType());
3574 const bool Locked = true; 3651 constexpr bool Locked = true;
3575 Variable *T = nullptr; 3652 Variable *T = nullptr;
3576 _mov(T, Val); 3653 _mov(T, Val);
3577 _neg(T); 3654 _neg(T);
3578 _xadd(Addr, T, Locked); 3655 _xadd(Addr, T, Locked);
3579 _mov(Dest, T); 3656 _mov(Dest, T);
3580 return; 3657 return;
3581 } 3658 }
3582 case Intrinsics::AtomicOr: 3659 case Intrinsics::AtomicOr:
3583 // TODO(jvoung): If Dest is null or dead, then some of these 3660 // TODO(jvoung): If Dest is null or dead, then some of these
3584 // operations do not need an "exchange", but just a locked op. 3661 // operations do not need an "exchange", but just a locked op.
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
3672 _mov(T_ecx, T_edx); 3749 _mov(T_ecx, T_edx);
3673 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3750 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3674 } else { 3751 } else {
3675 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3752 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3676 // It just needs the Val loaded into ebx and ecx. 3753 // It just needs the Val loaded into ebx and ecx.
3677 // That can also be done before the loop. 3754 // That can also be done before the loop.
3678 _mov(T_ebx, loOperand(Val)); 3755 _mov(T_ebx, loOperand(Val));
3679 _mov(T_ecx, hiOperand(Val)); 3756 _mov(T_ecx, hiOperand(Val));
3680 Context.insert(Label); 3757 Context.insert(Label);
3681 } 3758 }
3682 const bool Locked = true; 3759 constexpr bool Locked = true;
3683 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3760 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3684 _br(Traits::Cond::Br_ne, Label); 3761 _br(Traits::Cond::Br_ne, Label);
3685 if (!IsXchg8b) { 3762 if (!IsXchg8b) {
3686 // If Val is a variable, model the extended live range of Val through 3763 // If Val is a variable, model the extended live range of Val through
3687 // the end of the loop, since it will be re-used by the loop. 3764 // the end of the loop, since it will be re-used by the loop.
3688 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3765 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3689 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); 3766 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3690 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); 3767 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3691 Context.insert(InstFakeUse::create(Func, ValLo)); 3768 Context.insert(InstFakeUse::create(Func, ValLo));
3692 Context.insert(InstFakeUse::create(Func, ValHi)); 3769 Context.insert(InstFakeUse::create(Func, ValHi));
3693 } 3770 }
3694 } else { 3771 } else {
3695 // For xchg, the loop is slightly smaller and ebx/ecx are used. 3772 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3696 Context.insert(InstFakeUse::create(Func, T_ebx)); 3773 Context.insert(InstFakeUse::create(Func, T_ebx));
3697 Context.insert(InstFakeUse::create(Func, T_ecx)); 3774 Context.insert(InstFakeUse::create(Func, T_ecx));
3698 } 3775 }
3699 // The address base (if any) is also reused in the loop. 3776 // The address base (if any) is also reused in the loop.
3700 if (Variable *Base = Addr->getBase()) 3777 if (Variable *Base = Addr->getBase())
3701 Context.insert(InstFakeUse::create(Func, Base)); 3778 Context.insert(InstFakeUse::create(Func, Base));
3702 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3779 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3780 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3704 _mov(DestLo, T_eax); 3781 _mov(DestLo, T_eax);
3705 _mov(DestHi, T_edx); 3782 _mov(DestHi, T_edx);
3706 return; 3783 return;
3707 } 3784 }
3708 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3785 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3709 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); 3786 int32_t Eax;
3787 switch (Ty) {
3788 default:
3789 llvm_unreachable("Bad type for atomicRMW");
3790 // fallthrough
3791 case IceType_i32:
3792 Eax = Traits::RegisterSet::Reg_eax;
3793 break;
3794 case IceType_i16:
3795 Eax = Traits::RegisterSet::Reg_ax;
3796 break;
3797 case IceType_i8:
3798 Eax = Traits::RegisterSet::Reg_al;
3799 break;
3800 }
3801 Variable *T_eax = makeReg(Ty, Eax);
3710 _mov(T_eax, Addr); 3802 _mov(T_eax, Addr);
3711 typename Traits::Insts::Label *Label = 3803 typename Traits::Insts::Label *Label =
3712 Traits::Insts::Label::create(Func, this); 3804 Traits::Insts::Label::create(Func, this);
3713 Context.insert(Label); 3805 Context.insert(Label);
3714 // We want to pick a different register for T than Eax, so don't use 3806 // We want to pick a different register for T than Eax, so don't use
3715 // _mov(T == nullptr, T_eax). 3807 // _mov(T == nullptr, T_eax).
3716 Variable *T = makeReg(Ty); 3808 Variable *T = makeReg(Ty);
3717 _mov(T, T_eax); 3809 _mov(T, T_eax);
3718 (this->*Op_Lo)(T, Val); 3810 (this->*Op_Lo)(T, Val);
3719 const bool Locked = true; 3811 constexpr bool Locked = true;
3720 _cmpxchg(Addr, T_eax, T, Locked); 3812 _cmpxchg(Addr, T_eax, T, Locked);
3721 _br(Traits::Cond::Br_ne, Label); 3813 _br(Traits::Cond::Br_ne, Label);
3722 // If Val is a variable, model the extended live range of Val through 3814 // If Val is a variable, model the extended live range of Val through
3723 // the end of the loop, since it will be re-used by the loop. 3815 // the end of the loop, since it will be re-used by the loop.
3724 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3816 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3725 Context.insert(InstFakeUse::create(Func, ValVar)); 3817 Context.insert(InstFakeUse::create(Func, ValVar));
3726 } 3818 }
3727 // The address base (if any) is also reused in the loop. 3819 // The address base (if any) is also reused in the loop.
3728 if (Variable *Base = Addr->getBase()) 3820 if (Variable *Base = Addr->getBase())
3729 Context.insert(InstFakeUse::create(Func, Base)); 3821 Context.insert(InstFakeUse::create(Func, Base));
(...skipping 1357 matching lines...) Expand 10 before | Expand all | Expand 10 after
5087 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 5179 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5088 Ty == IceType_v16i8); 5180 Ty == IceType_v16i8);
5089 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 5181 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5090 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 5182 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5091 SizeT Shift = 5183 SizeT Shift =
5092 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 5184 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5093 _psll(Reg, Ctx->getConstantInt8(Shift)); 5185 _psll(Reg, Ctx->getConstantInt8(Shift));
5094 return Reg; 5186 return Reg;
5095 } else { 5187 } else {
5096 // SSE has no left shift operation for vectors of 8 bit integers. 5188 // SSE has no left shift operation for vectors of 8 bit integers.
5097 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 5189 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5098 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 5190 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5099 Variable *Reg = makeReg(Ty, RegNum); 5191 Variable *Reg = makeReg(Ty, RegNum);
5100 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 5192 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5101 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 5193 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5102 return Reg; 5194 return Reg;
5103 } 5195 }
5104 } 5196 }
5105 5197
5106 /// Construct a mask in a register that can be and'ed with a floating-point 5198 /// Construct a mask in a register that can be and'ed with a floating-point
5107 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 5199 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
(...skipping 13 matching lines...) Expand all
5121 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5213 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5122 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 5214 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
5123 uint32_t Offset) { 5215 uint32_t Offset) {
5124 // Ensure that Loc is a stack slot. 5216 // Ensure that Loc is a stack slot.
5125 assert(Slot->mustNotHaveReg()); 5217 assert(Slot->mustNotHaveReg());
5126 assert(Slot->getRegNum() == Variable::NoRegister); 5218 assert(Slot->getRegNum() == Variable::NoRegister);
5127 // Compute the location of Loc in memory. 5219 // Compute the location of Loc in memory.
5128 // TODO(wala,stichnot): lea should not 5220 // TODO(wala,stichnot): lea should not
5129 // be required. The address of the stack slot is known at compile time 5221 // be required. The address of the stack slot is known at compile time
5130 // (although not until after addProlog()). 5222 // (although not until after addProlog()).
5131 const Type PointerType = IceType_i32; 5223 constexpr Type PointerType = IceType_i32;
5132 Variable *Loc = makeReg(PointerType); 5224 Variable *Loc = makeReg(PointerType);
5133 _lea(Loc, Slot); 5225 _lea(Loc, Slot);
5134 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5226 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5135 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5227 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5136 } 5228 }
5137 5229
5138 /// Helper for legalize() to emit the right code to lower an operand to a 5230 /// Helper for legalize() to emit the right code to lower an operand to a
5139 /// register of the appropriate type. 5231 /// register of the appropriate type.
5140 template <class Machine> 5232 template <class Machine>
5141 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5233 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
5174 if (Subst->mustHaveReg() && !Subst->hasReg()) { 5266 if (Subst->mustHaveReg() && !Subst->hasReg()) {
5175 // At this point we know the substitution will have a register. 5267 // At this point we know the substitution will have a register.
5176 if (From->getType() == Subst->getType()) { 5268 if (From->getType() == Subst->getType()) {
5177 // At this point we know the substitution's register is compatible. 5269 // At this point we know the substitution's register is compatible.
5178 return Subst; 5270 return Subst;
5179 } 5271 }
5180 } 5272 }
5181 } 5273 }
5182 } 5274 }
5183 5275
5184 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 5276 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5185 // Before doing anything with a Mem operand, we need to ensure that the 5277 // Before doing anything with a Mem operand, we need to ensure that the
5186 // Base and Index components are in physical registers. 5278 // Base and Index components are in physical registers.
5187 Variable *Base = Mem->getBase(); 5279 Variable *Base = Mem->getBase();
5188 Variable *Index = Mem->getIndex(); 5280 Variable *Index = Mem->getIndex();
5189 Variable *RegBase = nullptr; 5281 Variable *RegBase = nullptr;
5190 Variable *RegIndex = nullptr; 5282 Variable *RegIndex = nullptr;
5191 if (Base) { 5283 if (Base) {
5192 RegBase = legalizeToReg(Base); 5284 RegBase = legalizeToReg(Base);
5193 } 5285 }
5194 if (Index) { 5286 if (Index) {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
5252 // Immediate specifically not allowed 5344 // Immediate specifically not allowed
5253 NeedsReg = true; 5345 NeedsReg = true;
5254 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5346 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5255 // On x86, FP constants are lowered to mem operands. 5347 // On x86, FP constants are lowered to mem operands.
5256 NeedsReg = true; 5348 NeedsReg = true;
5257 if (NeedsReg) { 5349 if (NeedsReg) {
5258 From = copyToReg(From, RegNum); 5350 From = copyToReg(From, RegNum);
5259 } 5351 }
5260 return From; 5352 return From;
5261 } 5353 }
5262 if (auto Var = llvm::dyn_cast<Variable>(From)) { 5354 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5263 // Check if the variable is guaranteed a physical register. This can happen 5355 // Check if the variable is guaranteed a physical register. This can happen
5264 // either when the variable is pre-colored or when it is assigned infinite 5356 // either when the variable is pre-colored or when it is assigned infinite
5265 // weight. 5357 // weight.
5266 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5358 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5267 // We need a new physical register for the operand if: 5359 // We need a new physical register for the operand if:
5268 // Mem is not allowed and Var isn't guaranteed a physical 5360 // Mem is not allowed and Var isn't guaranteed a physical
5269 // register, or 5361 // register, or
5270 // RegNum is required and Var->getRegNum() doesn't match. 5362 // RegNum is required and Var->getRegNum() doesn't match.
5271 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5363 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5272 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5364 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
5507 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); 5599 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
5508 Immediate->setShouldBePooled(true); 5600 Immediate->setShouldBePooled(true);
5509 // if we have already assigned a phy register, we must come from 5601 // if we have already assigned a phy register, we must come from
5510 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the 5602 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
5511 // assigned register as this assignment is that start of its use-def 5603 // assigned register as this assignment is that start of its use-def
5512 // chain. So we add RegNum argument here. 5604 // chain. So we add RegNum argument here.
5513 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5605 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5514 IceString Label; 5606 IceString Label;
5515 llvm::raw_string_ostream Label_stream(Label); 5607 llvm::raw_string_ostream Label_stream(Label);
5516 Immediate->emitPoolLabel(Label_stream, Ctx); 5608 Immediate->emitPoolLabel(Label_stream, Ctx);
5517 const RelocOffsetT Offset = 0; 5609 constexpr RelocOffsetT Offset = 0;
5518 const bool SuppressMangling = true; 5610 constexpr bool SuppressMangling = true;
5519 Constant *Symbol = 5611 Constant *Symbol =
5520 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5612 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5521 typename Traits::X86OperandMem *MemOperand = 5613 typename Traits::X86OperandMem *MemOperand =
5522 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, 5614 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
5523 Symbol); 5615 Symbol);
5524 _mov(Reg, MemOperand); 5616 _mov(Reg, MemOperand);
5525 return Reg; 5617 return Reg;
5526 } 5618 }
5527 assert("Unsupported -randomize-pool-immediates option" && false); 5619 assert("Unsupported -randomize-pool-immediates option" && false);
5528 } 5620 }
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
5604 // phi lowering, we should not ask for new physical registers in 5696 // phi lowering, we should not ask for new physical registers in
5605 // general. However, if we do meet Memory Operand during phi lowering, 5697 // general. However, if we do meet Memory Operand during phi lowering,
5606 // we should not blind or pool the immediates for now. 5698 // we should not blind or pool the immediates for now.
5607 if (RegNum != Variable::NoRegister) 5699 if (RegNum != Variable::NoRegister)
5608 return MemOperand; 5700 return MemOperand;
5609 Variable *RegTemp = makeReg(IceType_i32); 5701 Variable *RegTemp = makeReg(IceType_i32);
5610 IceString Label; 5702 IceString Label;
5611 llvm::raw_string_ostream Label_stream(Label); 5703 llvm::raw_string_ostream Label_stream(Label);
5612 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); 5704 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
5613 MemOperand->getOffset()->setShouldBePooled(true); 5705 MemOperand->getOffset()->setShouldBePooled(true);
5614 const RelocOffsetT SymOffset = 0; 5706 constexpr RelocOffsetT SymOffset = 0;
5615 bool SuppressMangling = true; 5707 constexpr bool SuppressMangling = true;
5616 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5708 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
5617 SuppressMangling); 5709 SuppressMangling);
5618 typename Traits::X86OperandMem *SymbolOperand = 5710 typename Traits::X86OperandMem *SymbolOperand =
5619 Traits::X86OperandMem::create( 5711 Traits::X86OperandMem::create(
5620 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); 5712 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
5621 _mov(RegTemp, SymbolOperand); 5713 _mov(RegTemp, SymbolOperand);
5622 // If we have a base variable here, we should add the lea instruction 5714 // If we have a base variable here, we should add the lea instruction
5623 // to add the value of the base variable to RegTemp. If there is no 5715 // to add the value of the base variable to RegTemp. If there is no
5624 // base variable, we won't need this lea instruction. 5716 // base variable, we won't need this lea instruction.
5625 if (MemOperand->getBase()) { 5717 if (MemOperand->getBase()) {
(...skipping 15 matching lines...) Expand all
5641 } 5733 }
5642 // the offset is not eligible for blinding or pooling, return the original 5734 // the offset is not eligible for blinding or pooling, return the original
5643 // mem operand 5735 // mem operand
5644 return MemOperand; 5736 return MemOperand;
5645 } 5737 }
5646 5738
5647 } // end of namespace X86Internal 5739 } // end of namespace X86Internal
5648 } // end of namespace Ice 5740 } // end of namespace Ice
5649 5741
5650 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5742 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698