| Index: src/IceTargetLoweringX86BaseImpl.h | 
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h | 
| index 09bc6dde0a0a0e819b0e2a029e4c6d488a671412..1ad9b2941799f01b340cbfff45cbe1d9f6ea2775 100644 | 
| --- a/src/IceTargetLoweringX86BaseImpl.h | 
| +++ b/src/IceTargetLoweringX86BaseImpl.h | 
| @@ -587,7 +587,7 @@ template <class Machine> void TargetX86Base<Machine>::findRMW() { | 
| // Converts a ConstantInteger32 operand into its constant value, or | 
| // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 
| inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 
| -  if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 
| +  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 
| return Integer->getValue(); | 
| return Intrinsics::MemoryOrderInvalid; | 
| } | 
| @@ -622,7 +622,7 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { | 
| if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 
| // An InstLoad always qualifies. | 
| LoadDest = Load->getDest(); | 
| -        const bool DoLegalize = false; | 
| +        constexpr bool DoLegalize = false; | 
| LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 
| LoadDest->getType(), DoLegalize); | 
| } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 
| @@ -635,7 +635,7 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { | 
| Intrinsics::isMemoryOrderValid( | 
| ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 
| LoadDest = Intrin->getDest(); | 
| -          const bool DoLegalize = false; | 
| +          constexpr bool DoLegalize = false; | 
| LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 
| DoLegalize); | 
| } | 
| @@ -733,8 +733,8 @@ Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 
| } | 
|  | 
| template <class Machine> | 
| -IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 
| -  return Traits::getRegName(RegNum, Ty); | 
| +IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const { | 
| +  return Traits::getRegName(RegNum); | 
| } | 
|  | 
| template <class Machine> | 
| @@ -797,9 +797,8 @@ TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 
| if (!hasFramePointer()) | 
| Offset += getStackAdjustment(); | 
| } | 
| -  return typename Traits::Address( | 
| -      Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset, | 
| -      AssemblerFixup::NoFixup); | 
| +  return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, | 
| +                                  AssemblerFixup::NoFixup); | 
| } | 
|  | 
| /// Helper function for addProlog(). | 
| @@ -1048,23 +1047,23 @@ bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 
| return false; | 
| // Limit the number of lea/shl operations for a single multiply, to a | 
| // somewhat arbitrary choice of 3. | 
| -  const uint32_t MaxOpsForOptimizedMul = 3; | 
| +  constexpr uint32_t MaxOpsForOptimizedMul = 3; | 
| if (CountOps > MaxOpsForOptimizedMul) | 
| return false; | 
| _mov(T, Src0); | 
| Constant *Zero = Ctx->getConstantZero(IceType_i32); | 
| for (uint32_t i = 0; i < Count9; ++i) { | 
| -    const uint16_t Shift = 3; // log2(9-1) | 
| +    constexpr uint16_t Shift = 3; // log2(9-1) | 
| _lea(T, | 
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 
| } | 
| for (uint32_t i = 0; i < Count5; ++i) { | 
| -    const uint16_t Shift = 2; // log2(5-1) | 
| +    constexpr uint16_t Shift = 2; // log2(5-1) | 
| _lea(T, | 
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 
| } | 
| for (uint32_t i = 0; i < Count3; ++i) { | 
| -    const uint16_t Shift = 1; // log2(3-1) | 
| +    constexpr uint16_t Shift = 1; // log2(3-1) | 
| _lea(T, | 
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 
| } | 
| @@ -1216,7 +1215,8 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op, | 
| -    //   t1:ecx = c.lo & 0xff | 
| +    //   t1:cl = c.lo & 0xff | 
| //   t2 = b.lo | 
| //   t3 = b.hi | 
| -    _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 
| +    T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 
| +    _mov(T_1, Src1Lo); | 
| _mov(T_2, Src0Lo); | 
| _mov(T_3, Src0Hi); | 
| switch (Op) { | 
| @@ -1324,7 +1324,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| // and hiOperand() to be used. | 
| switch (Inst->getOp()) { | 
| case InstArithmetic::Udiv: { | 
| -      const SizeT MaxSrcs = 2; | 
| +      constexpr SizeT MaxSrcs = 2; | 
| InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 
| Call->addArg(Inst->getSrc(0)); | 
| Call->addArg(Inst->getSrc(1)); | 
| @@ -1332,7 +1332,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| return; | 
| } | 
| case InstArithmetic::Sdiv: { | 
| -      const SizeT MaxSrcs = 2; | 
| +      constexpr SizeT MaxSrcs = 2; | 
| InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | 
| Call->addArg(Inst->getSrc(0)); | 
| Call->addArg(Inst->getSrc(1)); | 
| @@ -1340,7 +1340,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| return; | 
| } | 
| case InstArithmetic::Urem: { | 
| -      const SizeT MaxSrcs = 2; | 
| +      constexpr SizeT MaxSrcs = 2; | 
| InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | 
| Call->addArg(Inst->getSrc(0)); | 
| Call->addArg(Inst->getSrc(1)); | 
| @@ -1348,7 +1348,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| return; | 
| } | 
| case InstArithmetic::Srem: { | 
| -      const SizeT MaxSrcs = 2; | 
| +      constexpr SizeT MaxSrcs = 2; | 
| InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | 
| Call->addArg(Inst->getSrc(0)); | 
| Call->addArg(Inst->getSrc(1)); | 
| @@ -1529,14 +1529,14 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
|  | 
| // Mask that directs pshufd to create a vector with entries | 
| // Src[1, 0, 3, 0] | 
| -        const unsigned Constant1030 = 0x31; | 
| +        constexpr unsigned Constant1030 = 0x31; | 
| Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | 
| // Mask that directs shufps to create a vector with entries | 
| // Dest[0, 2], Src[0, 2] | 
| -        const unsigned Mask0202 = 0x88; | 
| +        constexpr unsigned Mask0202 = 0x88; | 
| // Mask that directs pshufd to create a vector with entries | 
| // Src[0, 2, 1, 3] | 
| -        const unsigned Mask0213 = 0xd8; | 
| +        constexpr unsigned Mask0213 = 0xd8; | 
| Variable *T1 = makeReg(IceType_v4i32); | 
| Variable *T2 = makeReg(IceType_v4i32); | 
| Variable *T3 = makeReg(IceType_v4i32); | 
| @@ -1631,9 +1631,9 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| return; | 
| } | 
| // The 8-bit version of imul only allows the form "imul r/m8" where T must | 
| -    // be in eax. | 
| +    // be in al. | 
| if (isByteSizedArithType(Dest->getType())) { | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_al); | 
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 
| _imul(T, Src0 == Src1 ? T : Src1); | 
| _mov(Dest, T); | 
| @@ -1649,22 +1649,31 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| break; | 
| case InstArithmetic::Shl: | 
| _mov(T, Src0); | 
| -    if (!llvm::isa<ConstantInteger32>(Src1)) | 
| -      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 
| +    if (!llvm::isa<ConstantInteger32>(Src1)) { | 
| +      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 
| +      _mov(Cl, Src1); | 
| +      Src1 = Cl; | 
| +    } | 
| _shl(T, Src1); | 
| _mov(Dest, T); | 
| break; | 
| case InstArithmetic::Lshr: | 
| _mov(T, Src0); | 
| -    if (!llvm::isa<ConstantInteger32>(Src1)) | 
| -      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 
| +    if (!llvm::isa<ConstantInteger32>(Src1)) { | 
| +      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 
| +      _mov(Cl, Src1); | 
| +      Src1 = Cl; | 
| +    } | 
| _shr(T, Src1); | 
| _mov(Dest, T); | 
| break; | 
| case InstArithmetic::Ashr: | 
| _mov(T, Src0); | 
| -    if (!llvm::isa<ConstantInteger32>(Src1)) | 
| -      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 
| +    if (!llvm::isa<ConstantInteger32>(Src1)) { | 
| +      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 
| +      _mov(Cl, Src1); | 
| +      Src1 = Cl; | 
| +    } | 
| _sar(T, Src1); | 
| _mov(Dest, T); | 
| break; | 
| @@ -1684,14 +1693,28 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 
| Context.insert(InstFakeDef::create(Func, T_eax)); | 
| _xor(T_eax, T_eax); | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_al); | 
| _div(T, Src1, T); | 
| _mov(Dest, T); | 
| Context.insert(InstFakeUse::create(Func, T_eax)); | 
| } else { | 
| -      Constant *Zero = Ctx->getConstantZero(IceType_i32); | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| -      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 
| +      Type Ty = Dest->getType(); | 
| +      uint32_t Eax = Traits::RegisterSet::Reg_eax; | 
| +      uint32_t Edx = Traits::RegisterSet::Reg_edx; | 
| +      switch (Ty) { | 
| +      default: | 
| +        llvm_unreachable("Bad type for udiv"); | 
| +      // fallthrough | 
| +      case IceType_i32: | 
| +        break; | 
| +      case IceType_i16: | 
| +        Eax = Traits::RegisterSet::Reg_ax; | 
| +        Edx = Traits::RegisterSet::Reg_dx; | 
| +        break; | 
| +      } | 
| +      Constant *Zero = Ctx->getConstantZero(Ty); | 
| +      _mov(T, Src0, Eax); | 
| +      _mov(T_edx, Zero, Edx); | 
| _div(T, Src1, T_edx); | 
| _mov(Dest, T); | 
| } | 
| @@ -1733,18 +1756,26 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| } | 
| } | 
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 
| -    if (isByteSizedArithType(Dest->getType())) { | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| -      _cbwdq(T, T); | 
| -      _idiv(T, Src1, T); | 
| -      _mov(Dest, T); | 
| -    } else { | 
| -      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 
| +    switch (Type Ty = Dest->getType()) { | 
| +    default: | 
| +      llvm_unreachable("Bad type for sdiv"); | 
| +    // fallthrough | 
| +    case IceType_i32: | 
| +      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 
| _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| -      _cbwdq(T_edx, T); | 
| -      _idiv(T, Src1, T_edx); | 
| -      _mov(Dest, T); | 
| +      break; | 
| +    case IceType_i16: | 
| +      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 
| +      break; | 
| +    case IceType_i8: | 
| +      T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_al); | 
| +      break; | 
| } | 
| +    _cbwdq(T_edx, T); | 
| +    _idiv(T, Src1, T_edx); | 
| +    _mov(Dest, T); | 
| break; | 
| case InstArithmetic::Urem: | 
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 
| @@ -1752,7 +1783,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 
| Context.insert(InstFakeDef::create(Func, T_eax)); | 
| _xor(T_eax, T_eax); | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_al); | 
| _div(T, Src1, T); | 
| // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 
| // mov %ah, %al because it would make x86-64 codegen more complicated. If | 
| @@ -1764,10 +1795,24 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| _mov(Dest, T); | 
| Context.insert(InstFakeUse::create(Func, T_eax)); | 
| } else { | 
| -      Constant *Zero = Ctx->getConstantZero(IceType_i32); | 
| -      T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 
| +      Type Ty = Dest->getType(); | 
| +      uint32_t Eax = Traits::RegisterSet::Reg_eax; | 
| +      uint32_t Edx = Traits::RegisterSet::Reg_edx; | 
| +      switch (Ty) { | 
| +      default: | 
| +        llvm_unreachable("Bad type for urem"); | 
| +      // fallthrough | 
| +      case IceType_i32: | 
| +        break; | 
| +      case IceType_i16: | 
| +        Eax = Traits::RegisterSet::Reg_ax; | 
| +        Edx = Traits::RegisterSet::Reg_dx; | 
| +        break; | 
| +      } | 
| +      Constant *Zero = Ctx->getConstantZero(Ty); | 
| +      T_edx = makeReg(Dest->getType(), Edx); | 
| _mov(T_edx, Zero); | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| +      _mov(T, Src0, Eax); | 
| _div(T_edx, Src1, T); | 
| _mov(Dest, T_edx); | 
| } | 
| @@ -1814,28 +1859,35 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| } | 
| } | 
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 
| -    if (isByteSizedArithType(Dest->getType())) { | 
| -      _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| -      // T is %al. | 
| -      _cbwdq(T, T); | 
| -      _idiv(T, Src1, T); | 
| -      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 
| -      Context.insert(InstFakeDef::create(Func, T_eax)); | 
| -      // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 
| -      // mov %ah, %al because it would make x86-64 codegen more complicated. If | 
| -      // this ever becomes a problem we can introduce a pseudo rem instruction | 
| -      // that returns the remainder in %al directly (and uses a mov for copying | 
| -      // %ah to %al.) | 
| -      static constexpr uint8_t AlSizeInBits = 8; | 
| -      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 
| -      _mov(Dest, T); | 
| -      Context.insert(InstFakeUse::create(Func, T_eax)); | 
| -    } else { | 
| -      T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 
| +    switch (Type Ty = Dest->getType()) { | 
| +    default: | 
| +      llvm_unreachable("Bad type for srem"); | 
| +    // fallthrough | 
| +    case IceType_i32: | 
| +      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 
| _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 
| _cbwdq(T_edx, T); | 
| _idiv(T_edx, Src1, T); | 
| _mov(Dest, T_edx); | 
| +      break; | 
| +    case IceType_i16: | 
| +      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 
| +      _cbwdq(T_edx, T); | 
| +      _idiv(T_edx, Src1, T); | 
| +      _mov(Dest, T_edx); | 
| +      break; | 
| +    case IceType_i8: | 
| +      T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 
| +      // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). | 
| +      // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah); | 
| +      _mov(T, Src0, Traits::RegisterSet::Reg_al); | 
| +      _cbwdq(T_edx, T); | 
| +      _idiv(T_edx, Src1, T); | 
| +      static constexpr uint8_t AlSizeInBits = 8; | 
| +      _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits)); | 
| +      _mov(Dest, T_edx); | 
| +      break; | 
| } | 
| break; | 
| case InstArithmetic::Fadd: | 
| @@ -1859,7 +1911,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 
| _mov(Dest, T); | 
| break; | 
| case InstArithmetic::Frem: { | 
| -    const SizeT MaxSrcs = 2; | 
| +    constexpr SizeT MaxSrcs = 2; | 
| Type Ty = Dest->getType(); | 
| InstCall *Call = makeHelperCall( | 
| isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 
| @@ -2114,7 +2166,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 
| _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 
| _movp(Dest, T); | 
| } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| Type SrcType = Inst->getSrc(0)->getType(); | 
| InstCall *Call = | 
| makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 
| @@ -2145,14 +2197,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 
| if (isVectorType(Dest->getType())) { | 
| assert(Dest->getType() == IceType_v4i32 && | 
| Inst->getSrc(0)->getType() == IceType_v4f32); | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 
| Call->addArg(Inst->getSrc(0)); | 
| lowerCall(Call); | 
| } else if (Dest->getType() == IceType_i64 || | 
| (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 
| // Use a helper for both x86-32 and x86-64. | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| Type DestType = Dest->getType(); | 
| Type SrcType = Inst->getSrc(0)->getType(); | 
| IceString TargetString; | 
| @@ -2201,7 +2253,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 
| _movp(Dest, T); | 
| } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 
| // Use a helper for x86-32. | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| Type DestType = Dest->getType(); | 
| InstCall *Call = | 
| makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 
| @@ -2236,7 +2288,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 
| if (isVectorType(Src0->getType())) { | 
| assert(Dest->getType() == IceType_v4f32 && | 
| Src0->getType() == IceType_v4i32); | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 
| Call->addArg(Src0); | 
| lowerCall(Call); | 
| @@ -2244,7 +2296,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 
| (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 
| // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 
| // x86-32. | 
| -      const SizeT MaxSrcs = 1; | 
| +      constexpr SizeT MaxSrcs = 1; | 
| Type DestType = Dest->getType(); | 
| IceString TargetString; | 
| if (isInt32Asserting32Or64(Src0->getType())) { | 
| @@ -2460,13 +2512,17 @@ void TargetX86Base<Machine>::lowerExtractElement( | 
| Type Ty = SourceVectNotLegalized->getType(); | 
| Type ElementTy = typeElementType(Ty); | 
| Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 
| -  Variable *ExtractedElementR = makeReg(InVectorElementTy); | 
|  | 
| // TODO(wala): Determine the best lowering sequences for each type. | 
| bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 
| -                     InstructionSet >= Traits::SSE4_1; | 
| -  if (CanUsePextr && Ty != IceType_v4f32) { | 
| -    // Use pextrb, pextrw, or pextrd. | 
| +                     (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32); | 
| +  Variable *ExtractedElementR = | 
| +      makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy); | 
| +  if (CanUsePextr) { | 
| +    // Use pextrb, pextrw, or pextrd.  The "b" and "w" versions clear the upper | 
| +    // bits of the destination register, so we represent this by always | 
| +    // extracting into an i32 register.  The _mov into Dest below will do | 
| +    // truncation as necessary. | 
| Constant *Mask = Ctx->getConstantInt32(Index); | 
| Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); | 
| _pextr(ExtractedElementR, SourceVectR, Mask); | 
| @@ -2983,6 +3039,13 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 
| if (Ty == IceType_v4f32) | 
| _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 
| else | 
| +      // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source | 
| +      // operand is a register, it must be a full r32 register like eax, and not | 
| +      // ax/al/ah.  For filetype=asm, InstX86Pinsr<Machine>::emit() compensates | 
| +      // for the use of r16 and r8 by converting them through getBaseReg(), | 
| +      // while emitIAS() validates that the original and base register encodings | 
| +      // are the same.  But for an "interior" register like ah, it should | 
| +      // probably be copied into an r32 via movzx so that the types work out. | 
| _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 
| _movp(Inst->getDest(), T); | 
| } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 
| @@ -3317,7 +3380,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( | 
| } else { | 
| FirstVal = Val; | 
| } | 
| -    const bool IsCttz = false; | 
| +    constexpr bool IsCttz = false; | 
| lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 
| SecondVal); | 
| return; | 
| @@ -3334,7 +3397,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( | 
| } else { | 
| FirstVal = Val; | 
| } | 
| -    const bool IsCttz = true; | 
| +    constexpr bool IsCttz = true; | 
| lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 
| SecondVal); | 
| return; | 
| @@ -3432,7 +3495,8 @@ template <class Machine> | 
| void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 
| Operand *Ptr, Operand *Expected, | 
| Operand *Desired) { | 
| -  if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { | 
| +  Type Ty = Expected->getType(); | 
| +  if (!Traits::Is64Bit && Ty == IceType_i64) { | 
| // Reserve the pre-colored registers first, before adding any more | 
| // infinite-weight variables from formMemoryOperand's legalization. | 
| Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 
| @@ -3443,9 +3507,8 @@ void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 
| _mov(T_edx, hiOperand(Expected)); | 
| _mov(T_ebx, loOperand(Desired)); | 
| _mov(T_ecx, hiOperand(Desired)); | 
| -    typename Traits::X86OperandMem *Addr = | 
| -        formMemoryOperand(Ptr, Expected->getType()); | 
| -    const bool Locked = true; | 
| +    typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 
| +    constexpr bool Locked = true; | 
| _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 
| Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 
| Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 
| @@ -3453,12 +3516,26 @@ void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 
| _mov(DestHi, T_edx); | 
| return; | 
| } | 
| -  Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); | 
| +  int32_t Eax; | 
| +  switch (Ty) { | 
| +  default: | 
| +    llvm_unreachable("Bad type for cmpxchg"); | 
| +  // fallthrough | 
| +  case IceType_i32: | 
| +    Eax = Traits::RegisterSet::Reg_eax; | 
| +    break; | 
| +  case IceType_i16: | 
| +    Eax = Traits::RegisterSet::Reg_ax; | 
| +    break; | 
| +  case IceType_i8: | 
| +    Eax = Traits::RegisterSet::Reg_al; | 
| +    break; | 
| +  } | 
| +  Variable *T_eax = makeReg(Ty, Eax); | 
| _mov(T_eax, Expected); | 
| -  typename Traits::X86OperandMem *Addr = | 
| -      formMemoryOperand(Ptr, Expected->getType()); | 
| +  typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 
| Variable *DesiredReg = legalizeToReg(Desired); | 
| -  const bool Locked = true; | 
| +  constexpr bool Locked = true; | 
| _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 
| _mov(DestPrev, T_eax); | 
| } | 
| @@ -3560,7 +3637,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 
| } | 
| typename Traits::X86OperandMem *Addr = | 
| formMemoryOperand(Ptr, Dest->getType()); | 
| -    const bool Locked = true; | 
| +    constexpr bool Locked = true; | 
| Variable *T = nullptr; | 
| _mov(T, Val); | 
| _xadd(Addr, T, Locked); | 
| @@ -3576,7 +3653,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 
| } | 
| typename Traits::X86OperandMem *Addr = | 
| formMemoryOperand(Ptr, Dest->getType()); | 
| -    const bool Locked = true; | 
| +    constexpr bool Locked = true; | 
| Variable *T = nullptr; | 
| _mov(T, Val); | 
| _neg(T); | 
| @@ -3684,7 +3761,7 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, | 
| _mov(T_ecx, hiOperand(Val)); | 
| Context.insert(Label); | 
| } | 
| -    const bool Locked = true; | 
| +    constexpr bool Locked = true; | 
| _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 
| _br(Traits::Cond::Br_ne, Label); | 
| if (!IsXchg8b) { | 
| @@ -3711,7 +3788,22 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, | 
| return; | 
| } | 
| typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 
| -  Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); | 
| +  int32_t Eax; | 
| +  switch (Ty) { | 
| +  default: | 
| +    llvm_unreachable("Bad type for atomicRMW"); | 
| +  // fallthrough | 
| +  case IceType_i32: | 
| +    Eax = Traits::RegisterSet::Reg_eax; | 
| +    break; | 
| +  case IceType_i16: | 
| +    Eax = Traits::RegisterSet::Reg_ax; | 
| +    break; | 
| +  case IceType_i8: | 
| +    Eax = Traits::RegisterSet::Reg_al; | 
| +    break; | 
| +  } | 
| +  Variable *T_eax = makeReg(Ty, Eax); | 
| _mov(T_eax, Addr); | 
| typename Traits::Insts::Label *Label = | 
| Traits::Insts::Label::create(Func, this); | 
| @@ -3721,7 +3813,7 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, | 
| Variable *T = makeReg(Ty); | 
| _mov(T, T_eax); | 
| (this->*Op_Lo)(T, Val); | 
| -  const bool Locked = true; | 
| +  constexpr bool Locked = true; | 
| _cmpxchg(Addr, T_eax, T, Locked); | 
| _br(Traits::Cond::Br_ne, Label); | 
| // If Val is a variable, model the extended live range of Val through | 
| @@ -5218,7 +5310,7 @@ Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, | 
| return Reg; | 
| } else { | 
| // SSE has no left shift operation for vectors of 8 bit integers. | 
| -    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 
| +    constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 
| Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 
| Variable *Reg = makeReg(Ty, RegNum); | 
| _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 
| @@ -5252,7 +5344,7 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 
| // TODO(wala,stichnot): lea should not | 
| // be required. The address of the stack slot is known at compile time | 
| // (although not until after addProlog()). | 
| -  const Type PointerType = IceType_i32; | 
| +  constexpr Type PointerType = IceType_i32; | 
| Variable *Loc = makeReg(PointerType); | 
| _lea(Loc, Slot); | 
| Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 
| @@ -5305,7 +5397,7 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, | 
| } | 
| } | 
|  | 
| -  if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 
| +  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 
| // Before doing anything with a Mem operand, we need to ensure that the | 
| // Base and Index components are in physical registers. | 
| Variable *Base = Mem->getBase(); | 
| @@ -5383,7 +5475,7 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, | 
| } | 
| return From; | 
| } | 
| -  if (auto Var = llvm::dyn_cast<Variable>(From)) { | 
| +  if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 
| // Check if the variable is guaranteed a physical register. This can happen | 
| // either when the variable is pre-colored or when it is assigned infinite | 
| // weight. | 
| @@ -5638,8 +5730,8 @@ Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 
| IceString Label; | 
| llvm::raw_string_ostream Label_stream(Label); | 
| Immediate->emitPoolLabel(Label_stream, Ctx); | 
| -      const RelocOffsetT Offset = 0; | 
| -      const bool SuppressMangling = true; | 
| +      constexpr RelocOffsetT Offset = 0; | 
| +      constexpr bool SuppressMangling = true; | 
| Constant *Symbol = | 
| Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 
| typename Traits::X86OperandMem *MemOperand = | 
| @@ -5735,8 +5827,8 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate( | 
| llvm::raw_string_ostream Label_stream(Label); | 
| MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 
| MemOperand->getOffset()->setShouldBePooled(true); | 
| -        const RelocOffsetT SymOffset = 0; | 
| -        bool SuppressMangling = true; | 
| +        constexpr RelocOffsetT SymOffset = 0; | 
| +        constexpr bool SuppressMangling = true; | 
| Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 
| SuppressMangling); | 
| typename Traits::X86OperandMem *SymbolOperand = | 
|  |