diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
--- a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
@@ -22,8 +22,7 @@
 bool optimizeMOVSX(MCInst &MI);
 bool optimizeINCDEC(MCInst &MI, bool In64BitMode);
 bool optimizeMOV(MCInst &MI, bool In64BitMode);
-bool optimizeToFixedRegisterForm(MCInst &MI);
-bool optimizeToShortImmediateForm(MCInst &MI);
+bool optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI);
 } // namespace X86
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
--- a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
@@ -370,7 +370,7 @@
 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
 /// a short fixed-register form.
-bool X86::optimizeToFixedRegisterForm(MCInst &MI) {
+static bool optimizeToFixedRegisterForm(MCInst &MI) {
   unsigned NewOpc;
   switch (MI.getOpcode()) {
   default:
@@ -425,7 +425,7 @@
   return true;
 }

-bool X86::optimizeToShortImmediateForm(MCInst &MI) {
+static bool optimizeToShortImmediateForm(MCInst &MI) {
   unsigned NewOpc;
   switch (MI.getOpcode()) {
   default:
@@ -442,9 +442,59 @@
     FROM_TO(SBB32ri, SBB32ri8)
     FROM_TO(SBB64mi32, SBB64mi8)
     FROM_TO(SBB64ri32, SBB64ri8)
+    FROM_TO(ADD16mi, ADD16mi8)
+    FROM_TO(ADD16ri, ADD16ri8)
+    FROM_TO(ADD32mi, ADD32mi8)
+    FROM_TO(ADD32ri, ADD32ri8)
+    FROM_TO(ADD64mi32, ADD64mi8)
+    FROM_TO(ADD64ri32, ADD64ri8)
+    FROM_TO(AND16mi, AND16mi8)
+    FROM_TO(AND16ri, AND16ri8)
+    FROM_TO(AND32mi, AND32mi8)
+    FROM_TO(AND32ri, AND32ri8)
+    FROM_TO(AND64mi32, AND64mi8)
+    FROM_TO(AND64ri32, AND64ri8)
+    FROM_TO(OR16mi, OR16mi8)
+    FROM_TO(OR16ri, OR16ri8)
+    FROM_TO(OR32mi, OR32mi8)
+    FROM_TO(OR32ri, OR32ri8)
+    FROM_TO(OR64mi32, OR64mi8)
+    FROM_TO(OR64ri32, OR64ri8)
+    FROM_TO(SUB16mi, SUB16mi8)
+    FROM_TO(SUB16ri, SUB16ri8)
+    FROM_TO(SUB32mi, SUB32mi8)
+    FROM_TO(SUB32ri, SUB32ri8)
+    FROM_TO(SUB64mi32, SUB64mi8)
+    FROM_TO(SUB64ri32, SUB64ri8)
+    FROM_TO(XOR16mi, XOR16mi8)
+    FROM_TO(XOR16ri, XOR16ri8)
+    FROM_TO(XOR32mi, XOR32mi8)
+    FROM_TO(XOR32ri, XOR32ri8)
+    FROM_TO(XOR64mi32, XOR64mi8)
+    FROM_TO(XOR64ri32, XOR64ri8)
+    FROM_TO(CMP16mi, CMP16mi8)
+    FROM_TO(CMP16ri, CMP16ri8)
+    FROM_TO(CMP32mi, CMP32mi8)
+    FROM_TO(CMP32ri, CMP32ri8)
+    FROM_TO(CMP64mi32, CMP64mi8)
+    FROM_TO(CMP64ri32, CMP64ri8)
+  }
+  MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
+  if (LastOp.isExpr()) {
+    const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(LastOp.getExpr());
+    if (!SRE || SRE->getKind() != MCSymbolRefExpr::VK_X86_ABS8)
+      return false;
+  } else if (LastOp.isImm()) {
+    if (!isInt<8>(LastOp.getImm()))
+      return false;
   }
-  if (!isInt<8>(MI.getOperand(MI.getNumOperands() - 1).getImm()))
-    return false;
   MI.setOpcode(NewOpc);
   return true;
 }
+
+bool X86::optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI) {
+  // We may optimize twice here.
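+  // Trying the imm8 form first matters for the accumulator registers: e.g.
+  // "andl $15, %eax" is 3 bytes as AND32ri8 (83 /4 ib) but would be 5 bytes
+  // as the EAX-specific AND32i32 (25 id), so when both rewrites could apply
+  // the short-immediate one should win.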
+ bool ShortImm = optimizeToShortImmediateForm(MI); + bool FixedReg = optimizeToFixedRegisterForm(MI); + return ShortImm || FixedReg; +} diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp --- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp @@ -285,15 +285,15 @@ // The instructions we actually care about are movs onto the stack or special // cases of constant-stores to stack switch (MI->getOpcode()) { - case X86::AND16mi8: - case X86::AND32mi8: - case X86::AND64mi8: { + case X86::AND16mi: + case X86::AND32mi: + case X86::AND64mi32: { const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands); return ImmOp.getImm() == 0 ? Convert : Exit; } - case X86::OR16mi8: - case X86::OR32mi8: - case X86::OR64mi8: { + case X86::OR16mi: + case X86::OR32mi: + case X86::OR64mi32: { const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands); return ImmOp.getImm() == -1 ? Convert : Exit; } @@ -512,12 +512,12 @@ switch (Store->getOpcode()) { default: llvm_unreachable("Unexpected Opcode!"); - case X86::AND16mi8: - case X86::AND32mi8: - case X86::AND64mi8: - case X86::OR16mi8: - case X86::OR32mi8: - case X86::OR64mi8: + case X86::AND16mi: + case X86::AND32mi: + case X86::AND64mi32: + case X86::OR16mi: + case X86::OR32mi: + case X86::OR64mi32: case X86::MOV32mi: case X86::MOV64mi32: PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32; diff --git a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp --- a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp +++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp @@ -189,10 +189,10 @@ } } -static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) { +static unsigned getSubOpcode(bool Is64Bit) { if (Is64Bit) - return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32; - return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri; + return X86::SUB64ri32; + return X86::SUB32ri; } void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { @@ -242,8 +242,7 @@ .addReg(RegA, RegState::Undef); } else { // Sub. - BuildMI(*MBB, I, DL, - TII->get(getSubOpcode(Is64BitAlloca, Amount)), StackPtr) + BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64BitAlloca)), StackPtr) .addReg(StackPtr) .addImm(Amount); } diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1376,7 +1376,6 @@ /// If we have a comparison with RHS as the RHS of the comparison, return an /// opcode that works for the compare (e.g. CMP32ri) otherwise return 0. static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { - int64_t Val = RHSC->getSExtValue(); switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: @@ -1384,21 +1383,13 @@ case MVT::i8: return X86::CMP8ri; case MVT::i16: - if (isInt<8>(Val)) - return X86::CMP16ri8; return X86::CMP16ri; case MVT::i32: - if (isInt<8>(Val)) - return X86::CMP32ri8; return X86::CMP32ri; case MVT::i64: - if (isInt<8>(Val)) - return X86::CMP64ri8; // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext // field. - if (isInt<32>(Val)) - return X86::CMP64ri32; - return 0; + return isInt<32>(RHSC->getSExtValue()) ? 
X86::CMP64ri32 : 0; } } diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -186,13 +186,9 @@ // Only convert instructions that we've verified are safe. return nullptr; case X86::ADD64ri32: - case X86::ADD64ri8: case X86::ADD64ri32_DB: - case X86::ADD64ri8_DB: case X86::ADD32ri: - case X86::ADD32ri8: case X86::ADD32ri_DB: - case X86::ADD32ri8_DB: if (!MI.getOperand(2).isImm()) { // convertToThreeAddress will call getImm() // which requires isImm() to be true @@ -374,15 +370,14 @@ static inline unsigned getADDriFromLEA(unsigned LEAOpcode, const MachineOperand &Offset) { - bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); switch (LEAOpcode) { default: llvm_unreachable("Unexpected LEA instruction"); case X86::LEA32r: case X86::LEA64_32r: - return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri; + return X86::ADD32ri; case X86::LEA64r: - return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32; + return X86::ADD64ri32; } } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -105,28 +105,12 @@ (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); } -static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { - if (IsLP64) { - if (isInt<8>(Imm)) - return X86::SUB64ri8; - return X86::SUB64ri32; - } else { - if (isInt<8>(Imm)) - return X86::SUB32ri8; - return X86::SUB32ri; - } +static unsigned getSUBriOpcode(bool IsLP64) { + return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri; } -static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) { - if (IsLP64) { - if (isInt<8>(Imm)) - return X86::ADD64ri8; - return X86::ADD64ri32; - } else { - if (isInt<8>(Imm)) - return X86::ADD32ri8; - return X86::ADD32ri; - } +static unsigned getADDriOpcode(bool IsLP64) { + return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri; } static unsigned getSUBrrOpcode(bool IsLP64) { @@ -138,14 +122,7 @@ } static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { - if (IsLP64) { - if (isInt<8>(Imm)) - return X86::AND64ri8; - return X86::AND64ri32; - } - if (isInt<8>(Imm)) - return X86::AND32ri8; - return X86::AND32ri; + return IsLP64 ? X86::AND64ri32 : X86::AND32ri; } static unsigned getLEArOpcode(bool IsLP64) { @@ -363,8 +340,8 @@ } else { bool IsSub = Offset < 0; uint64_t AbsOffset = IsSub ? -Offset : Offset; - const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset) - : getADDriOpcode(Uses64BitFramePtr, AbsOffset); + const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr) + : getADDriOpcode(Uses64BitFramePtr); MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(AbsOffset); @@ -400,9 +377,8 @@ unsigned Opc = PI->getOpcode(); int Offset = 0; - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - PI->getOperand(0).getReg() == StackPtr){ + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && + PI->getOperand(0).getReg() == StackPtr) { assert(PI->getOperand(1).getReg() == StackPtr); Offset = PI->getOperand(2).getImm(); } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && @@ -413,8 +389,7 @@ PI->getOperand(5).getReg() == X86::NoRegister) { // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. 
Offset = PI->getOperand(4).getImm(); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && PI->getOperand(0).getReg() == StackPtr) { assert(PI->getOperand(1).getReg() == StackPtr); Offset = -PI->getOperand(2).getImm(); @@ -833,7 +808,7 @@ // save loop bound { const unsigned BoundOffset = alignDown(Offset, StackProbeSize); - const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset); + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr); BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) .addReg(FinalStackProbed) .addImm(BoundOffset) @@ -1336,7 +1311,7 @@ { const unsigned SUBOpc = - getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); + getSUBriOpcode(Uses64BitFramePtr); BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) .addReg(StackPtr) .addImm(StackProbeSize) @@ -1367,7 +1342,7 @@ .setMIFlag(MachineInstr::FrameSetup); const unsigned SUBOpc = - getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); + getSUBriOpcode(Uses64BitFramePtr); BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) .addReg(StackPtr) .addImm(StackProbeSize) @@ -1800,7 +1775,7 @@ .addImm(8) .addUse(X86::NoRegister) .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP) .addUse(X86::RSP) .addImm(8) .setMIFlag(MachineInstr::FrameSetup); @@ -2419,7 +2394,7 @@ if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && - (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) + (Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy))) break; FirstCSPop = PI; } @@ -3793,7 +3768,7 @@ if (UsedReg == FramePtr) { // ADD $offset, %ebp - unsigned ADDri = getADDriOpcode(false, EndOffset); + unsigned ADDri = getADDriOpcode(false); BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) .addReg(FramePtr) .addImm(EndOffset) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3400,26 +3400,6 @@ llvm_unreachable("Invalid opcode!"); } }; - auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) { - switch (Opc) { - case X86ISD::ADD: - return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0); - case X86ISD::ADC: - return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0); - case X86ISD::SUB: - return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0); - case X86ISD::SBB: - return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0); - case X86ISD::AND: - return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0); - case X86ISD::OR: - return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0); - case X86ISD::XOR: - return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0); - default: - llvm_unreachable("Invalid opcode!"); - } - }; auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { switch (Opc) { case X86ISD::ADD: @@ -3468,12 +3448,7 @@ Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; } - // First try to fit this into an Imm8 operand. If it doesn't fit, then try - // the larger immediate operand. 
-    if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
-      Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
-      NewOpc = SelectImm8Opcode(Opc);
-    } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
+    if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
       Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
       NewOpc = SelectImmOpcode(Opc);
     }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28916,7 +28916,7 @@
   SDValue Chain = Op->getOperand(0);
   SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
   SDValue Result =
-      SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
+      SDValue(DAG.getMachineNode(X86::SUB64ri32, dl, MVT::i64, CopyRBP,
                                  DAG.getTargetConstant(8, dl, MVT::i32)),
               0);
   // Return { result, chain }.
@@ -36847,16 +36847,11 @@
   return SinkMBB;
 }

-static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
-  if (IsLP64) {
-    if (isInt<8>(Imm))
-      return X86::SUB64ri8;
+static unsigned getSUBriOpcode(bool IsLP64) {
+  if (IsLP64)
     return X86::SUB64ri32;
-  } else {
-    if (isInt<8>(Imm))
-      return X86::SUB32ri8;
+  else
     return X86::SUB32ri;
-  }
 }

 MachineBasicBlock *
@@ -36924,12 +36919,12 @@
   // The property we want to enforce is to never have more than [page alloc] between two probes.

   const unsigned XORMIOpc =
-      TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
+      TFI.Uses64BitFramePtr ? X86::XOR64mi32 : X86::XOR32mi;
   addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
       .addImm(0);

   BuildMI(blockMBB, DL,
-          TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
+          TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)), physSPReg)
       .addReg(physSPReg)
       .addImm(ProbeSize);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2990,7 +2990,7 @@
   def : Pat<(insert_subvector (v16i1 immAllZerosV),
                               (v1i1 (scalar_to_vector GR8:$src)),
                               (iPTR 0)),
-            (KMOVWkr (AND32ri8
+            (KMOVWkr (AND32ri
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1)))>;
 }
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -235,21 +235,13 @@
   let ImmT = Imm8; // Always 8-bit immediate.
 }

-// BinOpRI8_F - Binary instructions with inputs "reg, imm8", where the pattern
-// has EFLAGS as a result.
-class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
-                 SDPatternOperator opnode, Format f>
-  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
-             [(set EFLAGS, (opnode typeinfo.RegClass:$src1,
-                                   typeinfo.Imm8Operator:$src2))]>;
+// BinOpRI8_F - Binary instructions with inputs "reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), []>;

-// BinOpRI8_RF - Binary instructions with inputs "reg, imm8", where the pattern
-// has both a regclass and EFLAGS as a result.
-class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
-                  SDPatternOperator opnode, Format f>
-  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
-             [(set typeinfo.RegClass:$dst, EFLAGS,
-               (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+// BinOpRI8_RF - Binary instructions with inputs "reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), []>;

 // BinOpRI8_RFF - Binary instructions with inputs "reg, imm8".
 class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
@@ -347,28 +339,17 @@
   let ImmT = Imm8; // Always 8-bit immediate.
 }

-// BinOpMI8_RMW - Binary instructions with inputs "[mem], imm8", where the
-// pattern implicitly use EFLAGS.
-class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
-                   SDPatternOperator opnode, Format f>
-  : BinOpMI8<mnemonic, typeinfo, f,
-             [(store (opnode (load addr:$dst),
-                             typeinfo.Imm8Operator:$src), addr:$dst),
-              (implicit EFLAGS)]>,
-    Sched<[WriteALURMW]>;
+// BinOpMI8_RMW - Binary instructions with inputs "[mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, Format f>
+  : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALURMW]>;

 // BinOpMI8_RMW_FF - Binary instructions with inputs "[mem], imm8".
 class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, Format f>
   : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteADCRMW]>;

-// BinOpMI8_F - Binary instructions with inputs "[mem], imm8", where the pattern
-// has EFLAGS as a result.
-class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
-                 SDPatternOperator opnode, Format f>
-  : BinOpMI8<mnemonic, typeinfo, f,
-             [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
-                                   typeinfo.Imm8Operator:$src))]>,
-    Sched<[WriteALU.Folded]>;
+// BinOpMI8_F - Binary instructions with inputs "[mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, Format f>
  : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALU.Folded]>;

 // BinOpAI - Binary instructions with input imm, that implicitly use A reg and
 // implicitly define Areg and EFLAGS.
@@ -876,14 +857,14 @@
     def NAME#32rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
     def NAME#64rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;

-    let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
       def NAME#8ri   : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;

       // NOTE: These are order specific, we want the ri8 forms to be listed
       // first so that they are slightly preferred to the ri forms.
-      def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
-      def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
-      def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+      def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, RegMRM>;
+      def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, RegMRM>;
+      def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, RegMRM>;

       def NAME#16ri  : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
       def NAME#32ri  : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
@@ -891,34 +872,34 @@
     }
   } // Constraints = "$src1 = $dst"

-  let mayLoad = 1, mayStore = 1 in {
+  let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
     def NAME#8mr    : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
     def NAME#16mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
     def NAME#32mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
     def NAME#64mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
-  }
-
-  // NOTE: These are order specific, we want the mi8 forms to be listed
-  // first so that they are slightly preferred to the mi forms.
-  def NAME#16mi8  : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
-  def NAME#32mi8  : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
-  let Predicates = [In64BitMode] in
-  def NAME#64mi8  : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;

-  def NAME#8mi    : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
-  def NAME#16mi   : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
-  def NAME#32mi   : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
-  let Predicates = [In64BitMode] in
-  def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+    // NOTE: These are order specific, we want the mi8 forms to be listed
+    // first so that they are slightly preferred to the mi forms.
+    def NAME#16mi8  : BinOpMI8_RMW<mnemonic, Xi16, MemMRM>;
+    def NAME#32mi8  : BinOpMI8_RMW<mnemonic, Xi32, MemMRM>;
+    let Predicates = [In64BitMode] in
+    def NAME#64mi8  : BinOpMI8_RMW<mnemonic, Xi64, MemMRM>;
+
+    def NAME#8mi    : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+    def NAME#16mi   : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
+    def NAME#32mi   : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
+    let Predicates = [In64BitMode] in
+    def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+  }

   // These are for the disassembler since 0x82 opcode behaves like 0x80, but
   // not in 64-bit mode.
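   // (0x82 is a legacy alias that sign-extends its imm8 exactly like 0x80;
   // x86-64 reclaimed that encoding, hence the Not64BitMode predicate below.)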
   let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
       hasSideEffects = 0 in {
     let Constraints = "$src1 = $dst" in
-      def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+      def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>;
     let mayLoad = 1, mayStore = 1 in
-      def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, null_frag, MemMRM>;
+      def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, MemMRM>;
   }
 } // Defs = [EFLAGS]
@@ -965,7 +946,7 @@

     def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;

-    let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
       // NOTE: These are order specific, we want the ri8 forms to be listed
       // first so that they are slightly preferred to the ri forms.
       def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, RegMRM>;
@@ -985,6 +966,7 @@

     // NOTE: These are order specific, we want the mi8 forms to be listed
     // first so that they are slightly preferred to the mi forms.
+    let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
     def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, MemMRM>;
     def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, MemMRM>;
     let Predicates = [In64BitMode] in
     def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, MemMRM>;
@@ -995,6 +977,7 @@
     def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>;
     let Predicates = [In64BitMode] in
     def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>;
+    }

   // These are for the disassembler since 0x82 opcode behaves like 0x80, but
   // not in 64-bit mode.
@@ -1047,12 +1030,12 @@

     def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;

-    let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
       // NOTE: These are order specific, we want the ri8 forms to be listed
       // first so that they are slightly preferred to the ri forms.
-      def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
-      def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
-      def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+      def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, RegMRM>;
+      def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, RegMRM>;
+      def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, RegMRM>;

       def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
       def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
@@ -1066,24 +1049,26 @@

     // NOTE: These are order specific, we want the mi8 forms to be listed
     // first so that they are slightly preferred to the mi forms.
-    def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
-    def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
-    let Predicates = [In64BitMode] in
-    def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
-
-    def NAME#8mi  : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
-    def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
-    def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
-    let Predicates = [In64BitMode] in
-    def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
+    let mayLoad = 1, hasSideEffects = 0 in {
+      def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>;
+      def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>;
+      let Predicates = [In64BitMode] in
+      def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
+
+      def NAME#8mi  : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+      def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
+      def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
+      let Predicates = [In64BitMode] in
+      def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
+    }

   // These are for the disassembler since 0x82 opcode behaves like 0x80, but
   // not in 64-bit mode.
   let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
       hasSideEffects = 0 in {
-    def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+    def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>;
     let mayLoad = 1 in
-      def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, null_frag, MemMRM>;
+      def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
   }
 } // Defs = [EFLAGS]
@@ -1159,31 +1144,19 @@
 multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
   def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
             (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
-  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
             (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
-  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
             (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
-  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
             (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;

   def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
             (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
-  def : Pat<(store (OpNode (load addr:$dst), i16relocImmSExt8_su:$src), addr:$dst),
-            (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
             (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
-  def : Pat<(store (OpNode (load addr:$dst), i32relocImmSExt8_su:$src), addr:$dst),
-            (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
             (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
-  def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt8_su:$src), addr:$dst),
-            (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
             (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
 }
@@ -1191,31 +1164,19 @@
 multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
   def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
             (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
-  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2, EFLAGS),
-            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
             (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
-  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2, EFLAGS),
-            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
             (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
-  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2, EFLAGS),
-            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
             (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;

   def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
             (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
-  def : Pat<(store (OpNodeFlag (load addr:$dst), i16relocImmSExt8_su:$src, EFLAGS), addr:$dst),
-            (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
             (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
-  def : Pat<(store (OpNodeFlag (load addr:$dst), i32relocImmSExt8_su:$src, EFLAGS), addr:$dst),
-            (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
             (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
-  def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt8_su:$src, EFLAGS), addr:$dst),
-            (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
   def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
             (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
 }
@@ -1223,31 +1184,19 @@
 multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
   def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
             (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
-  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
             (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
-  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
             (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
-  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
             (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;

   def : Pat<(OpNodeFlag (loadi8 addr:$src1), relocImm8_su:$src2),
             (!cast<Instruction>(NAME#"8mi") addr:$src1, relocImm8_su:$src2)>;
-  def : Pat<(OpNodeFlag (loadi16 addr:$src1), i16relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"16mi8") addr:$src1, i16relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag (loadi16 addr:$src1), relocImm16_su:$src2),
             (!cast<Instruction>(NAME#"16mi") addr:$src1, relocImm16_su:$src2)>;
-  def : Pat<(OpNodeFlag (loadi32 addr:$src1), i32relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"32mi8") addr:$src1, i32relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag (loadi32 addr:$src1), relocImm32_su:$src2),
             (!cast<Instruction>(NAME#"32mi") addr:$src1, relocImm32_su:$src2)>;
-  def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt8_su:$src2),
-            (!cast<Instruction>(NAME#"64mi8") addr:$src1, i64relocImmSExt8_su:$src2)>;
   def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
             (!cast<Instruction>(NAME#"64mi32") addr:$src1, i64relocImmSExt32_su:$src2)>;
 }
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1225,12 +1225,12 @@
 // binary size compared to a regular MOV, but it introduces an unnecessary
 // load, so is not suitable for regular or optsize functions.
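 // e.g. "andl $0, (%rdi)" is 3 bytes (83 27 00) versus 6 bytes for
 // "movl $0, (%rdi)" (C7 07 00 00 00 00); the AND32mi emitted here is later
 // shrunk to the mi8 encoding during MC lowering.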
let Predicates = [OptForMinSize] in { -def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; -def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; -def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; -def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; -def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; -def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi addr:$dst, 0)>; +def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi addr:$dst, 0)>; +def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi32 addr:$dst, 0)>; +def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi addr:$dst, -1)>; +def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi addr:$dst, -1)>; +def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi32 addr:$dst, -1)>; } // In kernel code model, we can get the address of a label @@ -1526,35 +1526,16 @@ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>; } // isCommutable -// NOTE: These are order specific, we want the ri8 forms to be listed -// first so that they are slightly preferred to the ri forms. - def ADD8ri_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "", // orb/addb REG, imm8 [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>; -def ADD16ri8_DB : I<0, Pseudo, - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "", // orw/addw REG, imm8 - [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>; def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "", // orw/addw REG, imm [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>; - -def ADD32ri8_DB : I<0, Pseudo, - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "", // orl/addl REG, imm8 - [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>; def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "", // orl/addl REG, imm [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>; - - -def ADD64ri8_DB : I<0, Pseudo, - (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), - "", // orq/addq REG, imm8 - [(set GR64:$dst, (or_is_add GR64:$src1, - i64immSExt8:$src2))]>; def ADD64ri32_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "", // orq/addq REG, imm @@ -1585,26 +1566,26 @@ // Odd encoding trick: -128 fits into an 8-bit immediate field while // +128 doesn't, so in this special case use a sub instead of an add. 
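 // e.g. "addl $128, %eax" needs a 4-byte immediate, while "subl $-128, %eax"
 // can use the sign-extended imm8 encoding once MC lowering shrinks the SUB32ri.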
def : Pat<(add GR16:$src1, 128), - (SUB16ri8 GR16:$src1, -128)>; + (SUB16ri GR16:$src1, -128)>; def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), - (SUB16mi8 addr:$dst, -128)>; + (SUB16mi addr:$dst, -128)>; def : Pat<(add GR32:$src1, 128), - (SUB32ri8 GR32:$src1, -128)>; + (SUB32ri GR32:$src1, -128)>; def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), - (SUB32mi8 addr:$dst, -128)>; + (SUB32mi addr:$dst, -128)>; def : Pat<(add GR64:$src1, 128), - (SUB64ri8 GR64:$src1, -128)>; + (SUB64ri32 GR64:$src1, -128)>; def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), - (SUB64mi8 addr:$dst, -128)>; + (SUB64mi32 addr:$dst, -128)>; def : Pat<(X86add_flag_nocf GR16:$src1, 128), - (SUB16ri8 GR16:$src1, -128)>; + (SUB16ri GR16:$src1, -128)>; def : Pat<(X86add_flag_nocf GR32:$src1, 128), - (SUB32ri8 GR32:$src1, -128)>; + (SUB32ri GR32:$src1, -128)>; def : Pat<(X86add_flag_nocf GR64:$src1, 128), - (SUB64ri8 GR64:$src1, -128)>; + (SUB64ri32 GR64:$src1, -128)>; // The same trick applies for 32-bit immediate fields in 64-bit // instructions. @@ -1612,7 +1593,6 @@ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; - def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; @@ -1625,14 +1605,6 @@ // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { -def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), - (SUBREG_TO_REG - (i64 0), - (AND32ri8 - (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo32XForm imm:$imm))), - sub_32bit)>; - def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), @@ -2057,14 +2029,7 @@ def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR16:$src1, i16immSExt8:$src2), - (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(add GR32:$src1, i32immSExt8:$src2), - (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt8:$src2), - (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt32:$src2), - (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; // sub reg, reg def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; @@ -2089,12 +2054,6 @@ (SUB16ri GR16:$src1, imm:$src2)>; def : Pat<(sub GR32:$src1, imm:$src2), (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, i16immSExt8:$src2), - (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(sub GR32:$src1, i32immSExt8:$src2), - (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt8:$src2), - (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt32:$src2), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; @@ -2190,12 +2149,6 @@ def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, i16immSExt8:$src2), - (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(or GR32:$src1, i32immSExt8:$src2), - (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt8:$src2), - (OR64ri8 GR64:$src1, 
i64immSExt8:$src2)>; def : Pat<(or GR64:$src1, i64immSExt32:$src2), (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; @@ -2222,12 +2175,6 @@ (XOR16ri GR16:$src1, imm:$src2)>; def : Pat<(xor GR32:$src1, imm:$src2), (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, i16immSExt8:$src2), - (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(xor GR32:$src1, i32immSExt8:$src2), - (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt8:$src2), - (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt32:$src2), (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; @@ -2254,12 +2201,6 @@ (AND16ri GR16:$src1, imm:$src2)>; def : Pat<(and GR32:$src1, imm:$src2), (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, i16immSExt8:$src2), - (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(and GR32:$src1, i32immSExt8:$src2), - (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt8:$src2), - (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(and GR64:$src1, i64immSExt32:$src2), (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1207,9 +1207,7 @@ case X86::ADD8ri: case X86::ADD8ri_DB: case X86::ADD16ri: - case X86::ADD16ri8: case X86::ADD16ri_DB: - case X86::ADD16ri8_DB: addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm()); break; case X86::ADD8rr: @@ -1520,18 +1518,14 @@ case X86::ADD16rr_DB: return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); case X86::ADD64ri32: - case X86::ADD64ri8: case X86::ADD64ri32_DB: - case X86::ADD64ri8_DB: assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addOffset( BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src), MI.getOperand(2)); break; case X86::ADD32ri: - case X86::ADD32ri8: - case X86::ADD32ri_DB: - case X86::ADD32ri8_DB: { + case X86::ADD32ri_DB: { assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; @@ -1559,16 +1553,12 @@ Is8BitOp = true; [[fallthrough]]; case X86::ADD16ri: - case X86::ADD16ri8: case X86::ADD16ri_DB: - case X86::ADD16ri8_DB: return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); case X86::SUB8ri: - case X86::SUB16ri8: case X86::SUB16ri: /// FIXME: Support these similar to ADD8ri/ADD16ri*. 
return nullptr; - case X86::SUB32ri8: case X86::SUB32ri: { if (!MI.getOperand(2).isImm()) return nullptr; @@ -1599,7 +1589,6 @@ break; } - case X86::SUB64ri8: case X86::SUB64ri32: { if (!MI.getOperand(2).isImm()) return nullptr; @@ -4040,11 +4029,8 @@ switch (MI.getOpcode()) { default: break; case X86::CMP64ri32: - case X86::CMP64ri8: case X86::CMP32ri: - case X86::CMP32ri8: case X86::CMP16ri: - case X86::CMP16ri8: case X86::CMP8ri: SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; @@ -4075,11 +4061,8 @@ CmpValue = 0; return true; case X86::SUB64ri32: - case X86::SUB64ri8: case X86::SUB32ri: - case X86::SUB32ri8: case X86::SUB16ri: - case X86::SUB16ri8: case X86::SUB8ri: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; @@ -4147,18 +4130,12 @@ return false; } case X86::CMP64ri32: - case X86::CMP64ri8: case X86::CMP32ri: - case X86::CMP32ri8: case X86::CMP16ri: - case X86::CMP16ri8: case X86::CMP8ri: case X86::SUB64ri32: - case X86::SUB64ri8: case X86::SUB32ri: - case X86::SUB32ri8: case X86::SUB16ri: - case X86::SUB16ri8: case X86::SUB8ri: case X86::TEST64rr: case X86::TEST32rr: @@ -4233,14 +4210,12 @@ case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8: return getTruncatedShiftCount(MI, 3) != 0; - case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: - case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: + case X86::SUB64ri32: case X86::SUB32ri: case X86::SUB16ri: case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: - case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: - case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: + case X86::ADD64ri32: case X86::ADD32ri: case X86::ADD16ri: case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: @@ -4264,18 +4239,15 @@ case X86::TZCNT32rr: case X86::TZCNT32rm: case X86::TZCNT64rr: case X86::TZCNT64rm: return true; - case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: - case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: + case X86::AND64ri32: case X86::AND32ri: case X86::AND16ri: case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: - case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: - case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: + case X86::XOR64ri32: case X86::XOR32ri: case X86::XOR16ri: case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: - case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: - case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: + case X86::OR64ri32: case X86::OR32ri: case X86::OR16ri: case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: @@ -4374,11 +4346,8 @@ switch (CmpInstr.getOpcode()) { default: break; case X86::SUB64ri32: - case X86::SUB64ri8: case X86::SUB32ri: - case X86::SUB32ri8: case X86::SUB16ri: - case X86::SUB16ri8: case X86::SUB8ri: case X86::SUB64rm: case X86::SUB32rm: @@ -4403,11 +4372,8 @@ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; case 
X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; - case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; - case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; - case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; } CmpInstr.setDesc(get(NewOpcode)); @@ -5188,9 +5154,6 @@ case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break; case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break; case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break; - case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break; - case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break; - case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break; } return false; } @@ -6409,9 +6372,9 @@ switch (MI.getOpcode()) { default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; } // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -6788,9 +6751,9 @@ switch (MI.getOpcode()) { default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; } // Change to CMPXXri r, 0 first. 
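   // (A TEST of a register against itself cannot fold a load, but the
   // equivalent CMPri against 0 can, which is why the opcode is rewritten
   // before the fold is attempted.)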
MI.setDesc(get(NewOpc)); @@ -7086,11 +7049,8 @@ switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: - case X86::CMP64ri8: case X86::CMP32ri: - case X86::CMP32ri8: case X86::CMP16ri: - case X86::CMP16ri8: case X86::CMP8ri: { MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); @@ -7098,11 +7058,8 @@ unsigned NewOpc; switch (DataMI->getOpcode()) { default: llvm_unreachable("Unreachable!"); - case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; - case X86::CMP32ri8: case X86::CMP32ri: NewOpc = X86::TEST32rr; break; - case X86::CMP16ri8: case X86::CMP16ri: NewOpc = X86::TEST16rr; break; case X86::CMP8ri: NewOpc = X86::TEST8rr; break; } @@ -7215,20 +7172,14 @@ switch (Opc) { default: break; case X86::CMP64ri32: - case X86::CMP64ri8: case X86::CMP32ri: - case X86::CMP32ri8: case X86::CMP16ri: - case X86::CMP16ri8: case X86::CMP8ri: if (isNullConstant(BeforeOps[1])) { switch (Opc) { default: llvm_unreachable("Unreachable!"); - case X86::CMP64ri8: case X86::CMP64ri32: Opc = X86::TEST64rr; break; - case X86::CMP32ri8: case X86::CMP32ri: Opc = X86::TEST32rr; break; - case X86::CMP16ri8: case X86::CMP16ri: Opc = X86::TEST16rr; break; case X86::CMP8ri: Opc = X86::TEST8rr; break; } diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -838,11 +838,11 @@ if (DstTy == LLT::scalar(8)) AndOpc = X86::AND8ri; else if (DstTy == LLT::scalar(16)) - AndOpc = X86::AND16ri8; + AndOpc = X86::AND16ri; else if (DstTy == LLT::scalar(32)) - AndOpc = X86::AND32ri8; + AndOpc = X86::AND32ri; else if (DstTy == LLT::scalar(64)) - AndOpc = X86::AND64ri8; + AndOpc = X86::AND64ri32; else return false; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -405,8 +405,7 @@ X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) || X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) || X86::optimizeMOV(OutMI, In64BitMode) || - X86::optimizeToFixedRegisterForm(OutMI) || - X86::optimizeToShortImmediateForm(OutMI)) + X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI)) return; // Handle a few special cases to eliminate operand modifiers. 
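
An illustrative aside (not part of the patch): the combined hook can be
exercised on a hand-built MCInst. This is a minimal sketch, assuming an LLVM
tree with the X86 backend built; the opcode and operand layout follow the defs
above.

    #include "MCTargetDesc/X86EncodingOptimization.h"
    #include "MCTargetDesc/X86MCTargetDesc.h"
    #include "llvm/MC/MCInst.h"

    using namespace llvm;

    // AND32ri ebx, ebx, -64: ISel no longer produces an ri8 form, but the
    // immediate fits in a signed byte, so MC lowering shrinks the encoding.
    static void demoShrink() {
      MCInst MI;
      MI.setOpcode(X86::AND32ri);
      MI.addOperand(MCOperand::createReg(X86::EBX)); // dst
      MI.addOperand(MCOperand::createReg(X86::EBX)); // src1 (tied to dst)
      MI.addOperand(MCOperand::createImm(-64));      // isInt<8> -> shrinkable
      bool Changed = X86::optimizeToFixedRegisterOrShortImmediateForm(MI);
      (void)Changed; // true; MI.getOpcode() is now X86::AND32ri8
    }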
diff --git a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir --- a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir +++ b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir @@ -355,7 +355,7 @@ bb.5.if.then: liveins: $eax - $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags + $rsp = frame-destroy ADD64ri32 $rsp, 8, implicit-def dead $eflags CFI_INSTRUCTION def_cfa_offset 24 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa_offset 16 @@ -367,7 +367,7 @@ successors: %bb.8(0x30000000), %bb.7(0x50000000) liveins: $rbx, $r14 - CMP32mi8 $rsp, 1, $noreg, 4, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from %ir.idx) + CMP32mi $rsp, 1, $noreg, 4, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from %ir.idx) JCC_1 %bb.8, 8, implicit killed $eflags JMP_1 %bb.7 @@ -375,7 +375,7 @@ successors: %bb.8(0x30000000), %bb.3(0x50000000) liveins: $rbx, $r14 - CMP32mi8 renamable $rbx, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load (s32) from %ir.1) + CMP32mi renamable $rbx, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load (s32) from %ir.1) JCC_1 %bb.3, 5, implicit killed $eflags JMP_1 %bb.8 diff --git a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll --- a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll @@ -33,10 +33,10 @@ ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.3, align 16) ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 49, $noreg, [[MOV32rm2]].sub_8bit :: (store (s512) into %stack.0 + 49, align 1, basealign 4) ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[MOV32rm2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4) - ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[AND32ri8_]], -64, implicit-def dead $eflags + ; CHECK-NEXT: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[AND32ri_]], -64, implicit-def dead $eflags ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[COPY83]].sub_32bit ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 18, $noreg, [[COPY2]].sub_16bit :: (store (s512) into %stack.0 + 18, align 2, basealign 4) - ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[SUB32rr]], [[AND32ri8_]], implicit-def dead $eflags + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[SUB32rr]], [[AND32ri_]], implicit-def dead $eflags ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 18, $noreg, [[SUB32rr]].sub_16bit :: (store (s512) into %stack.0 + 18, align 2, basealign 4) ; CHECK-NEXT: [[MOVZX32rr16_:%[0-9]+]]:gr32 = MOVZX32rr16 [[SUB32rr]].sub_16bit ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 50, $noreg, [[MOVZX32rr16_]].sub_8bit :: (store (s512) into %stack.0 + 50, align 2, basealign 4) diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir b/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir @@ -58,8 +58,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK-NEXT: [[SUB32ri8_:%[0-9]+]]:gr32 = SUB32ri8 [[MOV32r0_]], 0, implicit-def $eflags - ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri8_]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: [[SUB32ri:%[0-9]+]]:gr32 = SUB32ri [[MOV32r0_]], 0, implicit-def $eflags + ; CHECK-NEXT: 
[[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri]], [[COPY]], implicit-def $eflags ; CHECK-NEXT: $edi = COPY [[AND32rr]] %0(s32) = COPY $edi %1(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/X86/GlobalISel/select-cmp.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-cmp.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-cmp.mir @@ -104,8 +104,8 @@ ; CHECK: CMP8rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s8) = COPY $dil %1(s8) = COPY $sil @@ -140,8 +140,8 @@ ; CHECK: CMP16rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s16) = COPY $di %1(s16) = COPY $si @@ -176,8 +176,8 @@ ; CHECK: CMP64rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s64) = COPY $rdi %1(s64) = COPY $rsi @@ -212,8 +212,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -248,8 +248,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 5, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -284,8 +284,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -320,8 +320,8 @@ ; CHECK: CMP32rr 
[[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 3, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -356,8 +356,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -392,8 +392,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -428,8 +428,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 15, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -464,8 +464,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 13, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -500,8 +500,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 12, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi @@ -536,8 +536,8 @@ ; CHECK: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 14, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[SETCCr]] - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[MOVZX32rr8_]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[MOVZX32rr8_]], 1, implicit-def $eflags + ; CHECK: $eax = COPY 
[[AND32ri_]] ; CHECK: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/llvm/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir @@ -40,8 +40,8 @@ ; ALL: [[COPY:%[0-9]+]]:gr8 = COPY $dil ; ALL: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF ; ALL: [[INSERT_SUBREG:%[0-9]+]]:gr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_8bit - ; ALL: [[AND64ri8_:%[0-9]+]]:gr64 = AND64ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; ALL: $rax = COPY [[AND64ri8_]] + ; ALL: [[AND64ri32_:%[0-9]+]]:gr64 = AND64ri32 [[INSERT_SUBREG]], 1, implicit-def $eflags + ; ALL: $rax = COPY [[AND64ri32_]] ; ALL: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-ext.mir b/llvm/test/CodeGen/X86/GlobalISel/select-ext.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-ext.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-ext.mir @@ -98,16 +98,16 @@ ; X86-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; X86-NEXT: [[DEF:%[0-9]+]]:gr16 = IMPLICIT_DEF ; X86-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gr16 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.sub_8bit - ; X86-NEXT: [[AND16ri8_:%[0-9]+]]:gr16 = AND16ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; X86-NEXT: $ax = COPY [[AND16ri8_]] + ; X86-NEXT: [[AND16ri_:%[0-9]+]]:gr16 = AND16ri [[INSERT_SUBREG]], 1, implicit-def $eflags + ; X86-NEXT: $ax = COPY [[AND16ri_]] ; X86-NEXT: RET 0, implicit $ax ; X64-LABEL: name: test_zext_i1toi16 ; X64: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; X64-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; X64-NEXT: [[DEF:%[0-9]+]]:gr16 = IMPLICIT_DEF ; X64-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gr16 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.sub_8bit - ; X64-NEXT: [[AND16ri8_:%[0-9]+]]:gr16 = AND16ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; X64-NEXT: $ax = COPY [[AND16ri8_]] + ; X64-NEXT: [[AND16ri_:%[0-9]+]]:gr16 = AND16ri [[INSERT_SUBREG]], 1, implicit-def $eflags + ; X64-NEXT: $ax = COPY [[AND16ri_]] ; X64-NEXT: RET 0, implicit $ax %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) @@ -135,16 +135,16 @@ ; X86-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; X86-NEXT: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF ; X86-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gr32 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.sub_8bit - ; X86-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; X86-NEXT: $eax = COPY [[AND32ri8_]] + ; X86-NEXT: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[INSERT_SUBREG]], 1, implicit-def $eflags + ; X86-NEXT: $eax = COPY [[AND32ri_]] ; X86-NEXT: RET 0, implicit $eax ; X64-LABEL: name: test_zext_i1 ; X64: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; X64-NEXT: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; X64-NEXT: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF ; X64-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gr32 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.sub_8bit - ; X64-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; X64-NEXT: $eax = COPY [[AND32ri8_]] + ; X64-NEXT: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[INSERT_SUBREG]], 1, implicit-def $eflags + ; X64-NEXT: $eax = COPY [[AND32ri_]] ; X64-NEXT: RET 0, implicit $eax %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir +++ 
b/llvm/test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir @@ -105,8 +105,8 @@ ; CHECK: liveins: $edi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; CHECK: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; CHECK: [[AND16ri8_:%[0-9]+]]:gr16 = AND16ri8 [[COPY1]], 1, implicit-def $eflags - ; CHECK: $ax = COPY [[AND16ri8_]] + ; CHECK: [[AND16ri_:%[0-9]+]]:gr16 = AND16ri [[COPY1]], 1, implicit-def $eflags + ; CHECK: $ax = COPY [[AND16ri_]] ; CHECK: RET 0, implicit $ax %1:gpr(s32) = COPY $edi %3:gpr(s16) = G_CONSTANT i16 1 @@ -135,8 +135,8 @@ ; CHECK-LABEL: name: zext_i1_to_i32 ; CHECK: liveins: $edi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY]], 1, implicit-def $eflags - ; CHECK: $eax = COPY [[AND32ri8_]] + ; CHECK: [[AND32ri_:%[0-9]+]]:gr32 = AND32ri [[COPY]], 1, implicit-def $eflags + ; CHECK: $eax = COPY [[AND32ri_]] ; CHECK: RET 0, implicit $eax %1:gpr(s32) = COPY $edi %3:gpr(s32) = G_CONSTANT i32 1 @@ -167,8 +167,8 @@ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; CHECK: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32bit - ; CHECK: [[AND64ri8_:%[0-9]+]]:gr64 = AND64ri8 [[INSERT_SUBREG]], 1, implicit-def $eflags - ; CHECK: $rax = COPY [[AND64ri8_]] + ; CHECK: [[AND64ri32_:%[0-9]+]]:gr64 = AND64ri32 [[INSERT_SUBREG]], 1, implicit-def $eflags + ; CHECK: $rax = COPY [[AND64ri32_]] ; CHECK: RET 0, implicit $rax %1:gpr(s32) = COPY $edi %3:gpr(s64) = G_CONSTANT i64 1 diff --git a/llvm/test/CodeGen/X86/avxvnni-combine.ll b/llvm/test/CodeGen/X86/avxvnni-combine.ll --- a/llvm/test/CodeGen/X86/avxvnni-combine.ll +++ b/llvm/test/CodeGen/X86/avxvnni-combine.ll @@ -42,99 +42,52 @@ declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>) #1 define <2 x i64> @foo_128(i32 %0, <2 x i64> %1, <2 x i64> %2, ptr %3) { -; ADL-LABEL: foo_128: -; ADL: # %bb.0: -; ADL-NEXT: testl %edi, %edi -; ADL-NEXT: jle .LBB1_6 -; ADL-NEXT: # %bb.1: -; ADL-NEXT: movl %edi, %edx -; ADL-NEXT: movl %edx, %eax -; ADL-NEXT: andl $3, %eax -; ADL-NEXT: cmpl $4, %edi -; ADL-NEXT: jae .LBB1_7 -; ADL-NEXT: # %bb.2: -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: jmp .LBB1_3 -; ADL-NEXT: .LBB1_7: -; ADL-NEXT: andl $-4, %edx -; ADL-NEXT: leaq 48(%rsi), %rdi -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: .p2align 4, 0x90 -; ADL-NEXT: .LBB1_8: # =>This Inner Loop Header: Depth=1 -; ADL-NEXT: {vex} vpdpwssd -48(%rdi), %xmm1, %xmm0 -; ADL-NEXT: vpmaddwd -32(%rdi), %xmm1, %xmm2 -; ADL-NEXT: vpmaddwd -16(%rdi), %xmm1, %xmm3 -; ADL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; ADL-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; ADL-NEXT: vpmaddwd (%rdi), %xmm1, %xmm2 -; ADL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; ADL-NEXT: addq $4, %rcx -; ADL-NEXT: addq $64, %rdi -; ADL-NEXT: cmpq %rcx, %rdx -; ADL-NEXT: jne .LBB1_8 -; ADL-NEXT: .LBB1_3: -; ADL-NEXT: testq %rax, %rax -; ADL-NEXT: je .LBB1_6 -; ADL-NEXT: # %bb.4: # %.preheader -; ADL-NEXT: shlq $4, %rcx -; ADL-NEXT: addq %rcx, %rsi -; ADL-NEXT: shlq $4, %rax -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: .p2align 4, 0x90 -; ADL-NEXT: .LBB1_5: # =>This Inner Loop Header: Depth=1 -; ADL-NEXT: {vex} vpdpwssd (%rsi,%rcx), %xmm1, %xmm0 -; ADL-NEXT: addq $16, %rcx -; ADL-NEXT: cmpq %rcx, %rax -; ADL-NEXT: jne .LBB1_5 -; ADL-NEXT: .LBB1_6: -; ADL-NEXT: retq -; -; SPR-LABEL: foo_128: -; SPR: # %bb.0: -; SPR-NEXT: testl %edi, %edi -; SPR-NEXT: jle .LBB1_6 -; SPR-NEXT: # %bb.1: -; SPR-NEXT: movl %edi, %edx -; SPR-NEXT: movl %edx, %eax -; SPR-NEXT: andl $3, %eax -; SPR-NEXT: cmpl $4, %edi -; 
SPR-NEXT: jae .LBB1_7 -; SPR-NEXT: # %bb.2: -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: jmp .LBB1_3 -; SPR-NEXT: .LBB1_7: -; SPR-NEXT: andl $-4, %edx -; SPR-NEXT: leaq 48(%rsi), %rdi -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: .p2align 4, 0x90 -; SPR-NEXT: .LBB1_8: # =>This Inner Loop Header: Depth=1 -; SPR-NEXT: {vex} vpdpwssd -48(%rdi), %xmm1, %xmm0 -; SPR-NEXT: vpmaddwd -32(%rdi), %xmm1, %xmm2 -; SPR-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; SPR-NEXT: vpmaddwd -16(%rdi), %xmm1, %xmm2 -; SPR-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; SPR-NEXT: vpmaddwd (%rdi), %xmm1, %xmm2 -; SPR-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; SPR-NEXT: addq $4, %rcx -; SPR-NEXT: addq $64, %rdi -; SPR-NEXT: cmpq %rcx, %rdx -; SPR-NEXT: jne .LBB1_8 -; SPR-NEXT: .LBB1_3: -; SPR-NEXT: testq %rax, %rax -; SPR-NEXT: je .LBB1_6 -; SPR-NEXT: # %bb.4: # %.preheader -; SPR-NEXT: shlq $4, %rcx -; SPR-NEXT: addq %rcx, %rsi -; SPR-NEXT: shlq $4, %rax -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: .p2align 4, 0x90 -; SPR-NEXT: .LBB1_5: # =>This Inner Loop Header: Depth=1 -; SPR-NEXT: {vex} vpdpwssd (%rsi,%rcx), %xmm1, %xmm0 -; SPR-NEXT: addq $16, %rcx -; SPR-NEXT: cmpq %rcx, %rax -; SPR-NEXT: jne .LBB1_5 -; SPR-NEXT: .LBB1_6: -; SPR-NEXT: retq +; AVX-LABEL: foo_128: +; AVX: # %bb.0: +; AVX-NEXT: testl %edi, %edi +; AVX-NEXT: jle .LBB1_6 +; AVX-NEXT: # %bb.1: +; AVX-NEXT: movl %edi, %edx +; AVX-NEXT: movl %edx, %eax +; AVX-NEXT: andl $3, %eax +; AVX-NEXT: cmpl $4, %edi +; AVX-NEXT: jae .LBB1_7 +; AVX-NEXT: # %bb.2: +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: jmp .LBB1_3 +; AVX-NEXT: .LBB1_7: +; AVX-NEXT: andl $-4, %edx +; AVX-NEXT: leaq 48(%rsi), %rdi +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: .p2align 4, 0x90 +; AVX-NEXT: .LBB1_8: # =>This Inner Loop Header: Depth=1 +; AVX-NEXT: {vex} vpdpwssd -48(%rdi), %xmm1, %xmm0 +; AVX-NEXT: vpmaddwd -32(%rdi), %xmm1, %xmm2 +; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpmaddwd -16(%rdi), %xmm1, %xmm2 +; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpmaddwd (%rdi), %xmm1, %xmm2 +; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; AVX-NEXT: addq $4, %rcx +; AVX-NEXT: addq $64, %rdi +; AVX-NEXT: cmpq %rcx, %rdx +; AVX-NEXT: jne .LBB1_8 +; AVX-NEXT: .LBB1_3: +; AVX-NEXT: testq %rax, %rax +; AVX-NEXT: je .LBB1_6 +; AVX-NEXT: # %bb.4: # %.preheader +; AVX-NEXT: shlq $4, %rcx +; AVX-NEXT: addq %rcx, %rsi +; AVX-NEXT: shlq $4, %rax +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: .p2align 4, 0x90 +; AVX-NEXT: .LBB1_5: # =>This Inner Loop Header: Depth=1 +; AVX-NEXT: {vex} vpdpwssd (%rsi,%rcx), %xmm1, %xmm0 +; AVX-NEXT: addq $16, %rcx +; AVX-NEXT: cmpq %rcx, %rax +; AVX-NEXT: jne .LBB1_5 +; AVX-NEXT: .LBB1_6: +; AVX-NEXT: retq ; ; AVX512-LABEL: foo_128: ; AVX512: # %bb.0: @@ -476,99 +429,52 @@ ; } define <4 x i64> @foo_256(i32 %0, <4 x i64> %1, <4 x i64> %2, ptr %3) { -; ADL-LABEL: foo_256: -; ADL: # %bb.0: -; ADL-NEXT: testl %edi, %edi -; ADL-NEXT: jle .LBB4_6 -; ADL-NEXT: # %bb.1: -; ADL-NEXT: movl %edi, %edx -; ADL-NEXT: movl %edx, %eax -; ADL-NEXT: andl $3, %eax -; ADL-NEXT: cmpl $4, %edi -; ADL-NEXT: jae .LBB4_7 -; ADL-NEXT: # %bb.2: -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: jmp .LBB4_3 -; ADL-NEXT: .LBB4_7: -; ADL-NEXT: andl $-4, %edx -; ADL-NEXT: leaq 96(%rsi), %rdi -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: .p2align 4, 0x90 -; ADL-NEXT: .LBB4_8: # =>This Inner Loop Header: Depth=1 -; ADL-NEXT: {vex} vpdpwssd -96(%rdi), %ymm1, %ymm0 -; ADL-NEXT: vpmaddwd -64(%rdi), %ymm1, %ymm2 -; ADL-NEXT: vpmaddwd -32(%rdi), %ymm1, %ymm3 -; ADL-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; ADL-NEXT: vpaddd %ymm3, %ymm0, %ymm0 -; ADL-NEXT: 
vpmaddwd (%rdi), %ymm1, %ymm2 -; ADL-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; ADL-NEXT: addq $4, %rcx -; ADL-NEXT: subq $-128, %rdi -; ADL-NEXT: cmpq %rcx, %rdx -; ADL-NEXT: jne .LBB4_8 -; ADL-NEXT: .LBB4_3: -; ADL-NEXT: testq %rax, %rax -; ADL-NEXT: je .LBB4_6 -; ADL-NEXT: # %bb.4: # %.preheader -; ADL-NEXT: shlq $5, %rcx -; ADL-NEXT: addq %rcx, %rsi -; ADL-NEXT: shlq $5, %rax -; ADL-NEXT: xorl %ecx, %ecx -; ADL-NEXT: .p2align 4, 0x90 -; ADL-NEXT: .LBB4_5: # =>This Inner Loop Header: Depth=1 -; ADL-NEXT: {vex} vpdpwssd (%rsi,%rcx), %ymm1, %ymm0 -; ADL-NEXT: addq $32, %rcx -; ADL-NEXT: cmpq %rcx, %rax -; ADL-NEXT: jne .LBB4_5 -; ADL-NEXT: .LBB4_6: -; ADL-NEXT: retq -; -; SPR-LABEL: foo_256: -; SPR: # %bb.0: -; SPR-NEXT: testl %edi, %edi -; SPR-NEXT: jle .LBB4_6 -; SPR-NEXT: # %bb.1: -; SPR-NEXT: movl %edi, %edx -; SPR-NEXT: movl %edx, %eax -; SPR-NEXT: andl $3, %eax -; SPR-NEXT: cmpl $4, %edi -; SPR-NEXT: jae .LBB4_7 -; SPR-NEXT: # %bb.2: -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: jmp .LBB4_3 -; SPR-NEXT: .LBB4_7: -; SPR-NEXT: andl $-4, %edx -; SPR-NEXT: leaq 96(%rsi), %rdi -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: .p2align 4, 0x90 -; SPR-NEXT: .LBB4_8: # =>This Inner Loop Header: Depth=1 -; SPR-NEXT: {vex} vpdpwssd -96(%rdi), %ymm1, %ymm0 -; SPR-NEXT: vpmaddwd -64(%rdi), %ymm1, %ymm2 -; SPR-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; SPR-NEXT: vpmaddwd -32(%rdi), %ymm1, %ymm2 -; SPR-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; SPR-NEXT: vpmaddwd (%rdi), %ymm1, %ymm2 -; SPR-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; SPR-NEXT: addq $4, %rcx -; SPR-NEXT: subq $-128, %rdi -; SPR-NEXT: cmpq %rcx, %rdx -; SPR-NEXT: jne .LBB4_8 -; SPR-NEXT: .LBB4_3: -; SPR-NEXT: testq %rax, %rax -; SPR-NEXT: je .LBB4_6 -; SPR-NEXT: # %bb.4: # %.preheader -; SPR-NEXT: shlq $5, %rcx -; SPR-NEXT: addq %rcx, %rsi -; SPR-NEXT: shlq $5, %rax -; SPR-NEXT: xorl %ecx, %ecx -; SPR-NEXT: .p2align 4, 0x90 -; SPR-NEXT: .LBB4_5: # =>This Inner Loop Header: Depth=1 -; SPR-NEXT: {vex} vpdpwssd (%rsi,%rcx), %ymm1, %ymm0 -; SPR-NEXT: addq $32, %rcx -; SPR-NEXT: cmpq %rcx, %rax -; SPR-NEXT: jne .LBB4_5 -; SPR-NEXT: .LBB4_6: -; SPR-NEXT: retq +; AVX-LABEL: foo_256: +; AVX: # %bb.0: +; AVX-NEXT: testl %edi, %edi +; AVX-NEXT: jle .LBB4_6 +; AVX-NEXT: # %bb.1: +; AVX-NEXT: movl %edi, %edx +; AVX-NEXT: movl %edx, %eax +; AVX-NEXT: andl $3, %eax +; AVX-NEXT: cmpl $4, %edi +; AVX-NEXT: jae .LBB4_7 +; AVX-NEXT: # %bb.2: +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: jmp .LBB4_3 +; AVX-NEXT: .LBB4_7: +; AVX-NEXT: andl $-4, %edx +; AVX-NEXT: leaq 96(%rsi), %rdi +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: .p2align 4, 0x90 +; AVX-NEXT: .LBB4_8: # =>This Inner Loop Header: Depth=1 +; AVX-NEXT: {vex} vpdpwssd -96(%rdi), %ymm1, %ymm0 +; AVX-NEXT: vpmaddwd -64(%rdi), %ymm1, %ymm2 +; AVX-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vpmaddwd -32(%rdi), %ymm1, %ymm2 +; AVX-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vpmaddwd (%rdi), %ymm1, %ymm2 +; AVX-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; AVX-NEXT: addq $4, %rcx +; AVX-NEXT: subq $-128, %rdi +; AVX-NEXT: cmpq %rcx, %rdx +; AVX-NEXT: jne .LBB4_8 +; AVX-NEXT: .LBB4_3: +; AVX-NEXT: testq %rax, %rax +; AVX-NEXT: je .LBB4_6 +; AVX-NEXT: # %bb.4: # %.preheader +; AVX-NEXT: shlq $5, %rcx +; AVX-NEXT: addq %rcx, %rsi +; AVX-NEXT: shlq $5, %rax +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: .p2align 4, 0x90 +; AVX-NEXT: .LBB4_5: # =>This Inner Loop Header: Depth=1 +; AVX-NEXT: {vex} vpdpwssd (%rsi,%rcx), %ymm1, %ymm0 +; AVX-NEXT: addq $32, %rcx +; AVX-NEXT: cmpq %rcx, %rax +; AVX-NEXT: jne .LBB4_5 +; AVX-NEXT: .LBB4_6: +; AVX-NEXT: retq ; ; 
AVX512-LABEL: foo_256: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/cfi-xmm.ll b/llvm/test/CodeGen/X86/cfi-xmm.ll --- a/llvm/test/CodeGen/X86/cfi-xmm.ll +++ b/llvm/test/CodeGen/X86/cfi-xmm.ll @@ -22,7 +22,7 @@ ; CHECK: .cfi_endproc ; PEI-LABEL: name: _Z1fv -; PEI: $rsp = frame-setup SUB64ri8 $rsp, 40, implicit-def dead $eflags +; PEI: $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags ; PEI-NEXT: frame-setup MOVAPSmr $rsp, 1, $noreg, 16, $noreg, killed $xmm15 :: (store (s128) into %fixed-stack.1) ; PEI-NEXT: frame-setup MOVAPSmr $rsp, 1, $noreg, 0, $noreg, killed $xmm10 :: (store (s128) into %fixed-stack.0) ; PEI-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48 @@ -31,5 +31,5 @@ ; PEI-NEXT: INLINEASM {{.*}} ; PEI-NEXT: $xmm10 = MOVAPSrm $rsp, 1, $noreg, 0, $noreg :: (load (s128) from %fixed-stack.0) ; PEI-NEXT: $xmm15 = MOVAPSrm $rsp, 1, $noreg, 16, $noreg :: (load (s128) from %fixed-stack.1) -; PEI-NEXT: $rsp = frame-destroy ADD64ri8 $rsp, 40, implicit-def dead $eflags +; PEI-NEXT: $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags ; PEI-NEXT: RET 0 diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -9,7 +9,7 @@ ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load (s32) from %ir.p) ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store (s64) into %stack.0) - ; CHECK-NEXT: SUB64ri8 renamable $rax, 3, implicit-def $eflags, debug-location !7 + ; CHECK-NEXT: SUB64ri32 renamable $rax, 3, implicit-def $eflags, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 diff --git a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir --- a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir +++ b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir @@ -139,7 +139,7 @@ ; CHECK: $rbp = frame-setup MOV64rr $rsp ; CHECK: CFI_INSTRUCTION def_cfa_register $rbp ; CHECK: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp, debug-location !13 - ; CHECK: $rsp = frame-setup SUB64ri8 $rsp, 40, implicit-def dead $eflags + ; CHECK: $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags ; CHECK: CFI_INSTRUCTION offset $rbx, -24 ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !13 :: (dereferenceable load (s32) from %ir.a.addr) ; CHECK: renamable $rax = KILL killed renamable $eax, debug-location !13 diff --git a/llvm/test/CodeGen/X86/leaFixup32.mir b/llvm/test/CodeGen/X86/leaFixup32.mir --- a/llvm/test/CodeGen/X86/leaFixup32.mir +++ b/llvm/test/CodeGen/X86/leaFixup32.mir @@ -8,7 +8,7 @@ ;generated using: llc -stop-after x86-pad-short-functions fixup-lea.ll > leaFinxup32.mir ;test2add_32: 3 operands LEA32r that can be replaced with 2 add instructions - ; where ADD32ri8 is chosen + ; where ADD32ri is chosen define i32 @test2add_32() { ret i32 0 } @@ -109,7 +109,7 @@ ; CHECK-LABEL: name: test2add_32 ; CHECK: liveins: $eax, $ebp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags - ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags + ; CHECK: $eax = ADD32ri $eax, -5, implicit-def $eflags ; CHECK: RET64 $eax $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg RET64 $eax @@ -147,7 +147,7 @@ ; CHECK-LABEL: name: test2add_ebp_32 ; CHECK: liveins: 
$eax, $ebp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags - ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags + ; CHECK: $ebp = ADD32ri $ebp, -5, implicit-def $eflags ; CHECK: RET64 $ebp $ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg RET64 $ebp @@ -223,7 +223,7 @@ ; CHECK-LABEL: name: testleaadd_32 ; CHECK: liveins: $eax, $ebp, $esi ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: $ebx = ADD32ri $ebx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $ebx = LEA32r killed $eax, 1, killed $ebp, -5, $noreg RET64 $ebx @@ -262,7 +262,7 @@ ; CHECK-LABEL: name: testleaadd_ebp_32 ; CHECK: liveins: $eax, $ebp ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: $ebx = ADD32ri $ebx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $ebx = LEA32r killed $ebp, 1, killed $eax, -5, $noreg RET64 $ebx @@ -531,7 +531,7 @@ ; CHECK: bb.1: ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebp = LEA32r killed $ebx, 4, killed $ebx, 0, $noreg - ; CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags + ; CHECK: $ebp = ADD32ri $ebp, 5, implicit-def $eflags ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx diff --git a/llvm/test/CodeGen/X86/leaFixup64.mir b/llvm/test/CodeGen/X86/leaFixup64.mir --- a/llvm/test/CodeGen/X86/leaFixup64.mir +++ b/llvm/test/CodeGen/X86/leaFixup64.mir @@ -186,7 +186,7 @@ ; CHECK-LABEL: name: testleaadd_64_32_1 ; CHECK: liveins: $rax, $rbp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp - ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags + ; CHECK: $eax = ADD32ri $eax, -5, implicit-def $eflags ; CHECK: RET64 $eax $eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg RET64 $eax @@ -224,7 +224,7 @@ ; CHECK-LABEL: name: testleaadd_rbp_64_32_1 ; CHECK: liveins: $rax, $rbp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax - ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags + ; CHECK: $ebp = ADD32ri $ebp, -5, implicit-def $eflags ; CHECK: RET64 $ebp $ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg RET64 $ebp @@ -299,7 +299,7 @@ ; CHECK-LABEL: name: test2add_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags - ; CHECK: $rax = ADD64ri8 $rax, -5, implicit-def $eflags + ; CHECK: $rax = ADD64ri32 $rax, -5, implicit-def $eflags ; CHECK: RET64 $eax $rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg RET64 $eax @@ -337,7 +337,7 @@ ; CHECK-LABEL: name: test2add_rbp_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rbp = ADD64rr $rbp, $rax, implicit-def $eflags - ; CHECK: $rbp = ADD64ri8 $rbp, -5, implicit-def $eflags + ; CHECK: $rbp = ADD64ri32 $rbp, -5, implicit-def $eflags ; CHECK: RET64 $ebp $rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg RET64 $ebp @@ -413,7 +413,7 @@ ; CHECK-LABEL: name: testleaadd_64_32 ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: $ebx = ADD32ri $ebx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg RET64 $ebx @@ -452,7 +452,7 @@ ; CHECK-LABEL: name: testleaadd_rbp_64_32 ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: $ebx = ADD32ri $ebx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $ebx 
= LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg RET64 $ebx @@ -529,7 +529,7 @@ ; CHECK-LABEL: name: testleaadd_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags + ; CHECK: $rbx = ADD64ri32 $rbx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $rbx = LEA64r killed $rax, 1, killed $rbp, -5, $noreg RET64 $ebx @@ -568,7 +568,7 @@ ; CHECK-LABEL: name: testleaadd_rbp_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags + ; CHECK: $rbx = ADD64ri32 $rbx, -5, implicit-def $eflags ; CHECK: RET64 $ebx $rbx = LEA64r killed $rbp, 1, killed $rax, -5, $noreg RET64 $ebx @@ -1026,7 +1026,7 @@ ; CHECK: bb.1: ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbp = LEA64r killed $rbx, 4, killed $rbx, 0, $noreg - ; CHECK: $rbp = ADD64ri8 $rbp, 5, implicit-def $eflags + ; CHECK: $rbp = ADD64ri32 $rbp, 5, implicit-def $eflags ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx @@ -1115,7 +1115,7 @@ ; CHECK: bb.1: ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebp = LEA64_32r killed $rbx, 4, killed $rbx, 0, $noreg - ; CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags + ; CHECK: $ebp = ADD32ri $ebp, 5, implicit-def $eflags ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir --- a/llvm/test/CodeGen/X86/limit-split-cost.mir +++ b/llvm/test/CodeGen/X86/limit-split-cost.mir @@ -101,14 +101,14 @@ successors: %bb.6(0x20000000), %bb.2(0x60000000) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15, 12, implicit-def dead early-clobber $eflags, !3 - CMP32ri8 %0, 2, implicit-def $eflags + CMP32ri %0, 2, implicit-def $eflags JCC_1 %bb.6, 4, implicit killed $eflags JMP_1 %bb.2 bb.2.do.body: successors: %bb.5(0x2aaaaaab), %bb.3(0x55555555) - CMP32ri8 %0, 1, implicit-def $eflags + CMP32ri %0, 1, implicit-def $eflags JCC_1 %bb.5, 4, implicit killed $eflags JMP_1 %bb.3 @@ -140,7 +140,7 @@ bb.7.do.cond: successors: %bb.8(0x04000000), %bb.1(0x7c000000) - CMP32mi8 %6, 1, $noreg, 0, $noreg, 5, implicit-def $eflags :: (dereferenceable load (s32) from @m, !tbaa !4) + CMP32mi %6, 1, $noreg, 0, $noreg, 5, implicit-def $eflags :: (dereferenceable load (s32) from @m, !tbaa !4) JCC_1 %bb.1, 5, implicit killed $eflags JMP_1 %bb.8 diff --git a/llvm/test/CodeGen/X86/machinesink-debug-inv-0.mir b/llvm/test/CodeGen/X86/machinesink-debug-inv-0.mir --- a/llvm/test/CodeGen/X86/machinesink-debug-inv-0.mir +++ b/llvm/test/CodeGen/X86/machinesink-debug-inv-0.mir @@ -73,7 +73,7 @@ ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.2(0x50000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr8 = PHI [[MOV8ri]], %bb.0, %8, %bb.3 - ; CHECK-NEXT: CMP32mi8 [[MOV64rm]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from @d, align 1) + ; CHECK-NEXT: CMP32mi [[MOV64rm]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from @d, align 1) ; CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags ; CHECK-NEXT: JMP_1 %bb.2 ; CHECK-NEXT: {{ $}} @@ -110,7 +110,7 @@ successors: %bb.3(0x30000000), %bb.2(0x50000000) %0:gr8 = PHI %2, %bb.0, %8, %bb.3 - CMP32mi8 %3, 1, $noreg, 0, 
$noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from @d, align 1) + CMP32mi %3, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from @d, align 1) %1:gr32 = MOV32rm %4, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from @e, align 1) JCC_1 %bb.3, 4, implicit $eflags JMP_1 %bb.2 diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir --- a/llvm/test/CodeGen/X86/optimize-compare.mir +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -27,7 +27,7 @@ %0:gr64 = COPY $rsi %1:gr64 = DEC64r %0, implicit-def $eflags ; CMP should be removed. - CMP64ri8 %1, 0, implicit-def $eflags + CMP64ri32 %1, 0, implicit-def $eflags %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg $al = SETCCr 4, implicit $eflags ... @@ -228,15 +228,15 @@ ; CHECK-LABEL: name: opt_redundant_flags_cmp_cmp_2 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 15, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr64 = COPY $rsi %1:gr64 = COPY $rdi - CMP64ri8 %0, 15, implicit-def $eflags + CMP64ri32 %0, 15, implicit-def $eflags $cl = SETCCr 2, implicit $eflags ; 2nd CMP should be removed. - CMP64ri8 %0, 15, implicit-def $eflags + CMP64ri32 %0, 15, implicit-def $eflags $bl = SETCCr 2, implicit $eflags ... --- @@ -316,11 +316,11 @@ bb.0: ; CHECK-LABEL: name: opt_redundant_flags_cmp_test ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi - ; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags + ; CHECK-NEXT: CMP32ri [[COPY]], 0, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr32 = COPY $esi - CMP32ri8 %0, 0, implicit-def $eflags + CMP32ri %0, 0, implicit-def $eflags $cl = SETCCr 2, implicit $eflags ; TEST should be removed TEST32rr %0, %0, implicit-def $eflags @@ -339,7 +339,7 @@ TEST32rr %0, %0, implicit-def $eflags $cl = SETCCr 2, implicit $eflags ; TEST should be removed - CMP32ri8 %0, 0, implicit-def $eflags + CMP32ri %0, 0, implicit-def $eflags $bl = SETCCr 2, implicit $eflags ... --- @@ -385,7 +385,7 @@ bb.0: ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: CMP64ri8 [[COPY]], 1, implicit-def $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags @@ -393,10 +393,10 @@ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags %0:gr64 = COPY $rsi ; CMP+SETCC %0 == 1 - CMP64ri8 %0, 1, implicit-def $eflags + CMP64ri32 %0, 1, implicit-def $eflags $cl = SETCCr 4, implicit $eflags ; CMP+SETCC %0 >= 2; CMP can be removed. 
- CMP64ri8 %0, 2, implicit-def $eflags + CMP64ri32 %0, 2, implicit-def $eflags ; %0 >=s 2 --> %0 >s 1 $bl = SETCCr 13, implicit $eflags ; %0 >=u 2 --> %0 >u 1 @@ -412,7 +412,7 @@ bb.0: ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 42, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags @@ -420,10 +420,10 @@ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr64 = COPY $rsi ; CMP+SETCC %0 != 42 - CMP64ri8 %0, 42, implicit-def $eflags + CMP64ri32 %0, 42, implicit-def $eflags $cl = SETCCr 5, implicit $eflags ; CMP+SETCC %0 >= 2; CMP can be removed. - CMP64ri8 %0, 41, implicit-def $eflags + CMP64ri32 %0, 41, implicit-def $eflags ; %0 >s 41 --> %0 >=s 42 $bl = SETCCr 15, implicit $eflags ; %0 >u 41 --> %0 >=u 42 @@ -484,16 +484,16 @@ bb.0: ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi - ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 42, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags - ; CHECK-NEXT: CMP64ri8 [[COPY]], 41, implicit-def $eflags + ; CHECK-NEXT: CMP64ri32 [[COPY]], 41, implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags %0:gr64 = COPY $rsi ; CMP+SETCC %0 &1 | FileCheck %s --match-full-lines -; CHECK: %5:gr32 = SUB32ri8 %0:gr32(tied-def 0), 1, implicit-def $eflags, debug-location !24; a.c:3:13 +; CHECK: %5:gr32 = SUB32ri %0:gr32(tied-def 0), 1, implicit-def $eflags, debug-location !24; a.c:3:13 ; CHECK-NEXT: %10:gr32 = MOVSX32rr8 %4:gr8 ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags, debug-location !25; a.c:3:18 diff --git a/llvm/test/DebugInfo/X86/sdag-dbgvalue-ssareg.ll b/llvm/test/DebugInfo/X86/sdag-dbgvalue-ssareg.ll --- a/llvm/test/DebugInfo/X86/sdag-dbgvalue-ssareg.ll +++ b/llvm/test/DebugInfo/X86/sdag-dbgvalue-ssareg.ll @@ -19,7 +19,7 @@ define dso_local i32 @main(i32 %arg0, i32 %arg1) local_unnamed_addr !dbg !11 { entry: ; CHECK-LABEL: bb.0.entry: -; INSTRREF: ADD32ri8 {{.*}} debug-instr-number 1 +; INSTRREF: ADD32ri {{.*}} debug-instr-number 1 %0 = add i32 %arg0, 42, !dbg !26 %1 = add i32 %arg1, 101, !dbg !26 %cmp = icmp eq i32 %1, 0 diff --git a/llvm/test/DebugInfo/assignment-tracking/X86/lower-to-value.ll b/llvm/test/DebugInfo/assignment-tracking/X86/lower-to-value.ll --- a/llvm/test/DebugInfo/assignment-tracking/X86/lower-to-value.ll +++ b/llvm/test/DebugInfo/assignment-tracking/X86/lower-to-value.ll @@ -42,9 +42,9 @@ ;; The final assignment (X.B += 2) doesn't get stored back to the alloca. This ;; means that that the stack location isn't valid for the entire lifetime of X. -; DBGVALUE: %2:gr64 = nsw ADD64ri8 %1, 2, implicit-def dead $eflags, debug-location +; DBGVALUE: %2:gr64 = nsw ADD64ri32 %1, 2, implicit-def dead $eflags, debug-location ; DBGVALUE-NEXT: DBG_VALUE %2, $noreg, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), debug-location -; INSTRREF: %2:gr64 = nsw ADD64ri8 %1, 2, implicit-def dead $eflags, debug-instr-number 1 +; INSTRREF: %2:gr64 = nsw ADD64ri32 %1, 2, implicit-def dead $eflags, debug-instr-number 1 ; INSTRREF-NEXT: DBG_INSTR_REF ![[VAR]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_fragment, 64, 64), dbg-instr-ref(1, 0), debug-location ;; Bits [0, 64) are still stack homed. 
FIXME, this particular reinstatement is diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1,12 +1,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { - {X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE}, {X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE}, {X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE}, - {X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE}, {X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE}, {X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE}, {X86::ADD64ri32_DB, X86::ADD64mi32, TB_NO_REVERSE}, - {X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE}, {X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE}, {X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE}, {X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE}, diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll @@ -26,7 +26,7 @@ call void @llvm.pseudoprobe(i64 -6878943695821059507, i64 9, i32 0, i64 -1) ;; Check an opeq form of instruction is created. ; CHECK: %[[#REG:]]:gr64_nosp = COPY killed %[[#]] -; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri8 %[[#REG]], 4, implicit-def dead $eflags +; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri32 %[[#REG]], 4, implicit-def dead $eflags %niter137.nsub.3 = add i64 %niter137, -4 %niter137.ncmp.3 = icmp eq i64 %niter137.nsub.3, 0 br i1 %niter137.ncmp.3, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14 diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def --- a/llvm/utils/TableGen/X86ManualFoldTables.def +++ b/llvm/utils/TableGen/X86ManualFoldTables.def @@ -233,13 +233,10 @@ // The following entries are added manually b/c the encodings of reg form does not match the // encoding of memory form ENTRY(ADD16ri_DB, ADD16mi, TB_NO_REVERSE) -ENTRY(ADD16ri8_DB, ADD16mi8, TB_NO_REVERSE) ENTRY(ADD16rr_DB, ADD16mr, TB_NO_REVERSE) ENTRY(ADD32ri_DB, ADD32mi, TB_NO_REVERSE) -ENTRY(ADD32ri8_DB, ADD32mi8, TB_NO_REVERSE) ENTRY(ADD32rr_DB, ADD32mr, TB_NO_REVERSE) ENTRY(ADD64ri32_DB, ADD64mi32, TB_NO_REVERSE) -ENTRY(ADD64ri8_DB, ADD64mi8, TB_NO_REVERSE) ENTRY(ADD64rr_DB, ADD64mr, TB_NO_REVERSE) ENTRY(ADD8ri_DB, ADD8mi, TB_NO_REVERSE) ENTRY(ADD8rr_DB, ADD8mr, TB_NO_REVERSE)
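
A note on the encoding the updated tests expect: throughout these hunks, MIR that previously carried an explicit 8-bit-immediate opcode (ADD64ri8, CMP32ri8, SUB64ri8, ...) now carries the 32-bit-immediate form even when the constant is tiny (SUB64ri32 $rsp, 40; CMP32ri %0, 1), and these expectations only make sense if the short encoding is re-derived later, at MC emission time. As a standalone illustration only (plain C++, not LLVM code; the helper names below are invented for this sketch), the following mirrors the signed-8-bit check that decides whether a group-1 ALU instruction such as ADD/SUB/CMP r/m32, imm can use the short sign-extended-imm8 opcode 0x83 instead of the full-imm32 opcode 0x81:

#include <cstdint>
#include <cstdio>

// Same predicate as llvm::isInt<8>: true iff Imm survives a round trip
// through a sign-extended 8-bit field.
static bool fitsInSignedImm8(int64_t Imm) {
  return Imm >= -128 && Imm <= 127;
}

// Group-1 immediate ALU ops (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP) share two
// encodings: 0x83 /r ib (sign-extended imm8) and 0x81 /r id (imm32).
// Picking 0x83 when the immediate fits saves three bytes.
static uint8_t chooseGroup1Opcode(int64_t Imm) {
  return fitsInSignedImm8(Imm) ? 0x83 : 0x81;
}

int main() {
  printf("imm 1   -> 0x%02x\n", chooseGroup1Opcode(1));   // 0x83, short form
  printf("imm -5  -> 0x%02x\n", chooseGroup1Opcode(-5));  // 0x83, short form
  printf("imm 300 -> 0x%02x\n", chooseGroup1Opcode(300)); // 0x81, imm32 form
  return 0;
}

Under that assumption, a check line like "SUB64ri32 $rsp, 40" in the PEI tests above should still assemble to the 4-byte 0x83-based "subq $40, %rsp"; only the MIR-level opcode name changes, not the bytes ultimately emitted.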