Index: include/llvm/Support/X86DisassemblerDecoderCommon.h =================================================================== --- include/llvm/Support/X86DisassemblerDecoderCommon.h +++ include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -393,6 +393,7 @@ ENUM_ENTRY(ENCODING_IRC, "Immediate for static rounding control") \ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ "opcode byte") \ + ENUM_ENTRY(ENCODING_CC, "Condition code encoded in opcode") \ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") \ ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \ Index: lib/Target/X86/Disassembler/X86Disassembler.cpp =================================================================== --- lib/Target/X86/Disassembler/X86Disassembler.cpp +++ lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -781,6 +781,9 @@ case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_CC: + mcInst.addOperand(MCOperand::createImm(insn.immediates[0])); + return false; case ENCODING_FP: translateFPRegister(mcInst, insn.modRM & 7); return false; Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp =================================================================== --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1846,6 +1846,9 @@ if (readOpcodeRegister(insn, 0)) return -1; break; + case ENCODING_CC: + insn->immediates[0] = insn->opcode & 0xf; + break; case ENCODING_FP: break; case ENCODING_VVVV: Index: lib/Target/X86/InstPrinter/X86InstPrinterCommon.h =================================================================== --- lib/Target/X86/InstPrinter/X86InstPrinterCommon.h +++ lib/Target/X86/InstPrinter/X86InstPrinterCommon.h @@ -23,6 +23,7 @@ using MCInstPrinter::MCInstPrinter; virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0; + void printCondCode(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS); void printVPCOMMnemonic(const MCInst *MI, raw_ostream &OS); void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS); Index: lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp =================================================================== --- lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp +++ lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp @@ -24,6 +24,30 @@ using namespace llvm; +void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm(); + switch (Imm) { + default: llvm_unreachable("Invalid condcode argument!"); + case 0: O << "o"; break; + case 1: O << "no"; break; + case 2: O << "b"; break; + case 3: O << "ae"; break; + case 4: O << "e"; break; + case 5: O << "ne"; break; + case 6: O << "be"; break; + case 7: O << "a"; break; + case 8: O << "s"; break; + case 9: O << "ns"; break; + case 0xa: O << "p"; break; + case 0xb: O << "np"; break; + case 0xc: O << "l"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "le"; break; + case 0xf: O << "g"; break; + } +} + void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O) { int64_t Imm = MI->getOperand(Op).getImm(); Index: lib/Target/X86/MCTargetDesc/X86BaseInfo.h =================================================================== --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -62,6 +62,38 @@ IP_HAS_LOCK = 16, 
IP_HAS_NOTRACK = 32 }; + + // X86 specific condition code. These correspond to X86_*_COND in + // X86InstrInfo.td. They must be kept in synch. + enum CondCode { + COND_O = 0, + COND_NO = 1, + COND_B = 2, + COND_AE = 3, + COND_E = 4, + COND_NE = 5, + COND_BE = 6, + COND_A = 7, + COND_S = 8, + COND_NS = 9, + COND_P = 10, + COND_NP = 11, + COND_L = 12, + COND_GE = 13, + COND_LE = 14, + COND_G = 15, + LAST_VALID_COND = COND_G, + + // Artificial condition codes. These are used by AnalyzeBranch + // to indicate a block terminated with two conditional branches that together + // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, + // which can't be represented on x86 with a single condition. These + // are never used in MachineInstrs and are inverses of one another. + COND_NE_OR_P, + COND_E_AND_NP, + + COND_INVALID + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that @@ -308,6 +340,11 @@ /// MRMSrcMemOp4 = 35, + /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code. + /// + MRMSrcMemCC = 36, + /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. /// @@ -337,6 +374,11 @@ /// MRMSrcRegOp4 = 51, + /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM + /// byte to specify the operands and also encodes a condition code + /// + MRMSrcRegCC = 52, + /// MRMXr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. /// @@ -722,10 +764,15 @@ case X86II::MRMSrcMemOp4: // Skip registers encoded in reg, VEX_VVVV, and I8IMM. return 3; + case X86II::MRMSrcMemCC: + // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a + // mask register. 
+ return 1; case X86II::MRMDestReg: case X86II::MRMSrcReg: case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: + case X86II::MRMSrcRegCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: Index: lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp =================================================================== --- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1059,16 +1059,17 @@ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; case X86II::MRMSrcReg: + case X86II::MRMSrcRegCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; - case X86II::MRMSrcMem: { + case X86II::MRMSrcMem: + case X86II::MRMSrcMemCC: REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X CurOp += X86::AddrNumOperands; break; - } case X86II::MRMDestReg: REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R @@ -1435,6 +1436,17 @@ CurOp = SrcRegNum + 1; break; } + case X86II::MRMSrcRegCC: { + unsigned FirstOp = CurOp++; + unsigned SecondOp = CurOp++; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + EmitRegModRMByte(MI.getOperand(SecondOp), + GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS); + break; + } case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp+1; @@ -1480,6 +1492,18 @@ CurOp = FirstMemOp + X86::AddrNumOperands; break; } + case X86II::MRMSrcMemCC: { + unsigned RegOp = CurOp++; + unsigned FirstMemOp = CurOp; + CurOp = FirstMemOp + X86::AddrNumOperands; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)), + TSFlags, Rex, CurByte, OS, Fixups, STI); + break; + } case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: Index: lib/Target/X86/X86CmovConversion.cpp =================================================================== --- lib/Target/X86/X86CmovConversion.cpp +++ lib/Target/X86/X86CmovConversion.cpp @@ -290,7 +290,7 @@ // Skip debug instructions. if (I.isDebugInstr()) continue; - X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); + X86::CondCode CC = X86::getCondFromCMov(I); // Check if we found a X86::CMOVrr instruction. if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { if (Group.empty()) { @@ -545,7 +545,7 @@ } unsigned CondCost = - DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; + DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth; unsigned ValCost = getDepthOfOptCmov( DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); @@ -593,7 +593,7 @@ /// move all debug instructions to after the last CMOV instruction, making the /// CMOV group consecutive. 
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { - assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID && + assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID && "Last instruction in a CMOV group must be a CMOV instruction"); SmallVector DBGInstructions; @@ -651,14 +651,14 @@ MachineInstr *LastCMOV = Group.back(); DebugLoc DL = MI.getDebugLoc(); - X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode())); + X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI)); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); // Potentially swap the condition codes so that any memory operand to a CMOV // is in the *false* position instead of the *true* position. We can invert // any non-memory operand CMOV instructions to cope with this and we ensure // memory operand CMOVs are only included with a single condition code. if (llvm::any_of(Group, [&](MachineInstr *I) { - return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC; + return I->mayLoad() && X86::getCondFromCMov(*I) == CC; })) std::swap(CC, OppCC); @@ -712,8 +712,7 @@ if (!MI.mayLoad()) { // Remember the false-side register input. unsigned FalseReg = - MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2) - .getReg(); + MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); // Walk back through any intermediate cmovs referenced. while (true) { auto FRIt = FalseBBRegRewriteTable.find(FalseReg); @@ -728,7 +727,7 @@ // The condition must be the *opposite* of the one we've decided to branch // on as the branch will go *around* the load and the load should happen // when the CMOV condition is false. - assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC && + assert(X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction."); @@ -767,7 +766,7 @@ // Move the new CMOV to just before the old one and reset any impacted // iterator. auto *NewCMOV = NewMIs.pop_back_val(); - assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC && + assert(X86::getCondFromCMov(*NewCMOV) == OppCC && "Last new instruction isn't the expected CMOV!"); LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump()); MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV); @@ -819,7 +818,7 @@ // If this CMOV we are processing is the opposite condition from the jump we // generated, then we have to swap the operands for the PHI that is going to // be generated. 
- if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC) + if (X86::getCondFromCMov(*MIIt) == OppCC) std::swap(Op1Reg, Op2Reg); auto Op1Itr = RegRewriteTable.find(Op1Reg); Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -2144,9 +2144,9 @@ return false; const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); - unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); - unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8); + unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CC); updateValueMap(I, ResultReg); return true; } Index: lib/Target/X86/X86FlagsCopyLowering.cpp =================================================================== --- lib/Target/X86/X86FlagsCopyLowering.cpp +++ lib/Target/X86/X86FlagsCopyLowering.cpp @@ -599,7 +599,7 @@ } // Otherwise we can just rewrite in-place. - if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) { + if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) { @@ -841,7 +841,7 @@ MachineOperand &FlagUse, CondRegArray &CondRegs) { // First get the register containing this specific condition. - X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode()); + X86::CondCode Cond = X86::getCondFromCMov(CMovI); unsigned CondReg; bool Inverted; std::tie(CondReg, Inverted) = @@ -852,12 +852,10 @@ // Insert a direct test of the saved register. insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); - // Rewrite the CMov to use the !ZF flag from the test (but match register - // size and memory operand), and then kill its use of the flags afterward. - auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg()); - CMovI.setDesc(TII->get(X86::getCMovFromCond( - Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8, - !CMovI.memoperands_empty()))); + // Rewrite the CMov to use the !ZF flag from the test, and then kill its use + // of the flags afterward. + CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1) + .setImm(Inverted ? X86::COND_E : X86::COND_NE); FlagUse.setIsKill(true); LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); } Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -653,9 +653,10 @@ BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) .addReg(CopyReg) .addReg(SizeReg); - BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg) + BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) .addReg(TestReg) - .addReg(ZeroReg); + .addReg(ZeroReg) + .addImm(X86::COND_B); // FinalReg now holds final stack pointer value, or zero if // allocation would overflow. 
Compare against the current stack Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2326,14 +2326,21 @@ CR->getSignedMax().slt(1ull << Width); } -static X86::CondCode getCondFromOpc(unsigned Opc) { +static X86::CondCode getCondFromNode(SDNode *N) { + assert(N->isMachineOpcode() && "Unexpected node"); X86::CondCode CC = X86::COND_INVALID; if (CC == X86::COND_INVALID) - CC = X86::getCondFromBranchOpc(Opc); + CC = X86::getCondFromBranchOpc(N->getMachineOpcode()); if (CC == X86::COND_INVALID) - CC = X86::getCondFromSETOpc(Opc); - if (CC == X86::COND_INVALID) - CC = X86::getCondFromCMovOpc(Opc); + CC = X86::getCondFromSETOpc(N->getMachineOpcode()); + if (CC == X86::COND_INVALID) { + unsigned Opc = N->getMachineOpcode(); + if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || Opc == X86::CMOV64rr) + CC = static_cast(N->getConstantOperandVal(2)); + else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || + Opc == X86::CMOV64rm) + CC = static_cast(N->getConstantOperandVal(6)); + } return CC; } @@ -2359,7 +2366,7 @@ // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which only use the zero flag. @@ -2395,7 +2402,7 @@ // Anything unusual: assume conservatively. if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); switch (CC) { // Comparisons which don't examine the SF flag. @@ -2456,7 +2463,7 @@ if (!FlagUI->isMachineOpcode()) return false; // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + X86::CondCode CC = getCondFromNode(*FlagUI); if (mayUseCarryFlag(CC)) return false; Index: lib/Target/X86/X86InstrCMovSetCC.td =================================================================== --- lib/Target/X86/X86InstrCMovSetCC.td +++ lib/Target/X86/X86InstrCMovSetCC.td @@ -13,67 +13,79 @@ // CMOV instructions. 
-multiclass CMOV opc, string Mnemonic, X86FoldableSchedWrite Sched, - PatLeaf CondNode> { - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1, SchedRW = [Sched] in { - def NAME#16rr - : I, - TB, OpSize16; - def NAME#32rr - : I, - TB, OpSize32; - def NAME#64rr - :RI, TB; - } +let isCodeGenOnly = 1, ForceDisassemble = 1 in { +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + isCommutable = 1, SchedRW = [WriteCMOV] in { + def CMOV16rr + : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, + (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize16; + def CMOV32rr + : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, + (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>, + TB, OpSize32; + def CMOV64rr + :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, + (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB; +} - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - SchedRW = [Sched.Folded, Sched.ReadAfterFold] in { - def NAME#16rm - : I, TB, OpSize16; - def NAME#32rm - : I, TB, OpSize32; - def NAME#64rm - :RI, TB; - } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" -} // end multiclass +let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in { + def CMOV16rm + : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), + "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize16; + def CMOV32rm + : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond), + "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + imm:$cond, EFLAGS))]>, TB, OpSize32; + def CMOV64rm + :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond), + "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + imm:$cond, EFLAGS))]>, TB; +} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" +} // isCodeGenOnly = 1, ForceDisassemble = 1 +multiclass CMOV_Aliases { + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; + def : InstAlias; +} -// Conditional Moves. 
-defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>; -defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>; -defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>; -defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>; -defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>; -defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>; -defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>; -defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>; -defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>; -defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>; -defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>; -defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>; -defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>; -defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>; -defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>; -defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>; +defm : CMOV_Aliases<"cmovo" , 0>; +defm : CMOV_Aliases<"cmovno", 1>; +defm : CMOV_Aliases<"cmovb" , 2>; +defm : CMOV_Aliases<"cmovae", 3>; +defm : CMOV_Aliases<"cmove" , 4>; +defm : CMOV_Aliases<"cmovne", 5>; +defm : CMOV_Aliases<"cmovbe", 6>; +defm : CMOV_Aliases<"cmova" , 7>; +defm : CMOV_Aliases<"cmovs" , 8>; +defm : CMOV_Aliases<"cmovns", 9>; +defm : CMOV_Aliases<"cmovp" , 10>; +defm : CMOV_Aliases<"cmovnp", 11>; +defm : CMOV_Aliases<"cmovl" , 12>; +defm : CMOV_Aliases<"cmovge", 13>; +defm : CMOV_Aliases<"cmovle", 14>; +defm : CMOV_Aliases<"cmovg" , 15>; // SetCC instructions. Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -1236,37 +1236,23 @@ def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), + SDLoc(N), MVT::i8); +}]>; + // Conditional moves with folded loads with operands swapped and conditions // inverted. -multiclass CMOVmr { - let Predicates = [HasCMov] in { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; - } +let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS), + (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS), + (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS), + (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; } -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; -defm : CMOVmr; - // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. 
Index: lib/Target/X86/X86InstrFoldTables.cpp =================================================================== --- lib/Target/X86/X86InstrFoldTables.cpp +++ lib/Target/X86/X86InstrFoldTables.cpp @@ -1249,54 +1249,9 @@ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMOV16rr, X86::CMOV16rm, 0 }, + { X86::CMOV32rr, X86::CMOV32rm, 0 }, + { X86::CMOV64rr, X86::CMOV64rm, 0 }, { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, Index: lib/Target/X86/X86InstrFormats.td =================================================================== --- lib/Target/X86/X86InstrFormats.td +++ lib/Target/X86/X86InstrFormats.td @@ -30,6 +30,7 @@ def MRMSrcMem : Format<33>; def MRMSrcMem4VOp3 : Format<34>; def MRMSrcMemOp4 : Format<35>; +def MRMSrcMemCC : Format<36>; def MRMXm : Format<39>; def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>; def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>; @@ -38,6 +39,7 @@ def MRMSrcReg : Format<49>; def MRMSrcReg4VOp3 : Format<50>; def MRMSrcRegOp4 : Format<51>; +def MRMSrcRegCC : Format<52>; def MRMXr : Format<55>; def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>; def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>; Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- 
lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -35,38 +35,6 @@ AC_EVEX_2_VEX = MachineInstr::TAsmComments }; -// X86 specific condition code. These correspond to X86_*_COND in -// X86InstrInfo.td. They must be kept in synch. -enum CondCode { - COND_A = 0, - COND_AE = 1, - COND_B = 2, - COND_BE = 3, - COND_E = 4, - COND_G = 5, - COND_GE = 6, - COND_L = 7, - COND_LE = 8, - COND_NE = 9, - COND_NO = 10, - COND_NP = 11, - COND_NS = 12, - COND_O = 13, - COND_P = 14, - COND_S = 15, - LAST_VALID_COND = COND_S, - - // Artificial condition codes. These are used by AnalyzeBranch - // to indicate a block terminated with two conditional branches that together - // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, - // which can't be represented on x86 with a single condition. These - // are never used in MachineInstrs and are inverses of one another. - COND_NE_OR_P, - COND_E_AND_NP, - - COND_INVALID -}; - // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); @@ -78,10 +46,8 @@ /// a memory operand. unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); -/// Return a cmov opcode for the given condition, register size in -/// bytes, and operand type. -unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand = false); +/// Return a cmov opcode for the given register size in bytes, and operand type. +unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); // Turn jCC opcode into condition code. CondCode getCondFromBranchOpc(unsigned Opc); @@ -90,7 +56,7 @@ CondCode getCondFromSETOpc(unsigned Opc); // Turn CMov opcode into condition code. -CondCode getCondFromCMovOpc(unsigned Opc); +CondCode getCondFromCMov(const MachineInstr &MI); /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -1641,76 +1641,11 @@ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } - case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: - case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: - case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: - case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: - case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: - case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: - case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: - case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: - case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: - case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: - case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: - case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: - case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: - case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: - case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: - case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { - unsigned Opc; - switch (MI.getOpcode()) { - default: llvm_unreachable("Unreachable!"); - case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; - case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; - case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; - case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; - case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; - case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; - case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; - case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; - case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; - case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; - case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; - case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; - case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; - case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; - case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; - case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; - case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; - case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; - case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; - case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; - case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; - case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; - case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; - case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; - case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; - case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; - case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; - case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; - case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; - case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; - case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; - case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; - case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; - case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; - case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; - case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; - case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; - case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; - case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; - case X86::CMOVNP16rr: Opc = 
X86::CMOVP16rr; break; - case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; - case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; - case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; - case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; - case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; - case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; - case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; - case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; - } + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: { auto &WorkingMI = cloneIfNew(MI); - WorkingMI.setDesc(get(Opc)); + unsigned OpNo = MI.getDesc().getNumOperands() - 1; + X86::CondCode CC = static_cast(MI.getOperand(OpNo).getImm()); + WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -2090,57 +2025,13 @@ } /// Return condition code of a CMov opcode. -X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { - switch (Opc) { +X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return X86::COND_INVALID; - case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: - case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: - return X86::COND_A; - case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: - case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: - return X86::COND_AE; - case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: - case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: - return X86::COND_B; - case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: - case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: - return X86::COND_BE; - case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: - case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: - return X86::COND_E; - case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: - case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: - return X86::COND_G; - case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: - case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: - return X86::COND_GE; - case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: - case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: - return X86::COND_L; - case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: - case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: - return X86::COND_LE; - case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: - case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr: - return X86::COND_NE; - case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: - case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: - return X86::COND_NO; - case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: - case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: - return X86::COND_NP; - case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: - case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: - return X86::COND_NS; - case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: - case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: - return X86::COND_O; - case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: - case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: - return X86::COND_P; - case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: - case 
X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: - return X86::COND_S; + case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: + case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: + return static_cast( + MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); } } @@ -2252,22 +2143,22 @@ /// whether it has memory operand. unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { - { X86::SETAr, X86::SETAm }, - { X86::SETAEr, X86::SETAEm }, + { X86::SETOr, X86::SETOm }, + { X86::SETNOr, X86::SETNOm }, { X86::SETBr, X86::SETBm }, - { X86::SETBEr, X86::SETBEm }, + { X86::SETAEr, X86::SETAEm }, { X86::SETEr, X86::SETEm }, - { X86::SETGr, X86::SETGm }, - { X86::SETGEr, X86::SETGEm }, - { X86::SETLr, X86::SETLm }, - { X86::SETLEr, X86::SETLEm }, { X86::SETNEr, X86::SETNEm }, - { X86::SETNOr, X86::SETNOm }, - { X86::SETNPr, X86::SETNPm }, + { X86::SETBEr, X86::SETBEm }, + { X86::SETAr, X86::SETAm }, + { X86::SETSr, X86::SETSm }, { X86::SETNSr, X86::SETNSm }, - { X86::SETOr, X86::SETOm }, { X86::SETPr, X86::SETPm }, - { X86::SETSr, X86::SETSm } + { X86::SETNPr, X86::SETNPm }, + { X86::SETLr, X86::SETLm }, + { X86::SETGEr, X86::SETGEm }, + { X86::SETLEr, X86::SETLEm }, + { X86::SETGr, X86::SETGm }, }; assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); @@ -2276,50 +2167,12 @@ /// Return a cmov opcode for the given condition, /// register size in bytes, and operand type. -unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { - static const uint16_t Opc[32][3] = { - { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, - { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, - { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, - { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, - { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, - { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, - { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, - { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, - { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, - { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, - { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, - { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, - { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, - { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, - { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, - { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, - { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, - { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, - { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, - { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, - { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, - { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, - { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, - { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, - { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, - { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, - { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, - { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, - { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, - { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, - { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, - { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } - }; - - assert(CC < 16 && "Can only handle standard cond codes"); - unsigned Idx = HasMemoryOperand ? 
16+CC : CC; +unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { switch(RegBytes) { default: llvm_unreachable("Illegal register size!"); - case 2: return Opc[Idx][0]; - case 4: return Opc[Idx][1]; - case 8: return Opc[Idx][2]; + case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; + case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; + case 8: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV64rr; } } @@ -2870,10 +2723,12 @@ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); - unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - TRI.getRegSizeInBits(RC) / 8, - false /*HasMemoryOperand*/); - BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); + unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8, + false /*HasMemoryOperand*/); + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addImm(Cond[0].getImm()); } /// Test if the given register is a physical h register. @@ -3728,7 +3583,7 @@ if (OldCC != X86::COND_INVALID) OpcIsSET = true; else - OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); + OldCC = X86::getCondFromCMov(Instr); } if (OldCC == X86::COND_INVALID) return false; } @@ -3781,10 +3636,7 @@ else if(OpcIsSET) NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand); else { - unsigned DstReg = Instr.getOperand(0).getReg(); - const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8, - HasMemoryOperand); + NewOpc = ReplacementCC; } // Push the MachineInstr to OpsToUpdate. @@ -3844,8 +3696,13 @@ CmpInstr.eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. - for (auto &Op : OpsToUpdate) - Op.first->setDesc(get(Op.second)); + for (auto &Op : OpsToUpdate) { + if (X86::getCondFromCMov(*Op.first) != X86::COND_INVALID) + Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) + .setImm(Op.second); + else + Op.first->setDesc(get(Op.second)); + } return true; } Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -593,6 +593,10 @@ def offset64_64 : X86MemOffsOperand; +def ccode : Operand { + let PrintMethod = "printCondCode"; +} + class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; let RenderMethod = "addImmOperands"; @@ -946,22 +950,22 @@ // X86 specific condition code. These correspond to CondCode in // X86InstrInfo.h. They must be kept in synch. -def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE -def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC +def X86_COND_O : PatLeaf<(i8 0)>; +def X86_COND_NO : PatLeaf<(i8 1)>; def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C -def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA +def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z -def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE -def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL -def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE -def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG -def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ -def X86_COND_NO : PatLeaf<(i8 10)>; +def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ +def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA +def X86_COND_A : PatLeaf<(i8 7)>; // alt. 
COND_NBE +def X86_COND_S : PatLeaf<(i8 8)>; +def X86_COND_NS : PatLeaf<(i8 9)>; +def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO -def X86_COND_NS : PatLeaf<(i8 12)>; -def X86_COND_O : PatLeaf<(i8 13)>; -def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE -def X86_COND_S : PatLeaf<(i8 15)>; +def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE +def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL +def X86_COND_LE : PatLeaf<(i8 14)>; // alt. COND_NG +def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE def i16immSExt8 : ImmLeaf(Imm); }]>; def i32immSExt8 : ImmLeaf(Imm); }]>; Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -160,7 +160,6 @@ def : WriteRes; // LEA instructions can't fold loads. defm : BWWriteResPair; // Conditional move. -defm : BWWriteResPair; // // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. @@ -1602,4 +1601,30 @@ def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. +def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let ResourceCycles = [1,1]; + let NumMicroOps = 2; +} + +def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { + let Latency = 7; + let ResourceCycles = [1,1,1]; + let NumMicroOps = 3; +} + +def BWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def BWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [BWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -165,7 +165,6 @@ defm : HWWriteResPair; defm : HWWriteResPair; // Conditional move. -defm : HWWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1886,4 +1885,30 @@ def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8, SBB16ri8, SBB32ri8, SBB64ri8)>; +// CMOVs that use both Z and C flag require an extra uop. +def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> { + let Latency = 3; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { + let Latency = 8; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def HWCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def HWCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [HWWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel Index: lib/Target/X86/X86SchedPredicates.td =================================================================== --- lib/Target/X86/X86SchedPredicates.td +++ lib/Target/X86/X86SchedPredicates.td @@ -60,3 +60,22 @@ // X86GenInstrInfo. def IsThreeOperandsLEAFn : TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>; + +// A predicate to check for COND_A and COND_BE CMOVs which have an extra uop +// on recent Intel CPUs. 
+def IsCMOVA_Or_CMOVBE : CheckAny<[ + CheckAll<[ + CheckNumOperands<4>, + CheckAny<[ + CheckImmOperand_s<3, "X86::COND_A">, + CheckImmOperand_s<3, "X86::COND_BE"> + ]> + ]>, + CheckAll<[ + CheckNumOperands<8>, + CheckAny<[ + CheckImmOperand_s<7, "X86::COND_A">, + CheckImmOperand_s<7, "X86::COND_BE"> + ]> + ]>, +]>; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -160,7 +160,6 @@ defm : SBWriteResPair; defm : SBWriteResPair; // Conditional move. -defm : SBWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1173,4 +1172,30 @@ ]>; def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>; +// CMOVs that use both Z and C flag require an extra uop. +def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> { + let Latency = 3; + let ResourceCycles = [2,1]; + let NumMicroOps = 3; +} + +def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> { + let Latency = 8; + let ResourceCycles = [1,2,1]; + let NumMicroOps = 4; +} + +def SBCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SBCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SBWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -158,7 +158,6 @@ def : WriteRes; // LEA instructions can't fold loads. defm : SKLWriteResPair; // Conditional move. -defm : SKLWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -1757,4 +1756,30 @@ def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. +def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKLCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKLCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKLWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -159,7 +159,6 @@ def : WriteRes; // LEA instructions can't fold loads. defm : SKXWriteResPair; // Conditional move. -defm : SKXWriteResPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes { @@ -2473,4 +2472,30 @@ def: InstRW<[WriteZero], (instrs CLC)>; +// CMOVs that use both Z and C flag require an extra uop. 
+def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> { + let Latency = 7; + let ResourceCycles = [1,2]; + let NumMicroOps = 3; +} + +def SKXCMOVA_CMOVBErr : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErr]>, + SchedVar +]>; + +def SKXCMOVA_CMOVBErm : SchedWriteVariant<[ + SchedVar, [SKXWriteCMOVA_CMOVBErm]>, + SchedVar +]>; + +def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; +def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; + } // SchedModel Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -163,7 +163,6 @@ defm WriteLZCNT : X86SchedWritePair; // Leading zero count. defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. defm WriteCMOV : X86SchedWritePair; // Conditional move. -defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move. def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -113,7 +113,6 @@ defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; defm : X86WriteRes; // x87 conditional move. def : WriteRes; Index: lib/Target/X86/X86ScheduleBdVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBdVer2.td +++ lib/Target/X86/X86ScheduleBdVer2.td @@ -444,12 +444,24 @@ def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>; defm : PdWriteResExPair; // Conditional move. -defm : PdWriteResExPair; // Conditional (CF + ZF flag) move. -def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm, - CMOVGE16rm, CMOVGE32rm, CMOVGE64rm, - CMOVL16rm, CMOVL32rm, CMOVL64rm, - CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>; +def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> { + let Latency = 5; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; +} + +def PdWriteCMOVmVar : SchedWriteVariant<[ + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar>, [PdWriteCMOVm]>, + SchedVar +]>; + +def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; defm : PdWriteRes; // x87 conditional move. Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -221,7 +221,6 @@ defm : JWriteResIntPair; defm : JWriteResIntPair; // Conditional move. -defm : JWriteResIntPair; // Conditional (CF + ZF flag) move. defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes; Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -131,7 +131,6 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; defm : X86WriteRes; // x87 conditional move. 
def : WriteRes; def : WriteRes { Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -215,7 +215,6 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResPair; -defm : ZnWriteResPair; def : WriteRes; def : WriteRes; defm : X86WriteRes; Index: lib/Target/X86/X86SpeculativeLoadHardening.cpp =================================================================== --- lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -751,7 +751,7 @@ for (X86::CondCode Cond : Conds) { int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); // Note that we intentionally use an empty debug location so that @@ -759,7 +759,8 @@ auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(CurStateReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(Cond); // If this is the last cmov and the EFLAGS weren't originally // live-in, mark them as killed. if (!LiveEFLAGS && Cond == Conds.back()) @@ -1176,12 +1177,13 @@ // Now cmov over the predicate if the comparison wasn't equal. int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(PS->InitialReg) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); @@ -2545,12 +2547,13 @@ // Now conditionally update the predicate state we just extracted if we ended // up at a different return address than expected. 
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; - auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) .addReg(NewStateReg, RegState::Kill) - .addReg(PS->PoisonReg); + .addReg(PS->PoisonReg) + .addImm(X86::COND_NE); CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); ++NumInstsInserted; LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); Index: test/CodeGen/X86/flags-copy-lowering.mir =================================================================== --- test/CodeGen/X86/flags-copy-lowering.mir +++ test/CodeGen/X86/flags-copy-lowering.mir @@ -283,19 +283,19 @@ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %2 - %3:gr64 = CMOVA64rr %0, %1, implicit $eflags - %4:gr64 = CMOVB64rr %0, %1, implicit $eflags - %5:gr64 = CMOVE64rr %0, %1, implicit $eflags - %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + %3:gr64 = CMOV64rr %0, %1, 7, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 2, implicit $eflags + %5:gr64 = CMOV64rr %0, %1, 4, implicit $eflags + %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags - ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 @@ -396,12 +396,12 @@ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags - ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags + ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags @@ -435,12 +435,12 @@ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $eflags = COPY %3 - %4:gr64 = CMOVE64rr %0, %1, implicit $eflags + %4:gr64 = CMOV64rr %0, %1, 4, implicit $eflags %5:gr64 = MOV64ri32 42 %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags ; CHECK-NOT: $eflags = ; CHECK: TEST8rr 
    ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
    ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags
    ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
@@ -628,30 +628,30 @@
  bb.1:
    liveins: $eflags
-    %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
    $rax = COPY %3
    RET 0, $rax
  bb.2:
    liveins: $eflags
-    %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    $rax = COPY %4
    RET 0, $rax
  bb.3:
    liveins: $eflags
-    %5:gr64 = CMOVS64rr %0, %1, implicit killed $eflags
+    %5:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    $rax = COPY %5
    RET 0, $rax
@@ -703,10 +703,10 @@
  bb.1:
    liveins: $eflags
-    %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %3:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %3:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
    $rax = COPY %3
    RET 0, $rax
@@ -728,30 +728,30 @@
  bb.3:
    liveins: $eflags
-    %4:gr64 = CMOVNE64rr %0, %1, implicit $eflags
+    %4:gr64 = CMOV64rr %0, %1, 5, implicit $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[NE_REG]], %[[NE_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    $rax = COPY %4
    RET 0, $rax
  bb.4:
    liveins: $eflags
-    %5:gr64 = CMOVP64rr %0, %1, implicit $eflags
+    %5:gr64 = CMOV64rr %0, %1, 10, implicit $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[P_REG]], %[[P_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    $rax = COPY %5
    RET 0, $rax
  bb.5:
    liveins: $eflags
-    %6:gr64 = CMOVS64rr %0, %1, implicit killed $eflags
+    %6:gr64 = CMOV64rr %0, %1, 8, implicit killed $eflags
    ; CHECK-NOT: $eflags =
    ; CHECK: TEST8rr %[[S_REG]], %[[S_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %6:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    $rax = COPY %6
    RET 0, $rax
@@ -876,11 +876,11 @@
    liveins: $eflags
    ; Just use $eflags on this side of the diamond.
-    %4:gr64 = CMOVA64rr %0, %1, implicit $eflags
+    %4:gr64 = CMOV64rr %0, %1, 7, implicit $eflags
    ; CHECK: bb.5:
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %4:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
    JMP_1 %bb.7
@@ -890,21 +890,21 @@
    liveins: $eflags
    ; Use, copy, and then use $eflags again.
-    %5:gr64 = CMOVA64rr %0, %1, implicit $eflags
+    %5:gr64 = CMOV64rr %0, %1, 7, implicit $eflags
    ; CHECK: bb.6:
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %5:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
    %6:gr64 = COPY $eflags
    $eflags = COPY %6:gr64
-    %7:gr64 = CMOVA64rr %0, %1, implicit $eflags
+    %7:gr64 = CMOV64rr %0, %1, 7, implicit $eflags
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
-    ; CHECK-NEXT: %7:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+    ; CHECK-NEXT: %7:gr64 = CMOV64rr %0, %1, 5, implicit killed $eflags
    ; CHECK-NOT: COPY{{( killed)?}} $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
    JMP_1 %bb.7
@@ -940,12 +940,12 @@
    liveins: $eflags
    ; And we're done.
-    %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
    $rax = COPY %8
    RET 0, $rax
    ; CHECK: bb.9:
    ; CHECK-NOT: $eflags
-    ; CHECK: %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
+    ; CHECK: %8:gr64 = CMOV64rr %0, %1, 4, implicit killed $eflags
...
---
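The MIR updates above all follow one pattern: the per-condition CMOV pseudo opcodes (CMOVA64rr, CMOVB64rr, CMOVE64rr, CMOVNE64rr, CMOVS64rr, CMOVP64rr) collapse into a single CMOV64rr whose trailing immediate is the X86::CondCode value (2 = B, 4 = E, 5 = NE, 7 = A, 8 = S, 10 = P). Below is a minimal sketch of how a pass builds and reads back such an instruction under this scheme, assuming the usual MachineInstrBuilder API and the X86 target-internal headers; emitCMov64 and getCMovCondition are illustrative helper names, not necessarily the in-tree ones.

// Sketch only: build %dst = CMOV64rr %false, %true, <cc> and read the
// condition back. Include paths for the target-internal headers are assumed.
#include "MCTargetDesc/X86BaseInfo.h"        // X86::CondCode and X86 opcode enum
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

static MachineInstr *emitCMov64(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator InsertPt,
                                const DebugLoc &DL, const TargetInstrInfo *TII,
                                unsigned DstReg, unsigned FalseReg,
                                unsigned TrueReg, X86::CondCode CC) {
  return BuildMI(MBB, InsertPt, DL, TII->get(X86::CMOV64rr), DstReg)
      .addReg(FalseReg)   // value kept when CC does not hold (tied to DstReg)
      .addReg(TrueReg)    // value moved in when CC holds
      .addImm(CC);        // condition code, e.g. X86::COND_NE == 5
}

static X86::CondCode getCMovCondition(const MachineInstr &MI) {
  // Operand 3 is the condition-code immediate.
  return static_cast<X86::CondCode>(MI.getOperand(3).getImm());
}

This mirrors the X86SpeculativeLoadHardening change earlier in the patch, where the opcode is now chosen purely by operand size (getCMovOpcode) and the condition travels as the final .addImm operand.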
Index: test/CodeGen/X86/non-value-mem-operand.mir
===================================================================
--- test/CodeGen/X86/non-value-mem-operand.mir
+++ test/CodeGen/X86/non-value-mem-operand.mir
@@ -217,7 +217,7 @@
    $rax = MOV64ri @global.1
    $rax = MOV64rm killed $rax, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from @global.1)
    TEST64rr $rax, $rax, implicit-def $eflags
-    $rax = CMOVE64rr undef $rax, killed $rax, implicit killed $eflags
+    $rax = CMOV64rr undef $rax, killed $rax, 4, implicit killed $eflags
    $ecx = MOV32rm undef $rax, 1, $noreg, 0, $noreg :: (load 4 from `i32* undef`)
    $rdx = MOV64rm $r12, 8, $r14, 0, $noreg :: (load 8 from %ir.tmp3)
    $r15 = LEA64r $rdx, 1, $noreg, 1, _
Index: test/CodeGen/X86/post-ra-sched-with-debug.mir
===================================================================
--- test/CodeGen/X86/post-ra-sched-with-debug.mir
+++ test/CodeGen/X86/post-ra-sched-with-debug.mir
@@ -283,13 +283,13 @@
    $edx = XOR32rr undef $edx, undef $edx, implicit-def dead $eflags, implicit-def $rdx
    TEST64rr $rcx, $rcx, implicit-def $eflags
    $esi = MOV32ri @o, implicit-def $rsi
-    $rsi = CMOVNE64rr killed $rsi, $rdx, implicit killed $eflags
+    $rsi = CMOV64rr killed $rsi, $rdx, 5, implicit killed $eflags
    $rsi = OR64rr killed $rsi, killed $rcx, implicit-def $eflags
    $rcx = LEA64r $rbp, 1, $noreg, -20, $noreg
    DBG_VALUE $rcx, $noreg, !46, !17, debug-location !48
    DBG_VALUE $rcx, $noreg, !39, !17, debug-location !44
    DBG_VALUE $rbp, -20, !29, !17, debug-location !36
-    $rcx = CMOVNE64rr killed $rcx, killed $rdx, implicit killed $eflags
+    $rcx = CMOV64rr killed $rcx, killed $rdx, 5, implicit killed $eflags
    $rcx = OR64rr killed $rcx, killed $rsi, implicit-def dead $eflags
    $rdx = MOVSX64rm32 $rbx, 1, $noreg, 0, $noreg :: (load 4, align 8)
    TEST32mr killed $rcx, 4, killed $rdx, 0, $noreg, killed $eax, implicit-def $eflags :: (load 4)
Index: test/CodeGen/X86/tail-call-conditional.mir
===================================================================
--- test/CodeGen/X86/tail-call-conditional.mir
+++ test/CodeGen/X86/tail-call-conditional.mir
@@ -48,7 +48,7 @@
    ; CHECK-NEXT: $rdi = COPY $rsi
    ; CHECK-NEXT: $rsi = COPY $rax
    ; CHECK-NEXT: CMP64ri8 $rax, 9, implicit-def $eflags
-    ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi
+    ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 6, csr_64, implicit $rsp, implicit $eflags, implicit $ssp, implicit $rsp, implicit $rdi, implicit $rsi, implicit $rdi, implicit-def $rdi, implicit $hsi, implicit-def $hsi, implicit $sih, implicit-def $sih, implicit $sil, implicit-def $sil, implicit $si, implicit-def $si, implicit $esi, implicit-def $esi, implicit $rsi, implicit-def $rsi, implicit $hdi, implicit-def $hdi, implicit $dih, implicit-def $dih, implicit $dil, implicit-def $dil, implicit $di, implicit-def $di, implicit $edi, implicit-def $edi
  bb.1:
    successors: %bb.2, %bb.3
Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
===================================================================
--- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
+++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
@@ -227,19 +227,20 @@
 }

 TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
-  // CMOVA32rr has tied variables, we enumerate the possible values to execute
+  // CMOV32rr has tied variables, we enumerate the possible values to execute
   // as many in parallel as possible.
-  // - CMOVA32rr
+  // - CMOV32rr
   // - Op0 Explicit Def RegClass(GR32)
   // - Op1 Explicit Use RegClass(GR32) TiedToOp0
   // - Op2 Explicit Use RegClass(GR32)
+  // - Op3 Explicit Use Immediate
   // - Op3 Implicit Use Reg(EFLAGS)
   // - Var0 [Op0,Op1]
   // - Var1 [Op2]
   // - hasTiedRegisters (execution is always serial)
   // - hasAliasingRegisters
-  const unsigned Opcode = llvm::X86::CMOVA32rr;
+  const unsigned Opcode = llvm::X86::CMOV32rr;
   const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
   ASSERT_THAT(CodeTemplates, SizeIs(1));
   const auto &CT = CodeTemplates[0];
@@ -249,7 +250,7 @@
   ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount));
   std::unordered_set<unsigned> AllDefRegisters;
   for (const auto &IT : CT.Instructions) {
-    ASSERT_THAT(IT.VariableValues, SizeIs(2));
+    ASSERT_THAT(IT.VariableValues, SizeIs(3));
     AllDefRegisters.insert(IT.VariableValues[0].getReg());
   }
   EXPECT_THAT(AllDefRegisters, SizeIs(kInstructionCount))
Index: utils/TableGen/X86RecognizableInstr.h
===================================================================
--- utils/TableGen/X86RecognizableInstr.h
+++ utils/TableGen/X86RecognizableInstr.h
@@ -105,6 +105,7 @@
     MRMSrcMem = 33,
     MRMSrcMem4VOp3 = 34,
     MRMSrcMemOp4 = 35,
+    MRMSrcMemCC = 36,
     MRMXm = 39,
     MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43,
     MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47,
@@ -112,6 +113,7 @@
     MRMSrcReg = 49,
     MRMSrcReg4VOp3 = 50,
     MRMSrcRegOp4 = 51,
+    MRMSrcRegCC = 52,
     MRMXr = 55,
     MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59,
     MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63,
Index: utils/TableGen/X86RecognizableInstr.cpp
===================================================================
--- utils/TableGen/X86RecognizableInstr.cpp
+++ utils/TableGen/X86RecognizableInstr.cpp
@@ -580,6 +580,13 @@
     HANDLE_OPERAND(rmRegister)
     HANDLE_OPTIONAL(immediate)
     break;
+  case X86Local::MRMSrcRegCC:
+    assert(numPhysicalOperands == 3 &&
+           "Unexpected number of operands for MRMSrcRegCC");
+    HANDLE_OPERAND(roRegister)
+    HANDLE_OPERAND(rmRegister)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
  case X86Local::MRMSrcMem:
    // Operand 1 is a register operand in the Reg/Opcode field.
    // Operand 2 is a memory operand (possibly SIB-extended)
@@ -620,6 +627,13 @@
     HANDLE_OPERAND(memory)
     HANDLE_OPTIONAL(immediate)
     break;
+  case X86Local::MRMSrcMemCC:
+    assert(numPhysicalOperands == 3 &&
+           "Unexpected number of operands for MRMSrcMemCC");
+    HANDLE_OPERAND(roRegister)
+    HANDLE_OPERAND(memory)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
  case X86Local::MRMXr:
  case X86Local::MRM0r:
  case X86Local::MRM1r:
@@ -729,6 +743,7 @@
  case X86Local::MRMSrcReg:
  case X86Local::MRMSrcReg4VOp3:
  case X86Local::MRMSrcRegOp4:
+  case X86Local::MRMSrcRegCC:
  case X86Local::MRMXr:
    filter = llvm::make_unique<ModFilter>(true);
    break;
@@ -736,6 +751,7 @@
  case X86Local::MRMSrcMem:
  case X86Local::MRMSrcMem4VOp3:
  case X86Local::MRMSrcMemOp4:
+  case X86Local::MRMSrcMemCC:
  case X86Local::MRMXm:
    filter = llvm::make_unique<ModFilter>(false);
    break;
@@ -768,14 +784,14 @@
  assert(opcodeType && "Opcode type not set");
  assert(filter && "Filter not set");
-  if (Form == X86Local::AddRegFrm) {
-    assert(((opcodeToSet & 7) == 0) &&
-           "ADDREG_FRM opcode not aligned");
+  if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC ||
+      Form == X86Local::MRMSrcMemCC) {
+    unsigned Count = Form == X86Local::AddRegFrm ? 8 : 16;
+    assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned");
    uint8_t currentOpcode;
-    for (currentOpcode = opcodeToSet;
-         currentOpcode < opcodeToSet + 8;
+    for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count;
         ++currentOpcode)
      tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter,
                            UID, Is32Bit, OpPrefix == 0,
@@ -850,6 +866,7 @@
  TYPE("i64i32imm_pcrel", TYPE_REL)
  TYPE("i16imm_pcrel", TYPE_REL)
  TYPE("i32imm_pcrel", TYPE_REL)
+  TYPE("ccode", TYPE_IMM)
  TYPE("AVX512RC", TYPE_IMM)
  TYPE("brtarget32", TYPE_REL)
  TYPE("brtarget16", TYPE_REL)
@@ -1165,6 +1182,7 @@
  ENCODING("GR64", ENCODING_RO)
  ENCODING("GR16", ENCODING_Rv)
  ENCODING("GR8", ENCODING_RB)
+  ENCODING("ccode", ENCODING_CC)
  errs() << "Unhandled opcode modifier encoding " << s << "\n";
  llvm_unreachable("Unhandled opcode modifier encoding");
}
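For the table-emission change above, a worked example of the opcode fan-out may help: AddRegFrm instructions span 8 consecutive opcodes (register number in the low three bits), while the new MRMSrcRegCC/MRMSrcMemCC forms span 16 (condition code in the low four bits, e.g. CMOVcc at 0F 40 through 0F 4F). The standalone sketch below only illustrates the alignment and fan-out rule; the byte values and the loop body are illustrative, not the emitter's actual setTableFields call.

// Sketch only: one table slot per opcode in the instruction's range.
#include <cassert>
#include <cstdio>

static void fillRange(unsigned BaseOpcode, bool IsCondCodeForm) {
  unsigned Count = IsCondCodeForm ? 16 : 8;   // CC forms fan out to 16 opcodes
  assert(BaseOpcode % Count == 0 && "opcode range must be aligned");
  for (unsigned Opc = BaseOpcode; Opc < BaseOpcode + Count; ++Opc)
    std::printf("  opcode 0x%02X -> same UID, low bits = %u\n", Opc,
                Opc % Count);
}

int main() {
  std::printf("AddRegFrm, e.g. B8+rd (MOV r32, imm32):\n");
  fillRange(0xB8, /*IsCondCodeForm=*/false);
  std::printf("MRMSrcRegCC, e.g. 0F 40..4F (CMOVcc r32, r/m32):\n");
  fillRange(0x40, /*IsCondCodeForm=*/true);
  return 0;
}

On the decode side this is why the ccode operand maps to TYPE_IMM and ENCODING_CC: all sixteen opcodes in the range share one instruction UID, and the condition is carried in the opcode's low four bits rather than in a separate immediate byte.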