diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -80,38 +80,39 @@ }; enum MIFlag { - NoFlags = 0, - FrameSetup = 1 << 0, // Instruction is used as a part of - // function frame setup code. - FrameDestroy = 1 << 1, // Instruction is used as a part of - // function frame destruction code. - BundledPred = 1 << 2, // Instruction has bundled predecessors. - BundledSucc = 1 << 3, // Instruction has bundled successors. - FmNoNans = 1 << 4, // Instruction does not support Fast - // math nan values. - FmNoInfs = 1 << 5, // Instruction does not support Fast - // math infinity values. - FmNsz = 1 << 6, // Instruction is not required to retain - // signed zero values. - FmArcp = 1 << 7, // Instruction supports Fast math - // reciprocal approximations. - FmContract = 1 << 8, // Instruction supports Fast math - // contraction operations like fma. - FmAfn = 1 << 9, // Instruction may map to Fast math - // intrinsic approximation. - FmReassoc = 1 << 10, // Instruction supports Fast math - // reassociation of operand order. - NoUWrap = 1 << 11, // Instruction supports binary operator - // no unsigned wrap. - NoSWrap = 1 << 12, // Instruction supports binary operator - // no signed wrap. - IsExact = 1 << 13, // Instruction supports division is - // known to be exact. - NoFPExcept = 1 << 14, // Instruction does not raise - // floatint-point exceptions. - NoMerge = 1 << 15, // Passes that drop source location info - // (e.g. branch folding) should skip - // this instruction. + NoFlags = 0, + FrameSetup = 1 << 0, // Instruction is used as a part of + // function frame setup code. + FrameDestroy = 1 << 1, // Instruction is used as a part of + // function frame destruction code. + BundledPred = 1 << 2, // Instruction has bundled predecessors. + BundledSucc = 1 << 3, // Instruction has bundled successors. 
+ FmNoNans = 1 << 4, // Instruction does not support Fast + // math nan values. + FmNoInfs = 1 << 5, // Instruction does not support Fast + // math infinity values. + FmNsz = 1 << 6, // Instruction is not required to retain + // signed zero values. + FmArcp = 1 << 7, // Instruction supports Fast math + // reciprocal approximations. + FmContract = 1 << 8, // Instruction supports Fast math + // contraction operations like fma. + FmAfn = 1 << 9, // Instruction may map to Fast math + // intrinsic approximation. + FmReassoc = 1 << 10, // Instruction supports Fast math + // reassociation of operand order. + NoUWrap = 1 << 11, // Instruction supports binary operator + // no unsigned wrap. + NoSWrap = 1 << 12, // Instruction supports binary operator + // no signed wrap. + IsExact = 1 << 13, // Instruction supports division is + // known to be exact. + NoFPExcept = 1 << 14, // Instruction does not raise + // floating-point exceptions. + NoMerge = 1 << 15, // Passes that drop source location info + // (e.g. branch folding) should skip + // this instruction. + Unpredictable = 1 << 16, // Instruction with unpredictable condition. }; private: @@ -120,12 +121,10 @@ // Operands are allocated by an ArrayRecycler. MachineOperand *Operands = nullptr; // Pointer to the first operand. - uint16_t NumOperands = 0; // Number of operands on instruction. - - uint16_t Flags = 0; // Various bits of additional + uint32_t Flags = 0; // Various bits of additional // information about machine // instruction. - + uint16_t NumOperands = 0; // Number of operands on instruction. uint8_t AsmPrinterFlags = 0; // Various bits of information used by // the AsmPrinter to emit helpful // comments. This is *not* semantic @@ -357,7 +356,7 @@ } /// Return the MI flags bitvector. - uint16_t getFlags() const { + uint32_t getFlags() const { return Flags; } @@ -368,7 +367,7 @@ /// Set a MI flag. 
void setFlag(MIFlag Flag) { - Flags |= (uint16_t)Flag; + Flags |= (uint32_t)Flag; } void setFlags(unsigned flags) { @@ -379,7 +378,7 @@ /// clearFlag - Clear a MI flag. void clearFlag(MIFlag Flag) { - Flags &= ~((uint16_t)Flag); + Flags &= ~((uint32_t)Flag); } /// Return true if MI is in a bundle (but not the first MI in a bundle). @@ -1889,9 +1888,9 @@ /// Return the MIFlags which represent both MachineInstrs. This /// should be used when merging two MachineInstrs into one. This routine does /// not modify the MIFlags of this MachineInstr. - uint16_t mergeFlagsWith(const MachineInstr& Other) const; + uint32_t mergeFlagsWith(const MachineInstr& Other) const; - static uint16_t copyFlagsFromInstruction(const Instruction &I); + static uint32_t copyFlagsFromInstruction(const Instruction &I); /// Copy all flags to MachineInst MIFlags void copyIRFlags(const Instruction &I); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -395,6 +395,8 @@ // negative "NoFPExcept" flag here (that defaults to true) makes the flag // intersection logic more straightforward. bool NoFPExcept : 1; + // Instructions with attached 'unpredictable' metadata on IR level. + bool Unpredictable : 1; public: /// Default constructor turns off all optimization flags. @@ -402,7 +404,7 @@ : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false) {} + AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. 
void copyFMF(const FPMathOperator &FPMO) { @@ -427,6 +429,7 @@ void setApproximateFuncs(bool b) { ApproximateFuncs = b; } void setAllowReassociation(bool b) { AllowReassociation = b; } void setNoFPExcept(bool b) { NoFPExcept = b; } + void setUnpredictable(bool b) { Unpredictable = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -440,6 +443,7 @@ bool hasApproximateFuncs() const { return ApproximateFuncs; } bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } + bool hasUnpredictable() const { return Unpredictable; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. @@ -455,6 +459,7 @@ ApproximateFuncs &= Flags.ApproximateFuncs; AllowReassociation &= Flags.AllowReassociation; NoFPExcept &= Flags.NoFPExcept; + Unpredictable &= Flags.Unpredictable; } }; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -300,7 +300,7 @@ Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); - uint16_t Flags = 0; + uint32_t Flags = 0; if (isa<Instruction>(U)) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); @@ -314,7 +314,7 @@ MachineIRBuilder &MIRBuilder) { Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); - uint16_t Flags = 0; + uint32_t Flags = 0; if (isa<Instruction>(U)) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); @@ -345,7 +345,7 @@ MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { - uint16_t Flags = 0; + uint32_t Flags = 0; if (CI) Flags = MachineInstr::copyFlagsFromInstruction(*CI); MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); 
@@ -1438,7 +1438,7 @@ ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); - uint16_t Flags = 0; + uint32_t Flags = 0; if (const SelectInst *SI = dyn_cast<SelectInst>(&U)) Flags = MachineInstr::copyFlagsFromInstruction(*SI); @@ -1864,7 +1864,7 @@ if (!Opcode) return false; - unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI); if (EB == fp::ExceptionBehavior::ebIgnore) Flags |= MachineInstr::NoFPExcept; @@ -2370,7 +2370,7 @@ return CLI->lowerCall(MIRBuilder, Info); } case Intrinsic::fptrunc_round: { - unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI); // Convert the metadata argument to a constant integer Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata(); diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -73,6 +73,7 @@ kw_nsw, kw_exact, kw_nofpexcept, + kw_unpredictable, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -214,6 +214,7 @@ .Case("nsw", MIToken::kw_nsw) .Case("exact", MIToken::kw_exact) .Case("nofpexcept", MIToken::kw_nofpexcept) + .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) .Case("debug-instr-number", MIToken::kw_debug_instr_number) .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref) diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1451,7 +1451,8 @@ Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || Token.is(MIToken::kw_exact) || - 
Token.is(MIToken::kw_nofpexcept)) { + Token.is(MIToken::kw_nofpexcept) || + Token.is(MIToken::kw_unpredictable)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1479,6 +1480,8 @@ Flags |= MachineInstr::IsExact; if (Token.is(MIToken::kw_nofpexcept)) Flags |= MachineInstr::NoFPExcept; + if (Token.is(MIToken::kw_unpredictable)) + Flags |= MachineInstr::Unpredictable; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -801,6 +801,8 @@ OS << "nofpexcept "; if (MI.getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (MI.getFlag(MachineInstr::Unpredictable)) + OS << "unpredictable "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -534,14 +534,14 @@ setPCSections(MF, MI.getPCSections()); } -uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { +uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { // For now, the just return the union of the flags. If the flags get more // complicated over time, we might need more logic here. return getFlags() | Other.getFlags(); } -uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { - uint16_t MIFlags = 0; +uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { + uint32_t MIFlags = 0; // Copy the wrapping flags. 
if (const OverflowingBinaryOperator *OB = dyn_cast<OverflowingBinaryOperator>(&I)) { @@ -575,6 +575,9 @@ MIFlags |= MachineInstr::MIFlag::FmReassoc; } + if (I.getMetadata(LLVMContext::MD_unpredictable)) + MIFlags |= MachineInstr::MIFlag::Unpredictable; + return MIFlags; } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1078,6 +1078,9 @@ if (Flags.hasNoFPExcept()) MI->setFlag(MachineInstr::MIFlag::NoFPExcept); + + if (Flags.hasUnpredictable()) + MI->setFlag(MachineInstr::MIFlag::Unpredictable); } // Emit all of the actual operands of this instruction, adding them to the diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3381,6 +3381,9 @@ if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) Flags.copyFMF(*FPOp); + Flags.setUnpredictable( + cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable)); + // Min/max matching is only viable if all output VTs are the same. if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6889,7 +6889,7 @@ // Set the flags on the inserted instructions to be the merged flags of the // instructions that we have combined. 
- uint16_t Flags = Root.getFlags(); + uint32_t Flags = Root.getFlags(); if (MUL) Flags = Root.mergeFlagsWith(*MUL); for (auto *MI : InsInstrs) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -518,7 +518,7 @@ // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI // and clears nuw, nsw, and exact flags. - void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const; + void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const; bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -226,7 +226,7 @@ MachineInstr &NewMI2) const { // Propagate FP flags from the original instructions. // But clear poison-generating flags because those may not be valid now. 
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); @@ -239,7 +239,7 @@ } void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI, - uint16_t Flags) const { + uint32_t Flags) const { MI.setFlags(Flags); MI.clearFlag(MachineInstr::MIFlag::NoSWrap); MI.clearFlag(MachineInstr::MIFlag::NoUWrap); @@ -841,7 +841,7 @@ } } - uint16_t IntersectedFlags = 0; + uint32_t IntersectedFlags = 0; if (IsILPReassociate) IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); else diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1344,7 +1344,7 @@ MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const { - uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI2.setFlags(IntersectedFlags); } @@ -1630,7 +1630,7 @@ Register DstReg = Dst.getReg(); unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); - auto IntersectedFlags = Root.getFlags() & Prev.getFlags(); + uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); DebugLoc MergedLoc = DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -217,7 +217,7 @@ GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF()); // Copy MIFlags from Def to ASSIGN_TYPE instruction. It's required to keep // the flags after instruction selection. 
- const uint16_t Flags = Def->getFlags(); + const uint32_t Flags = Def->getFlags(); MIB.buildInstr(SPIRV::ASSIGN_TYPE) .addDef(Reg) .addUse(NewReg) diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp --- a/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -305,9 +305,13 @@ // Skip debug instructions. if (I.isDebugInstr()) continue; + X86::CondCode CC = X86::getCondFromCMov(I); - // Check if we found a X86::CMOVrr instruction. - if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { + // Check if we found a X86::CMOVrr instruction. If it is marked as + // unpredictable, skip it and do not convert it to branch. + if (CC != X86::COND_INVALID && + !I.getFlag(MachineInstr::MIFlag::Unpredictable) && + (IncludeLoads || !I.mayLoad())) { if (Group.empty()) { // We found first CMOV in the range, reset flags. FirstCC = CC; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26506,7 +26506,7 @@ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); + return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags()); } static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -9267,7 +9267,7 @@ // Propagate FP flags from the original instructions. // But clear poison-generating flags because those may not be valid now. // TODO: There should be a helper function for copying only fast-math-flags. 
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll --- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -356,62 +356,50 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } -; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch. +; If cmov instruction is marked as unpredictable, do not convert it to branch. define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 { ; CHECK-LABEL: MaxIndex_unpredictable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $2, %edi -; CHECK-NEXT: jl .LBB3_5 +; CHECK-NEXT: jl .LBB3_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: .LBB3_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%rsi,%rdx,4), %r8d -; CHECK-NEXT: movslq %edi, %r9 -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: cmpl (%rsi,%r9,4), %r8d -; CHECK-NEXT: jg .LBB3_4 -; CHECK-NEXT: # %bb.3: # %for.body -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: .LBB3_4: # %for.body -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: movl (%rsi,%rdx,4), %edi +; CHECK-NEXT: cltq +; CHECK-NEXT: cmpl (%rsi,%rax,4), %edi +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: cmpq %rdx, %rcx ; CHECK-NEXT: jne .LBB3_2 -; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-NEXT: .LBB3_3: # %for.cond.cleanup +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq 
; ; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable: ; CHECK-FORCEALL: # %bb.0: # %entry ; CHECK-FORCEALL-NEXT: xorl %eax, %eax ; CHECK-FORCEALL-NEXT: cmpl $2, %edi -; CHECK-FORCEALL-NEXT: jl .LBB3_5 +; CHECK-FORCEALL-NEXT: jl .LBB3_3 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader ; CHECK-FORCEALL-NEXT: movl %edi, %ecx -; CHECK-FORCEALL-NEXT: xorl %edi, %edi +; CHECK-FORCEALL-NEXT: xorl %eax, %eax ; CHECK-FORCEALL-NEXT: movl $1, %edx ; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r8d -; CHECK-FORCEALL-NEXT: movslq %edi, %r9 -; CHECK-FORCEALL-NEXT: movl %edx, %eax -; CHECK-FORCEALL-NEXT: cmpl (%rsi,%r9,4), %r8d -; CHECK-FORCEALL-NEXT: jg .LBB3_4 -; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body -; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-FORCEALL-NEXT: movl %edi, %eax -; CHECK-FORCEALL-NEXT: .LBB3_4: # %for.body -; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %edi +; CHECK-FORCEALL-NEXT: cltq +; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rax,4), %edi +; CHECK-FORCEALL-NEXT: cmovgl %edx, %eax ; CHECK-FORCEALL-NEXT: addq $1, %rdx -; CHECK-FORCEALL-NEXT: movl %eax, %edi ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx ; CHECK-FORCEALL-NEXT: jne .LBB3_2 -; CHECK-FORCEALL-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-FORCEALL-NEXT: .LBB3_3: # %for.cond.cleanup +; CHECK-FORCEALL-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-FORCEALL-NEXT: retq entry: %cmp14 = icmp sgt i32 %n, 1 @@ -724,26 +712,20 @@ ret i32 %z } -; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch. +; If cmov instruction is marked as unpredictable, do not convert it to branch. 
define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 { ; CHECK-LABEL: test_cmov_memoperand_unpredictable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: ja .LBB8_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: .LBB8_2: # %entry +; CHECK-NEXT: cmovbel (%rcx), %eax ; CHECK-NEXT: retq ; ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable: ; CHECK-FORCEALL: # %bb.0: # %entry ; CHECK-FORCEALL-NEXT: movl %edx, %eax ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi -; CHECK-FORCEALL-NEXT: ja .LBB8_2 -; CHECK-FORCEALL-NEXT: # %bb.1: # %entry -; CHECK-FORCEALL-NEXT: movl (%rcx), %eax -; CHECK-FORCEALL-NEXT: .LBB8_2: # %entry +; CHECK-FORCEALL-NEXT: cmovbel (%rcx), %eax ; CHECK-FORCEALL-NEXT: retq entry: %cond = icmp ugt i32 %a, %b