Index: llvm/include/llvm/CodeGen/MachineInstr.h =================================================================== --- llvm/include/llvm/CodeGen/MachineInstr.h +++ llvm/include/llvm/CodeGen/MachineInstr.h @@ -110,6 +110,7 @@ NoMerge = 1 << 15, // Passes that drop source location info // (e.g. branch folding) should skip // this instruction. + Unpredictable = 1 << 16, // Instruction is unpredictable. }; private: @@ -120,7 +121,7 @@ MachineOperand *Operands = nullptr; // Pointer to the first operand. unsigned NumOperands = 0; // Number of operands on instruction. - uint16_t Flags = 0; // Various bits of additional + uint32_t Flags = 0; // Various bits of additional // information about machine // instruction. @@ -322,7 +323,7 @@ } /// Return the MI flags bitvector. - uint16_t getFlags() const { + uint32_t getFlags() const { return Flags; } @@ -333,7 +334,7 @@ /// Set a MI flag. void setFlag(MIFlag Flag) { - Flags |= (uint16_t)Flag; + Flags |= (uint32_t)Flag; } void setFlags(unsigned flags) { @@ -344,7 +345,7 @@ /// clearFlag - Clear a MI flag. void clearFlag(MIFlag Flag) { - Flags &= ~((uint16_t)Flag); + Flags &= ~((uint32_t)Flag); } /// Return true if MI is in a bundle (but not the first MI in a bundle). @@ -1809,9 +1810,9 @@ /// Return the MIFlags which represent both MachineInstrs. This /// should be used when merging two MachineInstrs into one. This routine does /// not modify the MIFlags of this MachineInstr. 
- uint16_t mergeFlagsWith(const MachineInstr& Other) const; + uint32_t mergeFlagsWith(const MachineInstr& Other) const; - static uint16_t copyFlagsFromInstruction(const Instruction &I); + static uint32_t copyFlagsFromInstruction(const Instruction &I); /// Copy all flags to MachineInst MIFlags void copyIRFlags(const Instruction &I); Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -386,6 +386,7 @@ // negative "NoFPExcept" flag here (that defaults to true) makes the flag // intersection logic more straightforward. bool NoFPExcept : 1; + bool Unpredictable : 1; public: /// Default constructor turns off all optimization flags. @@ -393,7 +394,7 @@ : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false) {} + AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -418,6 +419,7 @@ void setApproximateFuncs(bool b) { ApproximateFuncs = b; } void setAllowReassociation(bool b) { AllowReassociation = b; } void setNoFPExcept(bool b) { NoFPExcept = b; } + void setUnpredictable(bool b) { Unpredictable = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -431,6 +433,7 @@ bool hasApproximateFuncs() const { return ApproximateFuncs; } bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } + bool hasUnpredictable() const { return Unpredictable; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. 
@@ -446,6 +449,7 @@ ApproximateFuncs &= Flags.ApproximateFuncs; AllowReassociation &= Flags.AllowReassociation; NoFPExcept &= Flags.NoFPExcept; + Unpredictable &= Flags.Unpredictable; } }; Index: llvm/lib/CodeGen/MIRPrinter.cpp =================================================================== --- llvm/lib/CodeGen/MIRPrinter.cpp +++ llvm/lib/CodeGen/MIRPrinter.cpp @@ -784,6 +784,8 @@ OS << "nofpexcept "; if (MI.getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (MI.getFlag(MachineInstr::Unpredictable)) + OS << "unpredictable "; OS << TII->getName(MI.getOpcode()); if (I < E) Index: llvm/lib/CodeGen/MachineInstr.cpp =================================================================== --- llvm/lib/CodeGen/MachineInstr.cpp +++ llvm/lib/CodeGen/MachineInstr.cpp @@ -528,14 +528,14 @@ setHeapAllocMarker(MF, MI.getHeapAllocMarker()); } -uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { +uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { // For now, the just return the union of the flags. If the flags get more // complicated over time, we might need more logic here. return getFlags() | Other.getFlags(); } -uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { - uint16_t MIFlags = 0; +uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { + uint32_t MIFlags = 0; // Copy the wrapping flags. 
if (const OverflowingBinaryOperator *OB = dyn_cast<OverflowingBinaryOperator>(&I)) { @@ -569,6 +569,9 @@ MIFlags |= MachineInstr::MIFlag::FmReassoc; } + if (I.getMetadata(LLVMContext::MD_unpredictable)) + MIFlags |= MachineInstr::MIFlag::Unpredictable; + return MIFlags; } Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1038,6 +1038,9 @@ if (Flags.hasNoFPExcept()) MI->setFlag(MachineInstr::MIFlag::NoFPExcept); + + if (Flags.hasUnpredictable()) + MI->setFlag(MachineInstr::MIFlag::Unpredictable); } // Emit all of the actual operands of this instruction, adding them to the Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3287,6 +3287,9 @@ if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) Flags.copyFMF(*FPOp); + Flags.setUnpredictable( + cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable)); + // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; Index: llvm/lib/Target/X86/X86CmovConversion.cpp =================================================================== --- llvm/lib/Target/X86/X86CmovConversion.cpp +++ llvm/lib/Target/X86/X86CmovConversion.cpp @@ -291,6 +291,11 @@ // Skip debug instructions. if (I.isDebugInstr()) continue; + + // If cmov instruction is marked as unpredictable, do not convert it to branch. + if (I.getFlag(MachineInstr::MIFlag::Unpredictable)) + continue; + X86::CondCode CC = X86::getCondFromCMov(I); // Check if we found a X86::CMOVrr instruction. 
if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24778,6 +24778,7 @@ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. + SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags()); SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } Index: llvm/test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -197,6 +197,39 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; CHECK-LABEL: MaxIndex_unpredictable +; CHECK-NOT: jg +; CHECK: cmov + +define i32 @MaxIndex_unpredictable(i32 %n, i32* nocapture readonly %a) #0 { +entry: + %cmp14 = icmp sgt i32 %n, 1 + br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ] + ret i32 %t.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ] + %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %t.015 to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %cmp3 = icmp sgt i32 %0, %1 + %2 = trunc i64 %indvars.iv to i32 + %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond 
= icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + ; CHECK-LABEL: MaxValue ; CHECK-NOT: jg ; CHECK: cmovg @@ -341,6 +374,19 @@ ret i32 %z } +; If cmov instruction is marked as unpredictable, do not convert it to branch. +define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 { +; CHECK-LABEL: test_cmov_memoperand_unpredictable: +entry: + %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax +; CHECK: cmpl + %load = load i32, i32* %y + %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0 +; CHECK: cmov + ret i32 %z +} + ; Test that we can convert a group of cmovs where only one has a memory ; operand. define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 { @@ -500,29 +546,29 @@ %end = load i32*, i32** @end, align 8 br label %loop.body -; CHECK-NEXT: .LBB13_1: # %loop.body +; CHECK-NEXT: .LBB15_1: # %loop.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: addq $8, %rcx ; CHECK-NEXT: cmpq %rdx, %rcx -; CHECK-NEXT: ja .LBB13_3 +; CHECK-NEXT: ja .LBB15_3 ; CHECK-NEXT: # %bb.2: # %loop.body -; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 ; CHECK-NEXT: movq (%r8), %rcx -; CHECK-NEXT: .LBB13_3: # %loop.body -; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: .LBB15_3: # %loop.body +; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 ; CHECK-NEXT: movl %edi, (%rcx) ; CHECK-NEXT: addq $8, %rcx ; CHECK-NEXT: cmpq %rdx, %rcx -; CHECK-NEXT: ja .LBB13_5 +; CHECK-NEXT: ja .LBB15_5 ; CHECK-NEXT: # %bb.4: # %loop.body -; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 ; CHECK-NEXT: movq %rax, %rcx -; CHECK-NEXT: .LBB13_5: # %loop.body -; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: .LBB15_5: # %loop.body +; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 ; CHECK-NEXT: movl %edi, (%rcx) ; CHECK-NEXT: addl $1, %esi ; CHECK-NEXT: cmpl 
$1024, %esi # imm = 0x400 -; CHECK-NEXT: jl .LBB13_1 +; CHECK-NEXT: jl .LBB15_1 loop.body: %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ] @@ -546,3 +592,4 @@ } attributes #0 = {"target-cpu"="x86-64"} +!0 = !{}