Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -460,6 +460,12 @@
     return false;
   }
 
+  /// Return true if the instruction generated for ICMP_EQ can be folded with
+  /// the instruction generated for ICMP_SGT/ICMP_SLT on the same operands.
+  /// CodeGenPrepare only rewrites a dominated 'icmp eq' into a signed compare
+  /// for targets that override this to return false.
+  virtual bool isICMP_EQFoldedWithICMP_ST() const { return true; }
+
   /// Return true if it is safe to transform an integer-domain bitwise operation
   /// into the equivalent floating-point operation. This should be set to true
   /// if the target has IEEE-754-compliant fabs/fneg operations for the input
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1406,6 +1406,100 @@
   return MadeChange;
 }
 
+/// Try to rewrite an 'icmp eq' that is only reached when a dominating
+/// 'icmp sgt/slt' on the same operands is false into the opposite signed
+/// comparison, inverting its branch/select users to compensate.
+///
+/// Only runs for targets whose isICMP_EQFoldedWithICMP_ST() hook returns
+/// false. Returns true if \p Cmp and its users were changed.
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+                                       const TargetLowering &TLI) {
+  // DomCond = icmp sgt/slt CmpOp0, C (might not be in DomBB)
+  // ...
+  // DomBB:
+  //   ...
+  //   br DomCond, TrueBB, CmpBB
+  // CmpBB: (with DomBB being the single predecessor)
+  //   ...
+  //   Cmp = icmp eq CmpOp0, C
+  //   ...
+  if (TLI.isICMP_EQFoldedWithICMP_ST())
+    return false;
+
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred != ICmpInst::ICMP_EQ)
+    return false;
+
+  // Only a constant right-hand side is handled; C merely gates the match and
+  // its value is not otherwise used.
+  Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+  const APInt *C;
+  if (!match(CmpOp1, m_APInt(C)))
+    return false;
+
+  // Every user must be invertible in place: a conditional branch, or a select
+  // that uses Cmp as its condition only. A select that also takes Cmp as one
+  // of its value operands shows up once per use in users(), so it would be
+  // swapped more than once below and would still observe the changed value.
+  for (User *U : Cmp->users()) {
+    if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+      continue;
+    auto *SI = dyn_cast<SelectInst>(U);
+    if (SI && SI->getCondition() == Cmp && SI->getTrueValue() != Cmp &&
+        SI->getFalseValue() != Cmp)
+      continue;
+    return false;
+  }
+
+  // Cheap dominance check: CmpBB must have a single predecessor ...
+  BasicBlock *CmpBB = Cmp->getParent();
+  BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+  if (!DomBB)
+    return false;
+
+  // ... that ends in a conditional branch reaching CmpBB on its false edge.
+  Value *DomCond;
+  BasicBlock *TrueBB, *FalseBB;
+  if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+    return false;
+  if (CmpBB != FalseBB)
+    return false;
+
+  // The branch condition must be a signed greater/less-than on the very same
+  // operands as Cmp.
+  ICmpInst::Predicate DomPred;
+  if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+    return false;
+  if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+    return false;
+
+  // In CmpBB the dominating comparison is known false, so 'eq' is the negation
+  // of the opposite signed comparison.
+  ICmpInst::Predicate NewPred =
+      DomPred == ICmpInst::ICMP_SGT ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
+
+  // Invert every user so the program keeps its meaning under NewPred.
+  for (User *U : Cmp->users()) {
+    if (auto *BI = dyn_cast<BranchInst>(U)) {
+      assert(BI->isConditional() && "Must be conditional");
+      BI->swapSuccessors();
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(U)) {
+      // Swap operands
+      Value *TrueValue = SI->getTrueValue();
+      Value *FalseValue = SI->getFalseValue();
+      SI->setTrueValue(FalseValue);
+      SI->setFalseValue(TrueValue);
+      SI->swapProfMetadata();
+      continue;
+    }
+    llvm_unreachable("Must be a branch or a select");
+  }
+  Cmp->setPredicate(NewPred);
+  return true;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1416,6 +1510,9 @@
   if (combineToUSubWithOverflow(Cmp, ModifiedDT))
     return true;
 
+  if (foldICmpWithDominatingICmp(Cmp, *TLI))
+    return true;
+
   return false;
 }
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -615,6 +615,8 @@
       return true;
     }
 
+    bool isICMP_EQFoldedWithICMP_ST() const override { return false; }
+
     bool hasAndNotCompare(SDValue) const override {
       return true;
     }
Index: llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll
+++ llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll
@@ -26,7 +26,7 @@
 ; PPC64LE-NEXT: blr
 ; PPC64LE-NEXT: .LBB0_2: # %if.end
 ; PPC64LE-NEXT: li 5, 1
-; PPC64LE-NEXT: isel 4, 4, 5, 2
+; PPC64LE-NEXT: isel 4, 5, 4, 0
 ; PPC64LE-NEXT: mulld 3, 4, 3
 ; PPC64LE-NEXT: blr
 entry:
@@ -55,7 +55,7 @@
 ; PPC64LE-NEXT: blr
 ; PPC64LE-NEXT: .LBB1_2: # %if.end
 ; PPC64LE-NEXT: li 5, 1
-; PPC64LE-NEXT: isel 4, 4, 5, 2
+; PPC64LE-NEXT: isel 4, 5, 4, 0
 ; PPC64LE-NEXT: mulld 3, 4, 3
 ; PPC64LE-NEXT: blr
 entry:
@@ -82,9 +82,8 @@
 ; PPC64LE-NEXT: mr 3, 4
 ; PPC64LE-NEXT: blr
 ; PPC64LE-NEXT: .LBB2_2: # %if.end
-; PPC64LE-NEXT: cmpldi 5, 0 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -112,9 +111,8 @@ ; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB3_2: # %if.end -; PPC64LE-NEXT: cmpldi 5, 1 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -142,9 +140,8 @@ ; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB4_2: # %if.end -; PPC64LE-NEXT: cmpldi 5, 2 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -172,7 +169,7 @@ ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB5_2: # %if.end ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -199,7 +196,7 @@ ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB6_2: # %if.end ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -225,9 +222,8 @@ ; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB7_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 0 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -253,9 +249,8 @@ ; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB8_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 1 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ -281,9 +276,8 @@ ; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr ; PPC64LE-NEXT: .LBB9_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 2 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 +; PPC64LE-NEXT: isel 4, 5, 4, 0 ; PPC64LE-NEXT: mulld 3, 4, 3 ; PPC64LE-NEXT: blr entry: @@ 
-306,7 +300,7 @@ ; PPC64LE-NEXT: slw 6, 3, 4 ; PPC64LE-NEXT: li 5, 1 ; PPC64LE-NEXT: cmpwi 6, -2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -334,7 +328,7 @@ ; PPC64LE-NEXT: slw 6, 3, 4 ; PPC64LE-NEXT: li 5, 1 ; PPC64LE-NEXT: cmpwi 6, -1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -359,11 +353,11 @@ define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { ; PPC64LE-LABEL: i_a_op_b_0: ; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: slw. 5, 3, 4 -; PPC64LE-NEXT: li 6, 1 -; PPC64LE-NEXT: isel 6, 4, 6, 2 -; PPC64LE-NEXT: cmpwi 5, 0 -; PPC64LE-NEXT: mullw 3, 6, 3 +; PPC64LE-NEXT: slw 6, 3, 4 +; PPC64LE-NEXT: li 5, 1 +; PPC64LE-NEXT: cmpwi 6, 0 +; PPC64LE-NEXT: isel 5, 5, 4, 0 +; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 ; PPC64LE-NEXT: blr @@ -389,9 +383,8 @@ ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: slw 6, 3, 4 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 6, 1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 ; PPC64LE-NEXT: cmpwi 6, 1 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -418,9 +411,8 @@ ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: slw 6, 3, 4 ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 6, 2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 ; PPC64LE-NEXT: cmpwi 6, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -447,7 +439,7 @@ ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: li 5, 1 ; PPC64LE-NEXT: cmpwi 3, -2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -473,7 +465,7 @@ ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: li 5, 1 ; PPC64LE-NEXT: cmpwi 3, -1 -; 
PPC64LE-NEXT: isel 5, 4, 5, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 ; PPC64LE-NEXT: mullw 3, 5, 3 ; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 @@ -498,11 +490,10 @@ ; PPC64LE-LABEL: i_a_0: ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 0 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 0 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 +; PPC64LE-NEXT: cmpwi 3, 0 +; PPC64LE-NEXT: isel 5, 5, 4, 0 +; PPC64LE-NEXT: mullw 3, 5, 3 +; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 ; PPC64LE-NEXT: blr entry: @@ -525,11 +516,10 @@ ; PPC64LE-LABEL: i_a_1: ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 1 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 +; PPC64LE-NEXT: cmpwi 3, 1 +; PPC64LE-NEXT: isel 5, 5, 4, 0 +; PPC64LE-NEXT: mullw 3, 5, 3 +; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 ; PPC64LE-NEXT: blr entry: @@ -552,11 +542,10 @@ ; PPC64LE-LABEL: i_a_2: ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 2 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 +; PPC64LE-NEXT: cmpwi 3, 2 +; PPC64LE-NEXT: isel 5, 5, 4, 0 +; PPC64LE-NEXT: mullw 3, 5, 3 +; PPC64LE-NEXT: isel 3, 4, 3, 1 ; PPC64LE-NEXT: extsw 3, 3 ; PPC64LE-NEXT: blr entry: