diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1707,6 +1707,7 @@ template struct MaxMin_match { + using PredType = Pred_t; LHS_t L; RHS_t R; diff --git a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h --- a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h @@ -158,6 +158,19 @@ Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, Instruction *Dominatee); + // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p + // OrigSCEV is set if \p I matches Min/Max regardless of whether + // reassociation is done or not. If reassociation was successful, the newly + // generated instruction is returned, otherwise nullptr. + template + Instruction *matchAndReassociateMinOrMax(Instruction *I, + const SCEV *&OrigSCEV); + + // Reassociate Min/Max. + template + Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS, + Value *RHS); + // GetElementPtrInst implicitly sign-extends an index if the index is shorter // than the pointer size. 
This function returns whether Index is shorter than // GEP's pointer size, i.e., whether Index needs to be sign-extended in order diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -80,6 +80,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -106,6 +107,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include #include @@ -268,6 +270,24 @@ return Changed; } +template +Instruction * +NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I, + const SCEV *&OrigSCEV) { + Value *LHS = nullptr; + Value *RHS = nullptr; + + auto MinMaxMatcher = + MaxMin_match, bind_ty, PredT>( + m_Value(LHS), m_Value(RHS)); + if (match(I, MinMaxMatcher)) { + OrigSCEV = SE->getSCEV(I); + return dyn_cast_or_null( + tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)); + } + return nullptr; +} + Instruction *NaryReassociatePass::tryReassociate(Instruction * I, const SCEV *&OrigSCEV) { @@ -283,10 +303,21 @@ OrigSCEV = SE->getSCEV(I); return tryReassociateGEP(cast(I)); default: - return nullptr; + break; } - llvm_unreachable("should not be reached"); + // Try to match signed/unsigned Min/Max. + Instruction *ResI = nullptr; + // TODO: Currently min/max reassociation is restricted to integer types only + // due to use of SCEVExpander which may introduce incompatible forms of + // min/max for pointer types. 
+ if (I->getType()->isIntegerTy()) + if ((ResI = matchAndReassociateMinOrMax(I, OrigSCEV)) || + (ResI = matchAndReassociateMinOrMax(I, OrigSCEV)) || + (ResI = matchAndReassociateMinOrMax(I, OrigSCEV)) || + (ResI = matchAndReassociateMinOrMax(I, OrigSCEV))) + return ResI; + return nullptr; } @@ -539,3 +570,72 @@ } return nullptr; } + +template static SCEVTypes convertToSCEVype(MaxMinT &MM) { + if (std::is_same::value) + return scSMaxExpr; + else if (std::is_same::value) + return scUMaxExpr; + else if (std::is_same::value) + return scSMinExpr; + else if (std::is_same::value) + return scUMinExpr; + + llvm_unreachable("Can't convert MinMax pattern to SCEV type"); + return scUnknown; +} + +template +Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I, + MaxMinT MaxMinMatch, + Value *LHS, Value *RHS) { + Value *A = nullptr, *B = nullptr; + MaxMinT m_MaxMin(m_Value(A), m_Value(B)); + for (unsigned int i = 0; i < 2; ++i) { + if (match(LHS, m_MaxMin)) { + const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); + const SCEV *RHSExpr = SE->getSCEV(RHS); + for (unsigned int j = 0; j < 2; ++j) { + if (j == 0) { + if (BExpr == RHSExpr) + continue; + // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the + // first iteration. + std::swap(BExpr, RHSExpr); + } else { + if (AExpr == RHSExpr) + continue; + // Transform 'I = (A op RHS) op B' to 'I = (B op RHS) op A' on the + // second iteration. 
+ std::swap(AExpr, RHSExpr); + } + + SCEVExpander Expander(*SE, *DL, "nary-reassociate"); + SmallVector Ops1{ BExpr, AExpr }; + const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin); + const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1); + + Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I); + + if (!R1MinMax) + continue; + + LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax + << "\n"); + + R1Expr = SE->getUnknown(R1MinMax); + SmallVector Ops2{ RHSExpr, R1Expr }; + const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2); + + Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I); + NewMinMax->setName(Twine(I->getName()).concat(".nary")); + + LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n" + << "NARY: Inserting: " << *NewMinMax << "\n"); + return NewMinMax; + } + } + std::swap(LHS, RHS); + } + return nullptr; +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-smax.ll b/llvm/test/Transforms/NaryReassociate/nary-smax.ll --- a/llvm/test/Transforms/NaryReassociate/nary-smax.ll +++ b/llvm/test/Transforms/NaryReassociate/nary-smax.ll @@ -10,11 +10,8 @@ ; CHECK-LABEL: @smax_test1( ; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[SMAX2]], [[A]] -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp sgt i32 %a, %b @@ -33,11 +30,8 @@ ; CHECK-LABEL: @smax_test2( ; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: 
[[C2:%.*]] = icmp sgt i32 [[A]], [[C:%.*]] -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[A]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[B]], [[SMAX2]] -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[B]], i32 [[SMAX2]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp sgt i32 %a, %b @@ -54,9 +48,8 @@ define i32 @smax_test3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @smax_test3( ; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: [[SMAX2:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 [[C:%.*]]) -; CHECK-NEXT: [[SMAX3:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX2]], i32 [[A]]) -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %smax1 = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -72,11 +65,8 @@ ; CHECK-LABEL: @umax_test4( ; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp sge i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMAX_OR_EQ2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[SMAX_OR_EQ2]], [[A]] -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[SMAX_OR_EQ2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp sgt i32 %a, %b @@ -95,11 +85,8 @@ ; CHECK-LABEL: @smax_test5( ; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: 
[[SMAX_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp sge i32 [[SMAX2]], [[A]] -; CHECK-NEXT: [[SMAX_OR_EQ3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX_OR_EQ1]], [[SMAX_OR_EQ3]] +; CHECK-NEXT: [[SMAX_OR_EQ3_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX_OR_EQ1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX_OR_EQ1]], [[SMAX_OR_EQ3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp sge i32 %a, %b @@ -157,3 +144,28 @@ %res = add i32 %smax1, %smax3 ret i32 %res } + +; Pointer types are not supported yet +define i32* @smax_test8(i32* %a, i32* %b, i32* %c) { +; CHECK-LABEL: @smax_test8( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32* [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32* [[A]], i32* [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32* [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32* [[B]], i32* [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32* [[SMAX2]], [[A]] +; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32* [[SMAX2]], i32* [[A]] +; CHECK-NEXT: [[C4:%.*]] = icmp sgt i32* [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C4]], i32* [[SMAX1]], i32* [[SMAX3]] +; CHECK-NEXT: ret i32* [[RES]] +; + %c1 = icmp sgt i32* %a, %b + %smax1 = select i1 %c1, i32* %a, i32* %b + %c2 = icmp sgt i32* %b, %c + %smax2 = select i1 %c2, i32* %b, i32* %c + %c3 = icmp sgt i32* %smax2, %a + %smax3 = select i1 %c3, i32* %smax2, i32* %a + %c4 = icmp sgt i32* %smax1, %smax3 + %res = select i1 %c4, i32* %smax1, i32* %smax3 + ret i32* %res +} + diff --git a/llvm/test/Transforms/NaryReassociate/nary-smin.ll b/llvm/test/Transforms/NaryReassociate/nary-smin.ll --- a/llvm/test/Transforms/NaryReassociate/nary-smin.ll +++ b/llvm/test/Transforms/NaryReassociate/nary-smin.ll @@ -10,11 +10,8 @@ ; CHECK-LABEL: 
@smin_test1( ; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[SMIN2]], [[A]] -; CHECK-NEXT: [[SMIN3:%.*]] = select i1 [[C3]], i32 [[SMIN2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp slt i32 %a, %b @@ -33,11 +30,8 @@ ; CHECK-LABEL: @smin_test2( ; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[A]], [[C:%.*]] -; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[C2]], i32 [[A]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[B]], [[SMIN2]] -; CHECK-NEXT: [[SMIN3:%.*]] = select i1 [[C3]], i32 [[B]], i32 [[SMIN2]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp slt i32 %a, %b @@ -54,9 +48,8 @@ define i32 @smin_test3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @smin_test3( ; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: [[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[B]], i32 [[C:%.*]]) -; CHECK-NEXT: [[SMIN3:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN2]], i32 [[A]]) -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %smin1 = call i32 @llvm.smin.i32(i32 
%a, i32 %b) @@ -72,11 +65,8 @@ ; CHECK-LABEL: @umin_test4( ; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp sle i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMIN_OR_EQ2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[SMIN_OR_EQ2]], [[A]] -; CHECK-NEXT: [[SMIN3:%.*]] = select i1 [[C3]], i32 [[SMIN_OR_EQ2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp slt i32 %a, %b @@ -95,11 +85,8 @@ ; CHECK-LABEL: @smin_test5( ; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SMIN_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp sle i32 [[SMIN2]], [[A]] -; CHECK-NEXT: [[SMIN_OR_EQ3:%.*]] = select i1 [[C3]], i32 [[SMIN2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN_OR_EQ1]], [[SMIN_OR_EQ3]] +; CHECK-NEXT: [[SMIN_OR_EQ3_NARY:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN_OR_EQ1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN_OR_EQ1]], [[SMIN_OR_EQ3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp sle i32 %a, %b @@ -157,3 +144,27 @@ %res = add i32 %smin1, %smin3 ret i32 %res } + +; Pointer types are not supported yet +define i32* @smin_test8(i32* %a, i32* %b, i32* %c) { +; CHECK-LABEL: @smin_test8( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32* [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32* [[A]], i32* [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32* [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32* [[B]], i32* [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32* [[UMIN2]], 
[[A]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32* [[UMIN2]], i32* [[A]] +; CHECK-NEXT: [[C4:%.*]] = icmp slt i32* [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C4]], i32* [[UMIN1]], i32* [[UMIN3]] +; CHECK-NEXT: ret i32* [[RES]] +; + %c1 = icmp slt i32* %a, %b + %umin1 = select i1 %c1, i32* %a, i32* %b + %c2 = icmp slt i32* %b, %c + %umin2 = select i1 %c2, i32* %b, i32* %c + %c3 = icmp slt i32* %umin2, %a + %umin3 = select i1 %c3, i32* %umin2, i32* %a + %c4 = icmp slt i32* %umin1, %umin3 + %res = select i1 %c4, i32* %umin1, i32* %umin3 + ret i32* %res +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-umax.ll b/llvm/test/Transforms/NaryReassociate/nary-umax.ll --- a/llvm/test/Transforms/NaryReassociate/nary-umax.ll +++ b/llvm/test/Transforms/NaryReassociate/nary-umax.ll @@ -10,11 +10,8 @@ ; CHECK-LABEL: @umax_test1( ; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32 [[UMAX2]], [[A]] -; CHECK-NEXT: [[UMAX3:%.*]] = select i1 [[C3]], i32 [[UMAX2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ugt i32 %a, %b @@ -33,11 +30,8 @@ ; CHECK-LABEL: @umax_test2( ; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[A]], [[C:%.*]] -; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[A]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32 [[B]], [[UMAX2]] -; CHECK-NEXT: [[UMAX3:%.*]] = select i1 [[C3]], i32 [[B]], i32 [[UMAX2]] -; CHECK-NEXT: [[RES:%.*]] = add i32 
[[UMAX1]], [[UMAX3]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ugt i32 %a, %b @@ -54,9 +48,8 @@ define i32 @umax_test3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @umax_test3( ; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: [[UMAX2:%.*]] = call i32 @llvm.umax.i32(i32 [[B]], i32 [[C:%.*]]) -; CHECK-NEXT: [[UMAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX2]], i32 [[A]]) -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %umax1 = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -72,11 +65,8 @@ ; CHECK-LABEL: @umax_test4( ; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMAX_OR_EQ2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32 [[UMAX_OR_EQ2]], [[A]] -; CHECK-NEXT: [[UMAX3:%.*]] = select i1 [[C3]], i32 [[UMAX_OR_EQ2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ugt i32 %a, %b @@ -95,11 +85,8 @@ ; CHECK-LABEL: @umax_test5( ; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMAX_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp uge i32 [[UMAX2]], [[A]] -; CHECK-NEXT: [[UMAX_OR_EQ3:%.*]] = 
select i1 [[C3]], i32 [[UMAX2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX_OR_EQ1]], [[UMAX_OR_EQ3]] +; CHECK-NEXT: [[UMAX_OR_EQ3_NARY:%.*]] = call i32 @llvm.umax.i32(i32 [[UMAX_OR_EQ1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX_OR_EQ1]], [[UMAX_OR_EQ3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp uge i32 %a, %b @@ -157,3 +144,27 @@ %res = add i32 %umax1, %umax3 ret i32 %res } + +; Pointer types are not supported yet +define i32* @umax_test8(i32* %a, i32* %b, i32* %c) { +; CHECK-LABEL: @umax_test8( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32* [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32* [[A]], i32* [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32* [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32* [[B]], i32* [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32* [[SMAX2]], [[A]] +; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32* [[SMAX2]], i32* [[A]] +; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32* [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C4]], i32* [[SMAX1]], i32* [[SMAX3]] +; CHECK-NEXT: ret i32* [[RES]] +; + %c1 = icmp ugt i32* %a, %b + %smax1 = select i1 %c1, i32* %a, i32* %b + %c2 = icmp ugt i32* %b, %c + %smax2 = select i1 %c2, i32* %b, i32* %c + %c3 = icmp ugt i32* %smax2, %a + %smax3 = select i1 %c3, i32* %smax2, i32* %a + %c4 = icmp ugt i32* %smax1, %smax3 + %res = select i1 %c4, i32* %smax1, i32* %smax3 + ret i32* %res +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-umin.ll b/llvm/test/Transforms/NaryReassociate/nary-umin.ll --- a/llvm/test/Transforms/NaryReassociate/nary-umin.ll +++ b/llvm/test/Transforms/NaryReassociate/nary-umin.ll @@ -10,11 +10,8 @@ ; CHECK-LABEL: @umin_test1( ; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: 
[[C3:%.*]] = icmp ult i32 [[UMIN2]], [[A]] -; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32 [[UMIN2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ult i32 %a, %b @@ -33,11 +30,8 @@ ; CHECK-LABEL: @umin_test2( ; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[A]], [[C:%.*]] -; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32 [[A]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ult i32 [[B]], [[UMIN2]] -; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32 [[B]], i32 [[UMIN2]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ult i32 %a, %b @@ -54,9 +48,8 @@ define i32 @umin_test3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @umin_test3( ; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[B]], i32 [[C:%.*]]) -; CHECK-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[A]]) -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %umin1 = call i32 @llvm.umin.i32(i32 %a, i32 %b) @@ -72,11 +65,8 @@ ; CHECK-LABEL: @umin_test4( ; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ule i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMIN_OR_EQ2:%.*]] = 
select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ult i32 [[UMIN_OR_EQ2]], [[A]] -; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32 [[UMIN_OR_EQ2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ult i32 %a, %b @@ -95,11 +85,8 @@ ; CHECK-LABEL: @umin_test5( ; CHECK-NEXT: [[C1:%.*]] = icmp ule i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[UMIN_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[C3:%.*]] = icmp ule i32 [[UMIN2]], [[A]] -; CHECK-NEXT: [[UMIN_OR_EQ3:%.*]] = select i1 [[C3]], i32 [[UMIN2]], i32 [[A]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN_OR_EQ1]], [[UMIN_OR_EQ3]] +; CHECK-NEXT: [[UMIN_OR_EQ3_NARY:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN_OR_EQ1]], i32 [[C:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN_OR_EQ1]], [[UMIN_OR_EQ3_NARY]] ; CHECK-NEXT: ret i32 [[RES]] ; %c1 = icmp ule i32 %a, %b @@ -157,3 +144,27 @@ %res = add i32 %umin1, %umin3 ret i32 %res } + +; Pointer types are not supported yet +define i32* @umin_test8(i32* %a, i32* %b, i32* %c) { +; CHECK-LABEL: @umin_test8( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32* [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32* [[A]], i32* [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32* [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32* [[B]], i32* [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ult i32* [[UMIN2]], [[A]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32* [[UMIN2]], i32* [[A]] +; CHECK-NEXT: [[C4:%.*]] = icmp ult i32* [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C4]], i32* [[UMIN1]], i32* [[UMIN3]] +; CHECK-NEXT: ret i32* [[RES]] +; + %c1 = icmp ult i32* %a, 
%b + %umin1 = select i1 %c1, i32* %a, i32* %b + %c2 = icmp ult i32* %b, %c + %umin2 = select i1 %c2, i32* %b, i32* %c + %c3 = icmp ult i32* %umin2, %a + %umin3 = select i1 %c3, i32* %umin2, i32* %a + %c4 = icmp ult i32* %umin1, %umin3 + %res = select i1 %c4, i32* %umin1, i32* %umin3 + ret i32* %res +}