diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1676,6 +1676,7 @@
 template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
           bool Commutable = false>
 struct MaxMin_match {
+  using PredType = Pred_t;
   LHS_t L;
   RHS_t R;
diff --git a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
--- a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
+++ b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -158,6 +158,11 @@
   Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
                                             Instruction *Dominatee);
 
+  // Reassociate Min/Max.
+  template <typename MaxMinT>
+  Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS,
+                                Value *RHS);
+
   // GetElementPtrInst implicitly sign-extends an index if the index is shorter
   // than the pointer size. This function returns whether Index is shorter than
   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -80,6 +80,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -106,6 +107,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include <cassert>
 #include <cstdint>
@@ -271,10 +273,52 @@
 Instruction *NaryReassociatePass::tryReassociate(Instruction * I,
                                                 const SCEV *&OrigSCEV) {
+  Value *LHS = nullptr;
+  Value *RHS = nullptr;
 
   if (!SE->isSCEVable(I->getType()))
     return nullptr;
 
+  {
+    // Try to match unsigned Min.
+    auto UMinMatch = m_UMin(m_Value(LHS), m_Value(RHS));
+    if (match(I, UMinMatch)) {
+      OrigSCEV = SE->getSCEV(I);
+      return dyn_cast_or_null<Instruction>(
+          tryReassociateMinOrMax(I, UMinMatch, LHS, RHS));
+    }
+  }
+
+  {
+    // Try to match signed Min.
+    auto SMinMatch = m_SMin(m_Value(LHS), m_Value(RHS));
+    if (match(I, SMinMatch)) {
+      OrigSCEV = SE->getSCEV(I);
+      return dyn_cast_or_null<Instruction>(
+          tryReassociateMinOrMax(I, SMinMatch, LHS, RHS));
+    }
+  }
+
+  {
+    // Try to match unsigned Max.
+    auto UMaxMatch = m_UMax(m_Value(LHS), m_Value(RHS));
+    if (match(I, UMaxMatch)) {
+      OrigSCEV = SE->getSCEV(I);
+      return dyn_cast_or_null<Instruction>(
+          tryReassociateMinOrMax(I, UMaxMatch, LHS, RHS));
+    }
+  }
+
+  {
+    // Try to match signed Max.
+    auto SMaxMatch = m_SMax(m_Value(LHS), m_Value(RHS));
+    if (match(I, SMaxMatch)) {
+      OrigSCEV = SE->getSCEV(I);
+      return dyn_cast_or_null<Instruction>(
+          tryReassociateMinOrMax(I, SMaxMatch, LHS, RHS));
+    }
+  }
+
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::Mul:
@@ -542,3 +586,73 @@
   }
   return nullptr;
 }
+
+template <typename MaxMinT> static SCEVTypes convertToSCEVType(MaxMinT &MM) {
+  if (std::is_same<smax_pred_ty, typename MaxMinT::PredType>::value)
+    return scSMaxExpr;
+  else if (std::is_same<umax_pred_ty, typename MaxMinT::PredType>::value)
+    return scUMaxExpr;
+  else if (std::is_same<smin_pred_ty, typename MaxMinT::PredType>::value)
+    return scSMinExpr;
+  else if (std::is_same<umin_pred_ty, typename MaxMinT::PredType>::value)
+    return scUMinExpr;
+
+  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
+  return scUnknown;
+}
+
+template <typename MaxMinT>
+Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
+                                                   MaxMinT MaxMinMatch,
+                                                   Value *LHS, Value *RHS) {
+  Value *A = nullptr, *B = nullptr;
+  MaxMinT m_MaxMin(m_Value(A), m_Value(B));
+  for (unsigned i = 0; i < 2; ++i) {
+    if (match(LHS, m_MaxMin)) {
+      const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
+      const SCEV *RHSExpr = SE->getSCEV(RHS);
+      for (unsigned j = 0; j < 2; ++j) {
+        if (j == 0) {
+          if (BExpr == RHSExpr)
+            continue;
+          // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the
+          // first iteration.
+          std::swap(BExpr, RHSExpr);
+        } else {
+          if (AExpr == RHSExpr)
+            continue;
+          // Transform 'I = (A op RHS) op B' to 'I = (B op RHS) op A' on the
+          // second iteration.
+          std::swap(AExpr, RHSExpr);
+        }
+
+        SCEVExpander Expander(*SE, *DL, "nary-reassociate");
+        SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };
+        const SCEVTypes SCEVType = convertToSCEVType(m_MaxMin);
+        const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
+
+        Value *R1MinMax = findClosestMatchingDominator(R1Expr, I);
+
+        if (!R1MinMax) {
+          continue;
+        }
+
+        LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax
+                          << "\n");
+
+        R1Expr = SE->getUnknown(R1MinMax);
+        SmallVector<const SCEV *, 2> Ops2{ RHSExpr, R1Expr };
+        const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
+
+        Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
+        NewMinMax->setName(Twine(I->getName()).concat(".nary"));
+
+        LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
+                          << "NARY: Inserting: " << *NewMinMax << "\n");
+        return NewMinMax;
+      }
+    }
+    std::swap(LHS, RHS);
+  }
+  return nullptr;
+}
diff --git a/llvm/test/Transforms/NaryReassociate/nary-smax.ll b/llvm/test/Transforms/NaryReassociate/nary-smax.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/NaryReassociate/nary-smax.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
+
+declare i32 @llvm.smax.i32(i32 %a, i32 %b)
+
+; m1 = smax(a,b) ; has side uses
+; m2 = smax(smax((b,c), a) -> m2 = smax(m1, c)
+define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @smax_test1(
+; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SMAX1]], [[C:%.*]]
+; CHECK-NEXT: [[SMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMAX1]], i32 [[C]]
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]],
[[SMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %b, %c + %smax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sgt i32 %smax2, %a + %smax3 = select i1 %c3, i32 %smax2, i32 %a + %res = add i32 %smax1, %smax3 + ret i32 %res +} + +; m1 = smax(a,b) ; has side uses +; m2 = smax(b, (smax(a, c))) -> m2 = smax(m1, c) +define i32 @smax_test2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test2( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SMAX1]], [[C:%.*]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %a, %c + %smax2 = select i1 %c2, i32 %a, i32 %c + %c3 = icmp sgt i32 %b, %smax2 + %smax3 = select i1 %c3, i32 %b, i32 %smax2 + %res = add i32 %smax1, %smax3 + ret i32 %res +} + +; Same test as smax_test1 but uses @llvm.smax intrinsic +define i32 @smax_test3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test3( +; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SMAX1]], [[C:%.*]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %smax1 = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %smax2 = call i32 @llvm.smax.i32(i32 %b, i32 %c) + %smax3 = call i32 @llvm.smax.i32(i32 %smax2, i32 %a) + %res = add i32 %smax1, %smax3 + ret i32 %res +} + +; m1 = smax(a,b) ; has side uses +; m2 = smax(smax_or_eq((b,c), a) -> m2 = smax(m1, c) +define i32 @umax_test4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test4( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 
[[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SMAX1]], [[C:%.*]] +; CHECK-NEXT: [[SMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sge i32 %b, %c + %smax_or_eq2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sgt i32 %smax_or_eq2, %a + %smax3 = select i1 %c3, i32 %smax_or_eq2, i32 %a + %res = add i32 %smax1, %smax3 + ret i32 %res +} + +; m1 = smax_or_eq(a,b) ; has side uses +; m2 = smax_or_eq(smax((b,c), a) -> m2 = smax(m1, c) +define i32 @smax_test5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test5( +; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SMAX_OR_EQ1]], [[C:%.*]] +; CHECK-NEXT: [[SMAX_OR_EQ3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMAX_OR_EQ1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX_OR_EQ1]], [[SMAX_OR_EQ3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sge i32 %a, %b + %smax_or_eq1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %b, %c + %smax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sge i32 %smax2, %a + %smax_or_eq3 = select i1 %c3, i32 %smax2, i32 %a + %res = add i32 %smax_or_eq1, %smax_or_eq3 + ret i32 %res +} + +; m1 = smax(a,b) ; has side uses +; m2 = smax(umax((b,c), a) ; check that signed and unsigned maxs are not mixed +define i32 @smax_test6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test6( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[UMAX2]], 
[[A]] +; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[UMAX2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ugt i32 %b, %c + %umax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sgt i32 %umax2, %a + %smax3 = select i1 %c3, i32 %umax2, i32 %a + %res = add i32 %smax1, %smax3 + ret i32 %res +} + +; m1 = smax(a,b) ; has side uses +; m2 = smax(smin((b,c), a) ; check that max and min are not mixed +define i32 @smax_test7(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test7( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[SMIN2]], [[A]] +; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[SMIN2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp slt i32 %b, %c + %smin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp slt i32 %smin2, %a + %smax3 = select i1 %c3, i32 %smin2, i32 %a + %res = add i32 %smax1, %smax3 + ret i32 %res +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-smin.ll b/llvm/test/Transforms/NaryReassociate/nary-smin.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-smin.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s + +declare i32 @llvm.smin.i32(i32 %a, i32 %b) + +; m1 = smin(a,b) ; has side uses +; m2 = smin(smin((b,c), a) -> m2 = smin(m1, c) +define i32 @smin_test1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smin_test1( +; 
CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SMIN1]], [[C:%.*]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp slt i32 %a, %b + %smin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp slt i32 %b, %c + %smin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp slt i32 %smin2, %a + %smin3 = select i1 %c3, i32 %smin2, i32 %a + %res = add i32 %smin1, %smin3 + ret i32 %res +} + +; m1 = smin(a,b) ; has side uses +; m2 = smin(b, (smin(a, c))) -> m2 = smin(m1, c) +define i32 @smin_test2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smin_test2( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SMIN1]], [[C:%.*]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp slt i32 %a, %b + %smin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp slt i32 %a, %c + %smin2 = select i1 %c2, i32 %a, i32 %c + %c3 = icmp slt i32 %b, %smin2 + %smin3 = select i1 %c3, i32 %b, i32 %smin2 + %res = add i32 %smin1, %smin3 + ret i32 %res +} + +; Same test as smin_test1 but uses @llvm.smin intrinsic +define i32 @smin_test3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smin_test3( +; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SMIN1]], [[C:%.*]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %smin1 = call i32 @llvm.smin.i32(i32 %a, i32 %b) + %smin2 = call i32 @llvm.smin.i32(i32 %b, 
i32 %c) + %smin3 = call i32 @llvm.smin.i32(i32 %smin2, i32 %a) + %res = add i32 %smin1, %smin3 + ret i32 %res +} + +; m1 = smin(a,b) ; has side uses +; m2 = smin(smin_or_eq((b,c), a) -> m2 = smin(m1, c) +define i32 @umin_test4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test4( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SMIN1]], [[C:%.*]] +; CHECK-NEXT: [[SMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp slt i32 %a, %b + %smin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sle i32 %b, %c + %smin_or_eq2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp slt i32 %smin_or_eq2, %a + %smin3 = select i1 %c3, i32 %smin_or_eq2, i32 %a + %res = add i32 %smin1, %smin3 + ret i32 %res +} + +; m1 = smin_or_eq(a,b) ; has side uses +; m2 = smin_or_eq(smin((b,c), a) -> m2 = smin(m1, c) +define i32 @smin_test5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smin_test5( +; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SMIN_OR_EQ1]], [[C:%.*]] +; CHECK-NEXT: [[SMIN_OR_EQ3_NARY:%.*]] = select i1 [[TMP1]], i32 [[SMIN_OR_EQ1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN_OR_EQ1]], [[SMIN_OR_EQ3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sle i32 %a, %b + %smin_or_eq1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp slt i32 %b, %c + %smin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sle i32 %smin2, %a + %smin_or_eq3 = select i1 %c3, i32 %smin2, i32 %a + %res = add i32 %smin_or_eq1, %smin_or_eq3 + ret i32 %res +} + +; m1 = smin(a,b) ; has side uses +; m2 = smin(umin((b,c), a) ; check that signed and unsigned mins are not mixed +define i32 @smin_test6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: 
@smin_test6( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[UMIN2]], [[A]] +; CHECK-NEXT: [[SMIN3:%.*]] = select i1 [[C3]], i32 [[UMIN2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp slt i32 %a, %b + %smin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ult i32 %b, %c + %umin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp slt i32 %umin2, %a + %smin3 = select i1 %c3, i32 %umin2, i32 %a + %res = add i32 %smin1, %smin3 + ret i32 %res +} + +; m1 = smin(a,b) ; has side uses +; m2 = smin(smax((b,c), a) ; check that min and max are not mixed +define i32 @smin_test7(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smin_test7( +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[SMAX2]], [[A]] +; CHECK-NEXT: [[SMIN3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMIN1]], [[SMIN3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp slt i32 %a, %b + %smin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %b, %c + %smax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp slt i32 %smax2, %a + %smin3 = select i1 %c3, i32 %smax2, i32 %a + %res = add i32 %smin1, %smin3 + ret i32 %res +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-umax.ll b/llvm/test/Transforms/NaryReassociate/nary-umax.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-umax.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: 
opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s + +declare i32 @llvm.umax.i32(i32 %a, i32 %b) + +; m1 = umax(a,b) ; has side uses +; m2 = umax(umax((b,c), a) -> m2 = umax(m1, c) +define i32 @umax_test1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test1( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[UMAX1]], [[C:%.*]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ugt i32 %a, %b + %umax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ugt i32 %b, %c + %umax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ugt i32 %umax2, %a + %umax3 = select i1 %c3, i32 %umax2, i32 %a + %res = add i32 %umax1, %umax3 + ret i32 %res +} + +; m1 = umax(a,b) ; has side uses +; m2 = umax(b, (umax(a, c))) -> m2 = umax(m1, c) +define i32 @umax_test2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test2( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[UMAX1]], [[C:%.*]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ugt i32 %a, %b + %umax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ugt i32 %a, %c + %umax2 = select i1 %c2, i32 %a, i32 %c + %c3 = icmp ugt i32 %b, %umax2 + %umax3 = select i1 %c3, i32 %b, i32 %umax2 + %res = add i32 %umax1, %umax3 + ret i32 %res +} + +; Same test as umax_test1 but uses @llvm.umax intrinsic +define i32 @umax_test3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test3( +; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: 
[[TMP1:%.*]] = icmp ugt i32 [[UMAX1]], [[C:%.*]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %umax1 = call i32 @llvm.umax.i32(i32 %a, i32 %b) + %umax2 = call i32 @llvm.umax.i32(i32 %b, i32 %c) + %umax3 = call i32 @llvm.umax.i32(i32 %umax2, i32 %a) + %res = add i32 %umax1, %umax3 + ret i32 %res +} + +; m1 = umax(a,b) ; has side uses +; m2 = umax(umax_or_eq((b,c), a) -> m2 = umax(m1, c) +define i32 @umax_test4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test4( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[UMAX1]], [[C:%.*]] +; CHECK-NEXT: [[UMAX3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMAX1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ugt i32 %a, %b + %umax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp uge i32 %b, %c + %umax_or_eq2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ugt i32 %umax_or_eq2, %a + %umax3 = select i1 %c3, i32 %umax_or_eq2, i32 %a + %res = add i32 %umax1, %umax3 + ret i32 %res +} + +; m1 = umax_or_eq(a,b) ; has side uses +; m2 = umax_or_eq(umax((b,c), a) -> m2 = umax(m1, c) +define i32 @umax_test5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test5( +; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[UMAX_OR_EQ1]], [[C:%.*]] +; CHECK-NEXT: [[UMAX_OR_EQ3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMAX_OR_EQ1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX_OR_EQ1]], [[UMAX_OR_EQ3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp uge i32 %a, %b + %umax_or_eq1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ugt i32 %b, %c + %umax2 = select i1 %c2, i32 %b, i32 %c + %c3 = 
icmp uge i32 %umax2, %a + %umax_or_eq3 = select i1 %c3, i32 %umax2, i32 %a + %res = add i32 %umax_or_eq1, %umax_or_eq3 + ret i32 %res +} + +; m1 = umax(a,b) ; has side uses +; m2 = umax(smax((b,c), a) ; check that signed and unsigned maxs are not mixed +define i32 @umax_test6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test6( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32 [[SMAX2]], [[A]] +; CHECK-NEXT: [[UMAX3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ugt i32 %a, %b + %umax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %b, %c + %smax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ugt i32 %smax2, %a + %umax3 = select i1 %c3, i32 %smax2, i32 %a + %res = add i32 %umax1, %umax3 + ret i32 %res +} + +; m1 = umax(a,b) ; has side uses +; m2 = umax(umin((b,c), a) ; check that max and min are not mixed +define i32 @umax_test7(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umax_test7( +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ugt i32 [[UMAX2]], [[A]] +; CHECK-NEXT: [[UMAX3:%.*]] = select i1 [[C3]], i32 [[UMAX2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMAX1]], [[UMAX3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ugt i32 %a, %b + %umax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ult i32 %b, %c + %umax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ugt i32 %umax2, %a + %umax3 = select i1 %c3, i32 %umax2, i32 %a + %res = add i32 %umax1, %umax3 + ret 
i32 %res +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-umin.ll b/llvm/test/Transforms/NaryReassociate/nary-umin.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-umin.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s + +declare i32 @llvm.umin.i32(i32 %a, i32 %b) + +; m1 = umin(a,b) ; has side uses +; m2 = umin(umin((b,c), a) -> m2 = umin(m1, c) +define i32 @umin_test1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test1( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN1]], [[C:%.*]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ult i32 %a, %b + %umin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ult i32 %b, %c + %umin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ult i32 %umin2, %a + %umin3 = select i1 %c3, i32 %umin2, i32 %a + %res = add i32 %umin1, %umin3 + ret i32 %res +} + +; m1 = umin(a,b) ; has side uses +; m2 = umin(b, (umin(a, c))) -> m2 = umin(m1, c) +define i32 @umin_test2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test2( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN1]], [[C:%.*]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ult i32 %a, %b + %umin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ult i32 %a, %c + %umin2 = select i1 %c2, i32 %a, i32 %c + %c3 = icmp ult i32 %b, %umin2 + %umin3 = 
select i1 %c3, i32 %b, i32 %umin2 + %res = add i32 %umin1, %umin3 + ret i32 %res +} + +; Same test as umin_test1 but uses @llvm.umin intrinsic +define i32 @umin_test3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test3( +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN1]], [[C:%.*]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %umin1 = call i32 @llvm.umin.i32(i32 %a, i32 %b) + %umin2 = call i32 @llvm.umin.i32(i32 %b, i32 %c) + %umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 %a) + %res = add i32 %umin1, %umin3 + ret i32 %res +} + +; m1 = umin(a,b) ; has side uses +; m2 = umin(umin_or_eq((b,c), a) -> m2 = umin(m1, c) +define i32 @umin_test4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test4( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN1]], [[C:%.*]] +; CHECK-NEXT: [[UMIN3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMIN1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ult i32 %a, %b + %umin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ule i32 %b, %c + %umin_or_eq2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ult i32 %umin_or_eq2, %a + %umin3 = select i1 %c3, i32 %umin_or_eq2, i32 %a + %res = add i32 %umin1, %umin3 + ret i32 %res +} + +; m1 = umin_or_eq(a,b) ; has side uses +; m2 = umin_or_eq(umin((b,c), a) -> m2 = umin(m1, c) +define i32 @umin_test5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test5( +; CHECK-NEXT: [[C1:%.*]] = icmp ule i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN_OR_EQ1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN_OR_EQ1]], [[C:%.*]] +; CHECK-NEXT: 
[[UMIN_OR_EQ3_NARY:%.*]] = select i1 [[TMP1]], i32 [[UMIN_OR_EQ1]], i32 [[C]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN_OR_EQ1]], [[UMIN_OR_EQ3_NARY]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ule i32 %a, %b + %umin_or_eq1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ult i32 %b, %c + %umin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ule i32 %umin2, %a + %umin_or_eq3 = select i1 %c3, i32 %umin2, i32 %a + %res = add i32 %umin_or_eq1, %umin_or_eq3 + ret i32 %res +} + +; m1 = umin(a,b) ; has side uses +; m2 = umin(smin((b,c), a) ; check that signed and unsigned mins are not mixed +define i32 @umin_test6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test6( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMIN2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ult i32 [[SMIN2]], [[A]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32 [[SMIN2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ult i32 %a, %b + %umin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp slt i32 %b, %c + %smin2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ult i32 %smin2, %a + %umin3 = select i1 %c3, i32 %smin2, i32 %a + %res = add i32 %umin1, %umin3 + ret i32 %res +} + +; m1 = umin(a,b) ; has side uses +; m2 = umin(umax((b,c), a) ; check that min and max are not mixed +define i32 @umin_test7(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @umin_test7( +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[UMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp ult i32 [[UMAX2]], [[A]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[C3]], i32 [[UMAX2]], i32 [[A]] +; 
CHECK-NEXT: [[RES:%.*]] = add i32 [[UMIN1]], [[UMIN3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp ult i32 %a, %b + %umin1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp ugt i32 %b, %c + %umax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp ult i32 %umax2, %a + %umin3 = select i1 %c3, i32 %umax2, i32 %a + %res = add i32 %umin1, %umin3 + ret i32 %res +}