diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" @@ -6489,13 +6490,29 @@ // which would often largely negate the benefit of folding anyway. // // Do the same for the other patterns recognized by matchSelectPattern. - if (I.hasOneUse()) - if (SelectInst *SI = dyn_cast(I.user_back())) { + if (I.hasOneUse()) { + Instruction *User = I.user_back(); + if (SelectInst *SI = dyn_cast(User)) { Value *A, *B; SelectPatternResult SPR = matchSelectPattern(SI, A, B); if (SPR.Flavor != SPF_UNKNOWN) return nullptr; + // Maybe a canonicalization that happens later will make it + // possible to form minmax, don't give up yet. + if (canReuseConstantFromSelectInComparison(*SI, I)) + return nullptr; } + // Sometimes we may have an intermediate instruction between the compare + // and the use. If this is a form of `not` then look through it. + ICmpInst::Predicate P; + if (match(User, m_ICmp(P, m_Specific(&I), m_Specific(Builder.getTrue())))) { + if (P == CmpInst::Predicate::ICMP_ULT || + P == CmpInst::Predicate::ICMP_UGT) + return nullptr; + } + if (match(User, m_Not(m_Specific(&I)))) + return nullptr; + } // Do this after checking for min/max to prevent infinite looping. if (Instruction *Res = foldICmpWithZero(I)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -388,6 +388,8 @@ Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI, bool IsAnd); + bool canReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp); + public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1552,9 +1552,11 @@ // the result) is identical to the C1 in select. If it matches we can change // original comparison to one with swapped predicate, reuse the constant, // and swap the hands of select. +// If IC is null then this will return &Sel if it's possible to do the combine, +// without actually performing it. static Instruction * tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { + InstCombinerImpl *IC) { ICmpInst::Predicate Pred; Value *X; Constant *C0; @@ -1611,20 +1613,27 @@ if (!MatchesSelectValue(FlippedStrictness->second)) return nullptr; + if (!IC) + return &Sel; + // It matched! Lets insert the new comparison just before select. - InstCombiner::BuilderTy::InsertPointGuard Guard(IC.Builder); - IC.Builder.SetInsertPoint(&Sel); + InstCombiner::BuilderTy::InsertPointGuard Guard(IC->Builder); + IC->Builder.SetInsertPoint(&Sel); Pred = ICmpInst::getSwappedPredicate(Pred); // Yes, swapped. - Value *NewCmp = IC.Builder.CreateICmp(Pred, X, FlippedStrictness->second, - Cmp.getName() + ".inv"); - IC.replaceOperand(Sel, 0, NewCmp); + Value *NewCmp = IC->Builder.CreateICmp(Pred, X, FlippedStrictness->second, + Cmp.getName() + ".inv"); + IC->replaceOperand(Sel, 0, NewCmp); Sel.swapValues(); Sel.swapProfMetadata(); - return &Sel; } +bool InstCombinerImpl::canReuseConstantFromSelectInComparison(SelectInst &Sel, + ICmpInst &Cmp) { + return tryToReuseConstantFromSelectInComparison(Sel, Cmp, nullptr); +} + static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal, Value *FVal, InstCombiner::BuilderTy &Builder) { @@ -1714,7 +1723,7 @@ return replaceInstUsesWith(SI, V); if (Instruction *NewSel = - tryToReuseConstantFromSelectInComparison(SI, *ICI, *this)) + tryToReuseConstantFromSelectInComparison(SI, *ICI, this)) return NewSel; bool Changed = adjustMinMax(SI, *ICI); diff --git a/llvm/test/Transforms/InstCombine/prevent-minmax-pattern-break.ll b/llvm/test/Transforms/InstCombine/prevent-minmax-pattern-break.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/prevent-minmax-pattern-break.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -p instcombine -S | FileCheck %s + +; Ensure that we don't break the smin idiom and generate the llvm.smin intrinsic call. + +define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockSize) { +; CHECK-LABEL: define void @arm_mult_q15 +; CHECK-SAME: (ptr [[PSRCA:%.*]], ptr [[PSRCB:%.*]], ptr noalias [[PDST:%.*]], i32 [[BLOCKSIZE:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[PSRCB_ADDR_0:%.*]] = phi ptr [ [[PSRCB]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[PDST_ADDR_0:%.*]] = phi ptr [ [[PDST]], [[ENTRY]] ], [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[PSRCA_ADDR_0:%.*]] = phi ptr [ [[PSRCA]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[BLKCNT_0:%.*]] = phi i32 [ [[BLOCKSIZE]], [[ENTRY]] ], [ [[DEC:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[BLKCNT_0]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRCA_ADDR_0]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[PSRCA_ADDR_0]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i16, ptr [[PSRCB_ADDR_0]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[PSRCB_ADDR_0]], align 2 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV2]] +; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15 +; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) +; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i16 +; CHECK-NEXT: [[INCDEC_PTR4]] = getelementptr inbounds i16, ptr [[PDST_ADDR_0]], i64 1 +; CHECK-NEXT: store i16 [[CONV3]], ptr [[PDST_ADDR_0]], align 2 +; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_0]], -1 +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %pSrcB.addr.0 = phi ptr [ %pSrcB, %entry ], [ %incdec.ptr1, %while.body ] + %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %incdec.ptr4, %while.body ] + %pSrcA.addr.0 = phi ptr [ %pSrcA, %entry ], [ %incdec.ptr, %while.body ] + %blkCnt.0 = phi i32 [ %blockSize, %entry ], [ %dec, %while.body ] + %cmp.not = icmp eq i32 %blkCnt.0, 0 + br i1 %cmp.not, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.0, i32 1 + %0 = load i16, ptr %pSrcA.addr.0, align 2 + %conv = sext i16 %0 to i32 + %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcB.addr.0, i32 1 + %1 = load i16, ptr %pSrcB.addr.0, align 2 + %conv2 = sext i16 %1 to i32 + %mul = mul nsw i32 %conv, %conv2 + %shr = ashr i32 %mul, 15 + %cmp4.i = icmp sgt i32 %shr, 32767 + %switch.i = icmp ult i1 %cmp4.i, true + %spec.select.i = select i1 %switch.i, i32 %shr, i32 32767 + %conv3 = trunc i32 %spec.select.i to i16 + %incdec.ptr4 = getelementptr inbounds i16, ptr %pDst.addr.0, i32 1 + store i16 %conv3, ptr %pDst.addr.0, align 2 + %dec = add i32 %blkCnt.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} +