Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -327,6 +327,7 @@
   OptimizationRemarkEmitter &ORE;
   BlockFrequencyInfo *BFI;
   ProfileSummaryInfo *PSI;
+  TargetTransformInfo *TTI;
 
   // Optional analyses. When non-null, these can both be used to do better
   // combining and will be updated to reflect any changes.
@@ -339,10 +340,12 @@
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
                OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
-               ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
+               ProfileSummaryInfo *PSI, TargetTransformInfo *TTI,
+               const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
-        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
+        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), TTI(TTI),
+        LI(LI) {}
 
   /// Run the combiner over the entire worklist until it is empty.
   ///
Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -2619,11 +2620,21 @@
   // minnum/maxnum intrinsics.
   if (isa<FPMathOperator>(SI) && SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
     Value *X, *Y;
-    if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
+    auto FMF = cast<FPMathOperator>(SI).getFastMathFlags();
+
+    // Only form the intrinsic when the target costs it below TCC_Expensive;
+    // otherwise the select is left as it is.
+    if (TTI->getIntrinsicInstrCost(Intrinsic::maxnum, SelType,
+                                   {SelType, SelType},
+                                   FMF) < TargetTransformInfo::TCC_Expensive &&
+        match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
       return replaceInstUsesWith(
           SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI));
 
-    if (match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))
+    if (TTI->getIntrinsicInstrCost(Intrinsic::minnum, SelType,
+                                   {SelType, SelType},
+                                   FMF) < TargetTransformInfo::TCC_Expensive &&
+        match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))
       return replaceInstUsesWith(
           SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI));
   }
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -3557,8 +3558,8 @@
     Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
     OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
-    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
-    LoopInfo *LI = nullptr) {
+    ProfileSummaryInfo *PSI, TargetTransformInfo *TTI,
+    bool ExpensiveCombines = true, LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
   ExpensiveCombines |= EnableExpensiveCombines;
 
@@ -3588,7 +3589,7 @@
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
     InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
-                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);
+                    AC, TLI, DT, ORE, BFI, PSI, TTI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
 
     if (!IC.run())
@@ -3614,9 +3615,10 @@
       MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
   auto *BFI = (PSI && PSI->hasProfileSummary()) ?
       &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                       BFI, PSI, ExpensiveCombines, LI))
+                                       BFI, PSI, &TTI, ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3641,6 +3643,7 @@
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
   AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
 }
 
@@ -3664,9 +3667,10 @@
       (PSI && PSI->hasProfileSummary()) ?
       &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
       nullptr;
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                         BFI, PSI, ExpensiveCombines, LI);
+                                         BFI, PSI, &TTI, ExpensiveCombines, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3686,6 +3690,7 @@
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                     "Combine redundant instructions", false, false)
 
Index: llvm/test/Transforms/InstCombine/maxnum-02.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/maxnum-02.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S -mtriple=systemz-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck %s -check-prefix=CHECK-Z13
+; RUN: opt < %s -instcombine -S -mtriple=systemz-linux-gnu -mcpu=z14 \
+; RUN:   | FileCheck %s -check-prefix=CHECK-Z14
+; REQUIRES: systemz-registered-target
+;
+; Check that maxnum/minnum intrinsics are not created without fmax/fmin support.
+
+define float @f0(float %arg1, float %arg2) {
+; CHECK-Z13-NOT: call fast float @llvm.maxnum.f32
+; CHECK-Z14: call fast float @llvm.maxnum.f32
+bb:
+  %tmp5 = fcmp fast oge float %arg1, %arg2
+  %arg1.arg2 = select fast i1 %tmp5, float %arg1, float %arg2
+  ret float %arg1.arg2
+}
Index: llvm/test/Transforms/InstCombine/minnum-02.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/minnum-02.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S -mtriple=systemz-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck %s -check-prefix=CHECK-Z13
+; RUN: opt < %s -instcombine -S -mtriple=systemz-linux-gnu -mcpu=z14 \
+; RUN:   | FileCheck %s -check-prefix=CHECK-Z14
+; REQUIRES: systemz-registered-target
+;
+; Check that maxnum/minnum intrinsics are not created without fmax/fmin support.
+
+define float @f0(float %arg1, float %arg2) {
+; CHECK-Z13-NOT: call fast float @llvm.minnum.f32
+; CHECK-Z14: call fast float @llvm.minnum.f32
+bb:
+  %tmp5 = fcmp fast ole float %arg1, %arg2
+  %arg1.arg2 = select fast i1 %tmp5, float %arg1, float %arg2
+  ret float %arg1.arg2
+}