diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -41,6 +41,7 @@ class CallBase; class Function; class GlobalValue; +class InstCombiner; class IntrinsicInst; class LoadInst; class LoopAccessInfo; @@ -532,6 +533,8 @@ bool emitGetActiveLaneMask(Loop *L, LoopInfo *LI, ScalarEvolution &SE, bool TailFolded) const; + Value *instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const; + /// @} /// \name Scalar Target Information @@ -1261,6 +1264,7 @@ DominatorTree *DT, const LoopAccessInfo *LAI) = 0; virtual bool emitGetActiveLaneMask(Loop *L, LoopInfo *LI, ScalarEvolution &SE, bool TailFolded) = 0; + virtual Value *instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; virtual bool isLegalICmpImmediate(int64_t Imm) = 0; virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -1544,6 +1548,9 @@ bool TailFolded) override { return Impl.emitGetActiveLaneMask(L, LI, SE, TailFolded); } + Value *instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { + return Impl.instCombineIntrinsic(IC, II); + } bool isLegalAddImmediate(int64_t Imm) override { return Impl.isLegalAddImmediate(Imm); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -146,6 +146,10 @@ return false; } + Value *instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { + return nullptr; + } + void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &) {} diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -467,6 +467,10 @@ return BaseT::emitGetActiveLaneMask(L, LI, SE, TailFold); } + Value *instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) { + return BaseT::instCombineIntrinsic(IC, II); + } + int getInstructionLatency(const Instruction *I) { if (isa<LoadInst>(I)) return getST()->getSchedModel().DefaultLoadLatency; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -318,6 +318,11 @@ return TTIImpl->emitGetActiveLaneMask(L, LI, SE, TailFolded); } +Value *TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + return TTIImpl->instCombineIntrinsic(IC, II); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" @@ -42,13 +43,13 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsX86.h" -#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/IR/IntrinsicsNVPTX.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsX86.h" 
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" @@ -1964,7 +1965,6 @@ Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { - default: break; case Intrinsic::objectsize: if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, V); @@ -4350,6 +4350,10 @@ } break; } + default: { + if (Value *V = TTI.instCombineIntrinsic(*this, *II)) + return replaceInstUsesWith(*II, V); + } } return visitCallBase(*II); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -320,6 +320,7 @@ // Required analyses. AssumptionCache &AC; TargetLibraryInfo &TLI; + TargetTransformInfo &TTI; DominatorTree &DT; const DataLayout &DL; const SimplifyQuery SQ; @@ -335,13 +336,14 @@ public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, - bool MinimizeSize, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) + bool MinimizeSize, AliasAnalysis *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, TargetTransformInfo &TTI, + DominatorTree &DT, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} + AA(AA), AC(AC), TLI(TLI), TTI(TTI), DT(DT), DL(DL), + SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} /// Run the combiner over the entire worklist until it is empty. 
/// @@ -358,6 +360,8 @@ TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; } + TargetTransformInfo &getTargetTransformInfo() const { return TTI; } + // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: // Return Value: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -59,6 +59,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/BasicBlock.h" @@ -3659,8 +3660,8 @@ static bool combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, + DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) { auto &DL = F.getParent()->getDataLayout(); MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue()); @@ -3704,8 +3705,8 @@ MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, Builder, F.hasMinSize(), AA, - AC, TLI, DT, ORE, BFI, PSI, DL, LI); + InstCombiner IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, + ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) @@ -3728,6 +3729,7 @@ auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); auto *LI = AM.getCachedResult<LoopAnalysis>(F); @@ -3738,8 +3740,8 @@ 
auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; - if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI, - PSI, MaxIterations, LI)) + if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, + BFI, PSI, MaxIterations, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3757,6 +3759,7 @@ AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); @@ -3775,6 +3778,7 @@ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); @@ -3788,8 +3792,8 @@ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI, - PSI, MaxIterations, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, + BFI, PSI, MaxIterations, LI); } char InstructionCombiningPass::ID = 0; @@ -3808,6 +3812,7 @@ "Combine redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)