diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -27,6 +27,7 @@ class Function; class ProfileSummaryInfo; class TargetTransformInfo; +class TargetLibraryInfo; namespace InlineConstants { // Various thresholds used by inline cost analysis. @@ -219,6 +220,7 @@ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, + function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); /// Get an InlineCost with the callee explicitly specified. @@ -231,6 +233,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, + function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); /// Minimal filter to detect invalid constructs for inlining. diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -260,6 +260,18 @@ return *this; } + /// Determine whether a callee with the given TLI can be inlined into a + /// caller with this TLI, based on 'nobuiltin' attributes. Currently we allow + /// inlining into a caller with a superset of the callee's nobuiltin + /// attributes, which is conservatively correct. + bool areInlineCompatible(const TargetLibraryInfo &CalleeTLI) const { + BitVector B = OverrideAsUnavailable; + B |= CalleeTLI.OverrideAsUnavailable; + // We can inline if the union of the caller and callee's nobuiltin + // attributes is no stricter than the caller's nobuiltin attributes. + return B == OverrideAsUnavailable; + } + /// Searches for a particular function name. 
/// /// If it is one of the known library functions, return true and set F to the diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -74,6 +74,7 @@ protected: AssumptionCacheTracker *ACT; ProfileSummaryInfo *PSI; + std::function GetTLI; ImportedFunctionsInliningStatistics ImportedFunctionsStats; }; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" @@ -2068,10 +2069,16 @@ /// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. -static bool functionsHaveCompatibleAttributes(Function *Caller, - Function *Callee, - TargetTransformInfo &TTI) { +static bool functionsHaveCompatibleAttributes( + Function *Caller, Function *Callee, TargetTransformInfo &TTI, + function_ref &GetTLI) { + // Note that CalleeTLI must be a copy not a reference. The legacy pass manager + // caches the most recently created TLI in the TargetLibraryInfoWrapperPass + // object, and always returns the same object (which is overwritten on each + // GetTLI call). Therefore we copy the first result. 
+ auto CalleeTLI = GetTLI(*Callee); return TTI.areInlineCompatible(Caller, Callee) && + GetTLI(*Caller).areInlineCompatible(CalleeTLI) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); } @@ -2112,9 +2119,10 @@ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, + function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI, ORE); + GetAssumptionCache, GetBFI, GetTLI, PSI, ORE); } InlineCost llvm::getInlineCost( @@ -2122,6 +2130,7 @@ TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, Optional> GetBFI, + function_ref GetTLI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { // Cannot inline indirect calls. @@ -2154,7 +2163,7 @@ // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). Function *Caller = Call.getCaller(); - if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI)) + if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) return llvm::InlineCost::getNever("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -215,8 +215,8 @@ }; auto IC = llvm::getInlineCost(cast(*CS.getInstruction()), Callee, - LocalParams, TTI, GetAssumptionCache, None, PSI, - RemarksEnabled ? &ORE : nullptr); + LocalParams, TTI, GetAssumptionCache, None, + GetTLI, PSI, RemarksEnabled ? 
&ORE : nullptr); if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) { // Single BB does not increase total BB amount, thus subtract 1 diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -71,7 +71,7 @@ }; return llvm::getInlineCost( cast(*CS.getInstruction()), Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr); + /*GetBFI=*/None, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -528,7 +528,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, ProfileSummaryInfo *PSI, - std::function GetTLI, + std::function GetTLI, bool InsertLifetime, function_ref GetInlineCost, function_ref AARGetter, @@ -761,7 +761,7 @@ CallGraph &CG = getAnalysis().getCallGraph(); ACT = &getAnalysis(); PSI = &getAnalysis().getPSI(); - auto GetTLI = [&](Function &F) -> TargetLibraryInfo & { + GetTLI = [&](Function &F) -> const TargetLibraryInfo & { return getAnalysis().getTLI(F); }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { @@ -1008,6 +1008,9 @@ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { return FAM.getResult(F); }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; auto GetInlineCost = [&](CallSite CS) { Function &Callee = *CS.getCalledFunction(); @@ -1016,7 +1019,7 @@ Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); return getInlineCost(cast(*CS.getInstruction()), Params, - CalleeTTI, GetAssumptionCache, {GetBFI}, PSI, + CalleeTTI, GetAssumptionCache, {GetBFI}, GetTLI, PSI, RemarksEnabled ? 
&ORE : nullptr); }; diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -203,9 +203,10 @@ function_ref LookupAC, std::function *GTTI, Optional> GBFI, + std::function *GTLI, ProfileSummaryInfo *ProfSI) : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC), - GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} + GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {} bool run(Module &M); // Main part of the transformation that calls helper functions to find @@ -274,6 +275,7 @@ function_ref LookupAssumptionCache; std::function *GetTTI; Optional> GetBFI; + std::function *GetTLI; ProfileSummaryInfo *PSI; // Return the frequency of the OutlininingBB relative to F's entry point. @@ -355,6 +357,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool runOnModule(Module &M) override { @@ -381,8 +384,13 @@ return TTIWP->getTTI(F); }; + std::function GetTLI = + [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, - &GetTTI, NoneType::None, PSI) + &GetTTI, NoneType::None, &GetTLI, PSI) .run(M); } }; @@ -778,8 +786,8 @@ DEBUG_TYPE); assert(Call && "invalid callsite for partial inline"); InlineCost IC = getInlineCost(cast(*Call), getInlineParams(), - CalleeTTI, *GetAssumptionCache, GetBFI, PSI, - RemarksEnabled ? &ORE : nullptr); + CalleeTTI, *GetAssumptionCache, GetBFI, *GetTLI, + PSI, RemarksEnabled ? 
&ORE : nullptr); if (IC.isAlways()) { ORE.emit([&]() { @@ -1493,6 +1501,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -1523,10 +1532,15 @@ return FAM.getResult(F); }; + std::function GetTLI = + [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + ProfileSummaryInfo *PSI = &AM.getResult(M); if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI, - {GetBFI}, PSI) + {GetBFI}, &GetTLI, PSI) .run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -307,10 +308,12 @@ SampleProfileLoader( StringRef Name, StringRef RemapName, bool IsThinLTOPreLink, std::function GetAssumptionCache, - std::function GetTargetTransformInfo) + std::function GetTargetTransformInfo, + std::function GetTLI) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), - Filename(std::string(Name)), RemappingFilename(std::string(RemapName)), + GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), + CoverageTracker(*this), Filename(std::string(Name)), + RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); @@ -397,6 +400,7 @@ 
std::function GetAC; std::function GetTTI; + std::function GetTLI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -474,14 +478,17 @@ SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, bool IsThinLTOPreLink = false) - : ModulePass(ID), - SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }) { + : ModulePass(ID), SampleLoader( + Name, SampleProfileRemappingFile, IsThinLTOPreLink, + [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }, + [&](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }, + [&](Function &F) -> TargetLibraryInfo & { + return TLIWP->getTLI(F); + }) { initializeSampleProfileLoaderLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -498,6 +505,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -505,6 +513,7 @@ SampleProfileLoader SampleLoader; AssumptionCacheTracker *ACT = nullptr; TargetTransformInfoWrapperPass *TTIWP = nullptr; + TargetLibraryInfoWrapperPass *TLIWP = nullptr; }; } // end anonymous namespace @@ -902,7 +911,7 @@ // see if it is legal to inline the callsite. 
InlineCost Cost = getInlineCost(cast(*I), Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); + None, GetTLI, nullptr, nullptr); if (Cost.isNever()) { ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) << "incompatible inlining"); @@ -929,7 +938,7 @@ InlineCost Cost = getInlineCost(cast(CallInst), getInlineParams(), - GetTTI(*Callee), GetAC, None, nullptr, nullptr); + GetTTI(*Callee), GetAC, None, GetTLI, nullptr, nullptr); return Cost.getCost() <= SampleColdCallSiteThreshold; } @@ -1770,6 +1779,7 @@ "Sample Profile loader", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) @@ -1890,6 +1900,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { ACT = &getAnalysis(); TTIWP = &getAnalysis(); + TLIWP = &getAnalysis(); ProfileSummaryInfo *PSI = &getAnalysis().getPSI(); return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); @@ -1966,12 +1977,15 @@ auto GetTTI = [&](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? 
SampleProfileRemappingFile : ProfileRemappingFileName, - IsThinLTOPreLink, GetAssumptionCache, GetTTI); + IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M)) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/Inline/X86/inline-no-builtin-compatible.ll b/llvm/test/Transforms/Inline/X86/inline-no-builtin-compatible.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/inline-no-builtin-compatible.ll @@ -0,0 +1,78 @@ +; Test to ensure no inlining is allowed into a caller with fewer nobuiltin attributes. +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @allbuiltins() { +entry: + %call = call i32 (...) @externalfunc() + ret i32 %call +; CHECK-LABEL: allbuiltins +; CHECK: call i32 (...) @externalfunc() +} +declare i32 @externalfunc(...) + +; We can inline a function that allows all builtins into one with a single +; nobuiltin. +define i32 @nobuiltinmemcpy() #0 { +entry: + %call = call i32 @allbuiltins() + ret i32 %call +; CHECK-LABEL: nobuiltinmemcpy +; CHECK: call i32 (...) @externalfunc() +} + +; We can inline a function that allows all builtins into one with all +; nobuiltins. +define i32 @nobuiltins() #1 { +entry: + %call = call i32 @allbuiltins() + ret i32 %call +; CHECK-LABEL: nobuiltins +; CHECK: call i32 (...) @externalfunc() +} + +; We can inline a function with a single nobuiltin into one with all nobuiltins. +define i32 @nobuiltins2() #1 { +entry: + %call = call i32 @nobuiltinmemcpy() + ret i32 %call +; CHECK-LABEL: nobuiltins2 +; CHECK: call i32 (...) @externalfunc() +} + +; We can't inline a function with any given nobuiltin into one that allows all +; builtins. 
+define i32 @allbuiltins2() { +entry: + %call = call i32 @nobuiltinmemcpy() + ret i32 %call +; CHECK-LABEL: allbuiltins2 +; CHECK: call i32 @nobuiltinmemcpy() +} + +; We can't inline a function with all nobuiltins into one that allows all +; builtins. +define i32 @allbuiltins3() { +entry: + %call = call i32 @nobuiltins() + ret i32 %call +; CHECK-LABEL: allbuiltins3 +; CHECK: call i32 @nobuiltins() +} + +; We can't inline a function with a specific nobuiltin into one with a +; different specific nobuiltin. +define i32 @nobuiltinmemset() #2 { +entry: + %call = call i32 @nobuiltinmemcpy() + ret i32 %call +; CHECK-LABEL: nobuiltinmemset +; CHECK: call i32 @nobuiltinmemcpy() +} + +attributes #0 = { "no-builtin-memcpy" } +attributes #1 = { "no-builtins" } +attributes #2 = { "no-builtin-memset" }