diff --git a/llvm/include/llvm/Analysis/ConvergenceControlUsage.h b/llvm/include/llvm/Analysis/ConvergenceControlUsage.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/ConvergenceControlUsage.h @@ -0,0 +1,49 @@ +//===- ConvergenceControlUsage.h - Uses of convergence control --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A simple analysis that scans a function to determine if the body contains +/// controlled or uncontrolled convergent operations. In particular, this is +/// independent of cycles, and does not attempt to identify hearts and the +/// def-use chains of convergence tokens. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONVERGENCECONTROLUSAGE_H +#define LLVM_ANALYSIS_CONVERGENCECONTROLUSAGE_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +class ConvergenceControlUsage { +public: + enum KindT { Unknown, ControlledOps, UncontrolledOps, NoConvergentOps } Kind; + ConvergenceControlUsage() : Kind(Unknown) {} + ConvergenceControlUsage(KindT Kind) : Kind(Kind) {} + + // Exposed for use in the AlwaysInlinerLegacyPass. + static ConvergenceControlUsage getFor(const Function &F); +}; + +/// Analysis pass which computes a \ref ConvergenceControlUsage. 
+class ConvergenceControlUsageAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = ConvergenceControlUsage; + + ConvergenceControlUsage run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CONVERGENCECONTROLUSAGE_H diff --git a/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h b/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h --- a/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h +++ b/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h @@ -55,12 +55,14 @@ namespace llvm { class TargetLibraryInfo; +class ConvergenceControlUsage; class PlaceSafepointsPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - bool runImpl(Function &F, const TargetLibraryInfo &TLI); + bool runImpl(Function &F, const TargetLibraryInfo &TLI, + function_ref GetCU); void cleanup() {} diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -34,6 +34,7 @@ class BasicBlock; class BlockFrequencyInfo; class CallGraph; +class ConvergenceControlUsage; class DebugInfoFinder; class DominatorTree; class Function; @@ -203,13 +204,19 @@ class InlineFunctionInfo { public: explicit InlineFunctionInfo( + function_ref + GetConvergenceControlUsage, function_ref GetAssumptionCache = nullptr, ProfileSummaryInfo *PSI = nullptr, BlockFrequencyInfo *CallerBFI = nullptr, BlockFrequencyInfo *CalleeBFI = nullptr, bool UpdateProfile = true) - : GetAssumptionCache(GetAssumptionCache), PSI(PSI), CallerBFI(CallerBFI), + : GetConvergenceControlUsage(GetConvergenceControlUsage), + GetAssumptionCache(GetAssumptionCache), PSI(PSI), CallerBFI(CallerBFI), CalleeBFI(CalleeBFI), UpdateProfile(UpdateProfile) {} + function_ref + GetConvergenceControlUsage; + /// If non-null, InlineFunction will 
update the callgraph to reflect the /// changes it makes. function_ref GetAssumptionCache; diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -46,6 +46,7 @@ CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp + ConvergenceControlUsage.cpp CycleAnalysis.cpp DDG.cpp DDGPrinter.cpp diff --git a/llvm/lib/Analysis/ConvergenceControlUsage.cpp b/llvm/lib/Analysis/ConvergenceControlUsage.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/ConvergenceControlUsage.cpp @@ -0,0 +1,48 @@ +//===- ConvergenceControlUsage.cpp - Uses of convergence control ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConvergenceControlUsage.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +ConvergenceControlUsage ConvergenceControlUsage::getFor(const Function &F) { + assert(!F.empty()); + bool hasConvergentOps = false; + for (auto &BB : F) { + for (auto &II : BB) { + auto *CB = dyn_cast(&II); + if (!CB) + continue; + if (!CB->isConvergent()) + continue; + if (auto *Callee = CB->getCalledFunction()) { + switch (Callee->getIntrinsicID()) { + case Intrinsic::experimental_convergence_anchor: + case Intrinsic::experimental_convergence_entry: + case Intrinsic::experimental_convergence_loop: + return ConvergenceControlUsage::ControlledOps; + } + } + if (CB->getOperandBundle(LLVMContext::OB_convergencectrl).has_value()) + return ConvergenceControlUsage::ControlledOps; + hasConvergentOps = true; + } + } + return hasConvergentOps ? 
ConvergenceControlUsage::UncontrolledOps + : ConvergenceControlUsage::NoConvergentOps; +} + +ConvergenceControlUsage +ConvergenceControlUsageAnalysis::run(Function &F, FunctionAnalysisManager &) { + return ConvergenceControlUsage::getFor(F); +} + +AnalysisKey ConvergenceControlUsageAnalysis::Key; diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" @@ -186,7 +187,8 @@ uint64_t AllocaSize); bool ShouldInlinePointerAddress(CallInst &CI); - void TryInlinePointerAddress(); + void TryInlinePointerAddress( + function_ref GetCU); public: SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL, @@ -731,7 +733,8 @@ return true; } -void SafeStack::TryInlinePointerAddress() { +void SafeStack::TryInlinePointerAddress( + function_ref GetCU) { auto *CI = dyn_cast(UnsafeStackPtr); if (!CI) return; @@ -746,7 +749,7 @@ if (!ShouldInlinePointerAddress(*CI)) return; - InlineFunctionInfo IFI; + InlineFunctionInfo IFI(GetCU); InlineFunction(*CI, IFI); } @@ -844,7 +847,18 @@ IRB.CreateStore(BasePointer, UnsafeStackPtr); } - TryInlinePointerAddress(); + // Convergence control is an experimental feature that prevents inlining in + // some cases. For the legacy pass manager, we simply assume that convergence + // control does not overlap with safe stack insertion. Hence we arrange things + // so that inlining just works with the legacy pass manager. In the + // exceedingly rare case that uses convergence control, this may produce IR + // that fails the verifier. 
+ ConvergenceControlUsage ConservativeDefault( + ConvergenceControlUsage::UncontrolledOps); + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return ConservativeDefault; + }; + TryInlinePointerAddress(GetCU); LLVM_DEBUG(dbgs() << "[SafeStack] safestack applied\n"); return true; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/CostModel.h" #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/Analysis/DDG.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -223,6 +223,7 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) +FUNCTION_ANALYSIS("convergence-usage", ConvergenceControlUsageAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis()) diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -139,7 +140,7 @@ return NewF; } - void create(); + void create(function_ref GetCU); private: bool isSwitchDestroyFunction() { @@ 
-156,10 +157,12 @@ } void replaceEntryBlock(); - Value *deriveNewFramePointer(); + Value *deriveNewFramePointer( + function_ref GetCU); void replaceRetconOrAsyncSuspendUses(); void replaceCoroSuspends(); - void replaceCoroEnds(); + void + replaceCoroEnds(function_ref GetCU); void replaceSwiftErrorOps(); void salvageDebugInfo(); void handleFinalSuspend(); @@ -181,7 +184,9 @@ /// Replace an llvm.coro.end.async. /// Will inline the must tail call function call if there is one. /// \returns true if cleanup of the coro.end block is needed, false otherwise. -static bool replaceCoroEndAsync(AnyCoroEndInst *End) { +static bool +replaceCoroEndAsync(AnyCoroEndInst *End, + function_ref GetCU) { IRBuilder<> Builder(End); auto *EndAsync = dyn_cast(End); @@ -208,7 +213,7 @@ // Insert the return instruction. Builder.SetInsertPoint(End); Builder.CreateRetVoid(); - InlineFunctionInfo FnInfo; + InlineFunctionInfo FnInfo(GetCU); // Remove the rest of the block, by splitting it into an unreachable block. auto *BB = End->getParent(); @@ -224,9 +229,10 @@ } /// Replace a non-unwind call to llvm.coro.end. -static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, - const coro::Shape &Shape, Value *FramePtr, - bool InResume, CallGraph *CG) { +static void replaceFallthroughCoroEnd( + AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, + bool InResume, CallGraph *CG, + function_ref GetCU) { // Start inserting right before the coro.end. IRBuilder<> Builder(End); @@ -243,7 +249,7 @@ // In async lowering this returns. 
case coro::ABI::Async: { - bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); + bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End, GetCU); if (!CoroEndBlockNeedsCleanup) return; break; @@ -341,12 +347,14 @@ } } -static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, - Value *FramePtr, bool InResume, CallGraph *CG) { +static void +replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, + bool InResume, CallGraph *CG, + function_ref GetCU) { if (End->isUnwind()) replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); else - replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); + replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG, GetCU); auto &Context = End->getContext(); End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) @@ -609,12 +617,14 @@ } } -void CoroCloner::replaceCoroEnds() { +void CoroCloner::replaceCoroEnds( + function_ref GetCU) { for (AnyCoroEndInst *CE : Shape.CoroEnds) { // We use a null call graph because there's no call graph node for // the cloned function yet. We'll just be rebuilding that later. auto *NewCE = cast(VMap[CE]); - replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); + replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr, + GetCU); } } @@ -792,7 +802,8 @@ } /// Derive the value of the new frame pointer. -Value *CoroCloner::deriveNewFramePointer() { +Value *CoroCloner::deriveNewFramePointer( + function_ref GetCU) { // Builder should be inserting to the front of the new entry block. switch (Shape.ABI) { @@ -824,7 +835,7 @@ Type::getInt8Ty(Context), CallerContext, Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr"); // Inline the projection function. 
- InlineFunctionInfo InlineInfo; + InlineFunctionInfo InlineInfo(GetCU); auto InlineRes = InlineFunction(*CallerContext, InlineInfo); assert(InlineRes.isSuccess()); (void)InlineRes; @@ -880,7 +891,8 @@ /// Clone the body of the original function into a resume function of /// some sort. -void CoroCloner::create() { +void CoroCloner::create( + function_ref GetCU) { // Create the new function if we don't already have one. if (!NewF) { NewF = createCloneDeclaration(OrigF, Shape, Suffix, @@ -1035,7 +1047,7 @@ replaceEntryBlock(); Builder.SetInsertPoint(&NewF->getEntryBlock().front()); - NewFramePtr = deriveNewFramePointer(); + NewFramePtr = deriveNewFramePointer(GetCU); // Remap frame pointer. Value *OldFramePtr = VMap[Shape.FramePtr]; @@ -1082,7 +1094,7 @@ replaceSwiftErrorOps(); // Remove coro.end intrinsics. - replaceCoroEnds(); + replaceCoroEnds(GetCU); // Salvage debug info that points into the coroutine frame. salvageDebugInfo(); @@ -1097,10 +1109,12 @@ // Create a resume clone by cloning the body of the original function, setting // new entry block and replacing coro.suspend an appropriate value to force // resume or cleanup pass for every suspend point. 
-static Function *createClone(Function &F, const Twine &Suffix, - coro::Shape &Shape, CoroCloner::Kind FKind) { +static Function * +createClone(Function &F, const Twine &Suffix, coro::Shape &Shape, + CoroCloner::Kind FKind, + function_ref GetCU) { CoroCloner Cloner(F, Suffix, Shape, FKind); - Cloner.create(); + Cloner.create(GetCU); return Cloner.getFunction(); } @@ -1611,18 +1625,19 @@ } } -static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl &Clones, - TargetTransformInfo &TTI) { +static void splitSwitchCoroutine( + Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, + TargetTransformInfo &TTI, + function_ref GetCU) { assert(Shape.ABI == coro::ABI::Switch); createResumeEntryBlock(F, Shape); - auto ResumeClone = createClone(F, ".resume", Shape, - CoroCloner::Kind::SwitchResume); - auto DestroyClone = createClone(F, ".destroy", Shape, - CoroCloner::Kind::SwitchUnwind); - auto CleanupClone = createClone(F, ".cleanup", Shape, - CoroCloner::Kind::SwitchCleanup); + auto ResumeClone = + createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, GetCU); + auto DestroyClone = + createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, GetCU); + auto CleanupClone = + createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, GetCU); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); @@ -1696,8 +1711,10 @@ return TailCall; } -static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl &Clones) { +static void +splitAsyncCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones, + function_ref GetCU) { assert(Shape.ABI == coro::ABI::Async); assert(Clones.empty()); // Reset various things that the optimizer might have decided it @@ -1775,7 +1792,7 @@ auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder); Builder.CreateRetVoid(); - InlineFunctionInfo FnInfo; + InlineFunctionInfo FnInfo(GetCU); auto InlineRes = InlineFunction(*TailCall, 
FnInfo); assert(InlineRes.isSuccess() && "Expected inlining to succeed"); (void)InlineRes; @@ -1789,12 +1806,13 @@ auto *Suspend = Shape.CoroSuspends[Idx]; auto *Clone = Clones[Idx]; - CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create(); + CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create(GetCU); } } -static void splitRetconCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl &Clones) { +static void splitRetconCoroutine( + Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, + function_ref GetCU) { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); assert(Clones.empty()); @@ -1920,7 +1938,7 @@ auto Suspend = Shape.CoroSuspends[i]; auto Clone = Clones[i]; - CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(); + CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(GetCU); } } @@ -1940,7 +1958,8 @@ static coro::Shape splitCoroutine(Function &F, SmallVectorImpl &Clones, TargetTransformInfo &TTI, bool OptimizeFrame, - std::function MaterializableCallback) { + std::function MaterializableCallback, + function_ref GetCU) { PrettyStackTraceFunction prettyStackTrace(F); // The suspend-crossing algorithm in buildCoroutineFrame get tripped @@ -1962,14 +1981,14 @@ } else { switch (Shape.ABI) { case coro::ABI::Switch: - splitSwitchCoroutine(F, Shape, Clones, TTI); + splitSwitchCoroutine(F, Shape, Clones, TTI, GetCU); break; case coro::ABI::Async: - splitAsyncCoroutine(F, Shape, Clones); + splitAsyncCoroutine(F, Shape, Clones, GetCU); break; case coro::ABI::Retcon: case coro::ABI::RetconOnce: - splitRetconCoroutine(F, Shape, Clones); + splitRetconCoroutine(F, Shape, Clones, GetCU); break; } } @@ -1989,9 +2008,12 @@ } /// Remove calls to llvm.coro.end in the original function. 
-static void removeCoroEnds(const coro::Shape &Shape) { +static void +removeCoroEnds(const coro::Shape &Shape, + function_ref GetCU) { for (auto *End : Shape.CoroEnds) { - replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr, + GetCU); } } @@ -1999,12 +2021,13 @@ LazyCallGraph::Node &N, const coro::Shape &Shape, const SmallVectorImpl &Clones, LazyCallGraph::SCC &C, LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, - FunctionAnalysisManager &FAM) { + FunctionAnalysisManager &FAM, + function_ref GetCU) { if (!Shape.CoroBegin) return; if (Shape.ABI != coro::ABI::Switch) - removeCoroEnds(Shape); + removeCoroEnds(Shape, GetCU); else { for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { auto &Context = End->getContext(); @@ -2113,6 +2136,10 @@ auto &FAM = AM.getResult(C, CG).getManager(); + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + // Check for uses of llvm.coro.prepare.retcon/async. 
SmallVector PrepareFns; addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); @@ -2144,8 +2171,9 @@ auto &ORE = FAM.getResult(F); const coro::Shape Shape = splitCoroutine(F, Clones, FAM.getResult(F), - OptimizeFrame, MaterializableCallback); - updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + OptimizeFrame, MaterializableCallback, GetCU); + updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM, + GetCU); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -32,6 +33,7 @@ bool AlwaysInlineImpl( Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI, + function_ref GetCU, function_ref GetAssumptionCache, function_ref GetAAR, function_ref GetBFI) { @@ -62,7 +64,7 @@ DebugLoc DLoc = CB->getDebugLoc(); BasicBlock *Block = CB->getParent(); - InlineFunctionInfo IFI(GetAssumptionCache, &PSI, + InlineFunctionInfo IFI(GetCU, GetAssumptionCache, &PSI, GetBFI ? &GetBFI(*Caller) : nullptr, GetBFI ? &GetBFI(F) : nullptr); @@ -143,11 +145,33 @@ auto GetAAR = [&](Function &F) -> AAResults & { return getAnalysis(F).getAAResults(); }; + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return getAnalysis().getAssumptionCache(F); }; - return AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, GetAAR, + // Convergence control is an experimental feature that prevents inlining in + // some cases. The legacy version of AlwaysInliner is used only on AMDGPU. 
+ // For this one special case, we create a local cache of information about + // convergence control usage. + class ConvergenceControlUsageMap { + DenseMap CUMap; + + public: + ConvergenceControlUsage &getFor(const Function &F) { + ConvergenceControlUsage &CU = CUMap[&F]; + if (CU.Kind == ConvergenceControlUsage::Unknown) + CU = ConvergenceControlUsage::getFor(F); + return CU; + } + } CUMap; + + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return CUMap.getFor(F); + }; + + return AlwaysInlineImpl(M, InsertLifetime, PSI, GetCU, GetAssumptionCache, + GetAAR, /*GetBFI*/ nullptr); } @@ -182,6 +206,9 @@ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { return FAM.getResult(F); }; @@ -190,8 +217,12 @@ }; auto &PSI = MAM.getResult(M); - bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, - GetAAR, GetBFI); + bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetCU, + GetAssumptionCache, GetAAR, GetBFI); + if (!Changed) + return PreservedAnalyses::all(); - return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; } diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" @@ -214,6 +215,10 @@ AM.getResult(InitialC, CG) .getManager(); + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M); Advisor.onPassEntry(&InitialC); @@ -373,7 +378,7 @@ // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - GetAssumptionCache, PSI, + GetCU, GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); @@ -576,6 +581,9 @@ PA.preserve(); // We have already invalidated all analyses on modified functions. PA.preserveSet>(); + // We update the caller's information if convergent operations were introduced + // by the callee. 
+ PA.preserve(); return PA; } diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" @@ -191,6 +192,9 @@ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; if (InlineHistoryID != -1 && inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { @@ -208,7 +212,7 @@ // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - GetAssumptionCache, PSI, + GetCU, GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); @@ -297,5 +301,7 @@ if (!Changed) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + PreservedAnalyses PA; + PA.preserve(); + return PA; } diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -192,9 +193,10 @@ function_ref GTTI, function_ref GTLI, ProfileSummaryInfo &ProfSI, + function_ref GetCU, function_ref GBFI = nullptr) : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC), - 
GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {} + GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI), GetCU(GetCU) {} bool run(Module &M); // Main part of the transformation that calls helper functions to find @@ -269,6 +271,7 @@ function_ref GetBFI; function_ref GetTLI; ProfileSummaryInfo &PSI; + function_ref GetCU; // Return the frequency of the OutlininingBB relative to F's entry point. // The result is no larger than 1 and is represented using BP. @@ -1382,7 +1385,7 @@ OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CB->getCaller()); - InlineFunctionInfo IFI(GetAssumptionCache, &PSI); + InlineFunctionInfo IFI(GetCU, GetAssumptionCache, &PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true, @@ -1462,6 +1465,10 @@ return FAM.getCachedResult(F); }; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { return FAM.getResult(F); }; @@ -1476,9 +1483,12 @@ ProfileSummaryInfo &PSI = AM.getResult(M); - if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI, - GetTLI, PSI, GetBFI) - .run(M)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); + if (!PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI, + GetTLI, PSI, GetCU, GetBFI) + .run(M)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include 
"llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" @@ -513,12 +514,13 @@ IntrusiveRefCntPtr FS, std::function GetAssumptionCache, std::function GetTargetTransformInfo, - std::function GetTLI) + std::function GetTLI, + function_ref GetCU) : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), std::move(FS)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), - LTOPhase(LTOPhase), + GetCU(GetCU), LTOPhase(LTOPhase), AnnotatedPassName(AnnotateSampleProfileInlinePhase ? llvm::AnnotateInlinePassName(InlineContext{ LTOPhase, InlinePass::SampleProfileInliner}) @@ -579,6 +581,7 @@ std::function GetAC; std::function GetTTI; std::function GetTLI; + function_ref GetCU; /// Profile tracker for different context. std::unique_ptr ContextTracker; @@ -1244,7 +1247,7 @@ if (!Cost) return false; - InlineFunctionInfo IFI(GetAC); + InlineFunctionInfo IFI(GetCU, GetAC); IFI.UpdateProfile = false; InlineResult IR = InlineFunction(CB, IFI, /*MergeAttributes=*/true); @@ -2539,6 +2542,9 @@ auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { return FAM.getResult(F); }; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; if (!FS) FS = vfs::getRealFileSystem(); @@ -2547,7 +2553,7 @@ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? 
SampleProfileRemappingFile : ProfileRemappingFileName, - LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI); + LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, GetCU); if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); @@ -2557,5 +2563,7 @@ if (!SampleLoader.runOnModule(M, &AM, PSI, CG)) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + PreservedAnalyses PA; + PA.preserve(); + return PA; } diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp --- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -192,7 +193,8 @@ static void InsertSafepointPoll(Instruction *InsertBefore, std::vector &ParsePointsNeeded /*rval*/, - const TargetLibraryInfo &TLI); + const TargetLibraryInfo &TLI, + function_ref GetCU); bool PlaceBackedgeSafepointsLegacyPass::runOnLoop(Loop *L) { // Loop through all loop latches (branches controlling backedges). We need @@ -290,8 +292,20 @@ LLVM_DEBUG(dbgs() << "********** Begin Safepoint Placement **********\n"); LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n'); - bool MadeChange = - Impl.runImpl(F, getAnalysis().getTLI(F)); + // Convergence control is an experimental feature that prevents inlining in + // some cases. For the legacy pass manager, we simply assume that convergence + // control does not overlap with garbage collection. Hence we arrange things + // so that inlining just works with the legacy pass manager. In the + // exceedingly rare case that uses convergence control, this may produce IR + // that fails the verifier. 
+ ConvergenceControlUsage ConservativeDefault( + ConvergenceControlUsage::UncontrolledOps); + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return ConservativeDefault; + }; + + bool MadeChange = Impl.runImpl( + F, getAnalysis().getTLI(F), GetCU); if (MadeChange) { LLVM_DEBUG(dbgs() << "********** Function after Safepoint Placement: " @@ -303,7 +317,9 @@ return MadeChange; } -bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) { +bool PlaceSafepointsPass::runImpl( + Function &F, const TargetLibraryInfo &TLI, + function_ref GetCU) { if (F.isDeclaration() || F.empty()) { // This is a declaration, nothing to do. Must exit early to avoid crash in // dom tree calculation @@ -425,7 +441,7 @@ // safepoint polls themselves. for (Instruction *PollLocation : PollsNeeded) { std::vector RuntimeCalls; - InsertSafepointPoll(PollLocation, RuntimeCalls, TLI); + InsertSafepointPoll(PollLocation, RuntimeCalls, TLI, GetCU); llvm::append_range(ParsePointNeeded, RuntimeCalls); } @@ -436,11 +452,17 @@ FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); - if (!runImpl(F, TLI)) + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return AM.getResult(F); + }; + + if (!runImpl(F, TLI, GetCU)) return PreservedAnalyses::all(); // TODO: can we preserve more? 
- return PreservedAnalyses::none(); + PreservedAnalyses PA; + PA.preserve(); + return PA; } static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) { @@ -678,7 +700,8 @@ static void InsertSafepointPoll(Instruction *InsertBefore, std::vector &ParsePointsNeeded /*rval*/, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + function_ref GetCU) { BasicBlock *OrigBB = InsertBefore->getParent(); Module *M = InsertBefore->getModule(); assert(M && "must be part of a module"); @@ -706,8 +729,9 @@ After++; assert(After != OrigBB->end() && "must have successor"); + InlineFunctionInfo IFI(GetCU); + // Do the actual inlining - InlineFunctionInfo IFI; bool InlineStatus = InlineFunction(*PollCall, IFI).isSuccess(); assert(InlineStatus && "inline must succeed"); (void)InlineStatus; // suppress warning in release-asserts diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" @@ -1929,6 +1930,30 @@ } } +// Inlining is allowed only if both functions agree on whether they use +// convergence control tokens. A function that has no convergent operations is +// always compatible with any other kind. 
+static bool IsConvergenceCompatible(const InlineFunctionInfo &IFI,
+                                    Function &Caller, Function &Callee) {
+  auto CalleeK = IFI.GetConvergenceControlUsage(Callee).Kind;
+  if (CalleeK == ConvergenceControlUsage::NoConvergentOps)
+    return true;
+  auto CallerK = IFI.GetConvergenceControlUsage(Caller).Kind;
+  if (CallerK == ConvergenceControlUsage::NoConvergentOps)
+    return true;
+  return CallerK == CalleeK;
+}
+
+// Update the usage info obtained through IFI to reflect inlining Callee into
+// Caller: a caller with no convergent operations of its own takes on the
+// callee's kind. In every other compatible case its kind is unchanged.
+static void UpdateConvergenceControlUsage(const InlineFunctionInfo &IFI,
+                                          Function &Caller, Function &Callee) {
+  auto &CallerInfo = IFI.GetConvergenceControlUsage(Caller);
+  if (CallerInfo.Kind == ConvergenceControlUsage::NoConvergentOps)
+    CallerInfo.Kind = IFI.GetConvergenceControlUsage(Callee).Kind;
+}
+
 /// This function inlines the called function into the basic block of the
 /// caller. This returns false if it is not possible to inline this call.
 /// The program is still in a well defined state if this occurs though.
@@ -1958,9 +1983,11 @@
 
   // The inliner does not know how to inline through calls with operand bundles
   // in general ...
+  Value *ConvergenceControlToken = nullptr;
   if (CB.hasOperandBundles()) {
     for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
-      uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
+      auto OBUse = CB.getOperandBundleAt(i);
+      uint32_t Tag = OBUse.getTagID();
       // ... but it knows how to inline through "deopt" operand bundles ...
if (Tag == LLVMContext::OB_deopt) continue; @@ -1971,6 +1998,10 @@ continue; if (Tag == LLVMContext::OB_kcfi) continue; + if (Tag == LLVMContext::OB_convergencectrl) { + ConvergenceControlToken = OBUse.Inputs[0].get(); + continue; + } return InlineResult::failure("unsupported operand bundle"); } @@ -1983,6 +2014,10 @@ BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); + if (!IsConvergenceCompatible(IFI, *Caller, *CalledFunc)) { + return InlineResult::failure("incompatible convergence control"); + } + // Do not inline strictfp function into non-strictfp one. It would require // conversion of all FP operations in host function to constrained intrinsics. if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) && @@ -2256,6 +2291,9 @@ // Propagate metadata on the callsite if necessary. PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); + // We know how the caller's convergence usage will change after inlining. + UpdateConvergenceControlUsage(IFI, *Caller, *CalledFunc); + // Register any cloned assumptions. if (IFI.GetAssumptionCache) for (BasicBlock &NewBlock : @@ -2265,10 +2303,23 @@ IFI.GetAssumptionCache(*Caller).registerAssumption(II); } + if (ConvergenceControlToken) { + auto *I = FirstNewBlock->getFirstNonPHI(); + if (auto *IntrinsicCall = dyn_cast(I)) { + if (IntrinsicCall->getIntrinsicID() == + Intrinsic::experimental_convergence_entry) { + IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); + IntrinsicCall->eraseFromParent(); + } + } + } + // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. + // + // Also handle convergence control entry intrinsics. 
{ BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), diff --git a/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll --- a/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll +++ b/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll @@ -21,6 +21,7 @@ ; CHECK: Invalidating analysis: LoopAnalysis on test1_h ; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h ; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h +; CHECK: Invalidating analysis: ConvergenceControlUsageAnalysis on test1_h ; CHECK-NOT: Invalidating analysis: ; CHECK: Running pass: DominatorTreeVerifierPass on test1_g ; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g diff --git a/llvm/test/Transforms/Inline/convergence-inline.ll b/llvm/test/Transforms/Inline/convergence-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/convergence-inline.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='cgscc(inline)' -S %s | FileCheck %s + +define void @nonconvergent_callee() alwaysinline { +; CHECK-LABEL: @nonconvergent_callee( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret void +; +entry: + %token = call token @llvm.experimental.convergence.anchor() + call void @f(i32 0) [ "convergencectrl"(token %token) ] + ret void +} + +define void @convergent_callee(i32 %v) convergent alwaysinline { +; CHECK-LABEL: @convergent_callee( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: call void @f(i32 [[V:%.*]]) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret void +; +entry: + %token = call token 
@llvm.experimental.convergence.entry() + call void @f(i32 %v) [ "convergencectrl"(token %token) ] + ret void +} + +define void @test_nonconvergent() { +; CHECK-LABEL: @test_nonconvergent( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN_I:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: call void @f(i32 0) [ "convergencectrl"(token [[TOKEN_I]]) ] +; CHECK-NEXT: ret void +; +entry: + call void @nonconvergent_callee() + ret void +} + +define void @test_convergent_basic() { +; CHECK-LABEL: @test_convergent_basic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %token = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %then, label %end + +then: + call void @convergent_callee(i32 0) [ "convergencectrl"(token %token) ] + br label %end + +end: + ret void +} + +define void @test_convergent_multiple() convergent { +; CHECK-LABEL: @test_convergent_multiple( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: call void @f(i32 1) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: call void @f(i32 2) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret void +; +entry: + %token = call token @llvm.experimental.convergence.entry() + call void @convergent_callee(i32 0) [ "convergencectrl"(token %token) ] + call void @convergent_callee(i32 1) [ "convergencectrl"(token %token) ] + call void @convergent_callee(i32 2) [ "convergencectrl"(token %token) ] + ret void +} + +define void @test_convergent_loop() { +; CHECK-LABEL: @test_convergent_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOKEN:%.*]] = call 
token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: br i1 undef, label [[HDR:%.*]], label [[END:%.*]] +; CHECK: hdr: +; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: call void @f(i32 0) [ "convergencectrl"(token [[TOK_LOOP]]) ] +; CHECK-NEXT: br i1 undef, label [[HDR]], label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %token = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %hdr, label %end + +hdr: + %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %token) ] + call void @convergent_callee(i32 0) [ "convergencectrl"(token %tok.loop) ] + br i1 undef, label %hdr, label %end + +end: + ret void +} + +declare void @f(i32) convergent + +declare token @llvm.experimental.convergence.entry() +declare token @llvm.experimental.convergence.anchor() +declare token @llvm.experimental.convergence.loop() diff --git a/llvm/test/Transforms/Inline/convergence-no-mixing.ll b/llvm/test/Transforms/Inline/convergence-no-mixing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/convergence-no-mixing.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='cgscc(inline)' -S %s | FileCheck %s + +declare void @f() convergent +declare void @g() + +;;; Caller: no convergent ops +;;; Callee: no convergent ops + +define internal i32 @inner10() alwaysinline convergent { + ret i32 1 +} + +define i32 @outer10() convergent { +; CHECK-LABEL: @outer10( +; CHECK-NEXT: ret i32 1 +; + %r = call i32 @inner10() + ret i32 %r +} + +;;; Caller: uncontrolled convergent ops +;;; Callee: uncontrolled convergent ops + +define internal i32 @inner20() alwaysinline convergent { + call void @f() ; uncontrolled convergent call + ret i32 1 +} + +define i32 @outer20() convergent { +; CHECK-LABEL: @outer20( +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 1 +; + %r 
= call i32 @inner20() + ret i32 %r +} + +;;; Caller: uncontrolled convergent ops +;;; Callee: controlled convergent ops + +define internal i32 @inner30() alwaysinline convergent { +; CHECK-LABEL: @inner30( +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: ret i32 1 +; + %t = call token @llvm.experimental.convergence.entry() + call void @f() [ "convergencectrl"(token %t) ] + ret i32 1 +} + +define i32 @outer30() convergent { +; CHECK-LABEL: @outer30( +; CHECK-NEXT: [[R:%.*]] = call i32 @inner30() +; CHECK-NEXT: ret i32 [[R]] +; + %r = call i32 @inner30() + ret i32 %r +} + +;;; Caller: no convergent ops +;;; Callee: noconvergent with controlled convergent ops + +define internal i32 @inner31() alwaysinline { + %t = call token @llvm.experimental.convergence.anchor() + call void @f() [ "convergencectrl"(token %t) ] + ret i32 1 +} + +define i32 @outer31() convergent { +; CHECK-LABEL: @outer31( +; CHECK-NEXT: [[T_I:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[T_I]]) ] +; CHECK-NEXT: ret i32 1 +; + %r = call i32 @inner31() + ret i32 %r +} + +;;; Caller: controlled convergent ops +;;; Callee: controlled convergent ops + +define internal i32 @inner40() alwaysinline convergent { + %t = call token @llvm.experimental.convergence.entry() + call void @f() [ "convergencectrl"(token %t) ] + ret i32 1 +} + +define i32 @outer40() convergent { +; CHECK-LABEL: @outer40( +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: ret i32 1 +; + %t = call token @llvm.experimental.convergence.anchor() + %r = call i32 @inner40() [ "convergencectrl"(token %t) ] + ret i32 %r +} + +;;; Caller: controlled convergent ops +;;; Callee: uncontrolled convergent ops + +define internal i32 @inner50() alwaysinline convergent { +; 
CHECK-LABEL: @inner50( +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 1 +; + call void @f() + ret i32 1 +} + +define i32 @outer50() convergent { +; CHECK-LABEL: @outer50( +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: [[R:%.*]] = call i32 @inner50() [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: ret i32 [[R]] +; + %t = call token @llvm.experimental.convergence.anchor() + %r = call i32 @inner50() [ "convergencectrl"(token %t) ] + ret i32 %r +} + +declare token @llvm.experimental.convergence.entry() +declare token @llvm.experimental.convergence.anchor() diff --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp --- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp +++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConvergenceControlUsage.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Dominators.h" @@ -29,6 +30,7 @@ class FunctionPropertiesAnalysisTest : public testing::Test { public: FunctionPropertiesAnalysisTest() { + FAM.registerPass([&] { return ConvergenceControlUsageAnalysis(); }); FAM.registerPass([&] { return DominatorTreeAnalysis(); }); FAM.registerPass([&] { return LoopAnalysis(); }); FAM.registerPass([&] { return PassInstrumentationAnalysis(); }); @@ -154,7 +156,10 @@ EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -208,7 +213,10 @@ EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo 
IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -274,7 +282,10 @@ auto FPI = buildFPI(*F1); EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); @@ -320,7 +331,10 @@ auto FPI = buildFPI(*F1); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -373,7 +387,10 @@ auto FPI = buildFPI(*F1); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -427,7 +444,10 @@ auto FPI = buildFPI(*F1); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -479,7 +499,10 @@ auto FPI = buildFPI(*F1); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -535,7 +558,10 @@ auto FPI = buildFPI(*F1); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo 
IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -606,7 +632,10 @@ EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -673,7 +702,10 @@ EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1); @@ -733,7 +765,10 @@ EXPECT_EQ(FPI, ExpectedInitial); FunctionPropertiesUpdater FPU(FPI, *CB); - InlineFunctionInfo IFI; + auto GetCU = [&](Function &F) -> ConvergenceControlUsage & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(GetCU); auto IR = llvm::InlineFunction(*CB, IFI); EXPECT_TRUE(IR.isSuccess()); invalidate(*F1);