[AMDGPU] Port the always-inline pass to the new pass manager.

The legacy pass body is moved into file-local helpers so that the legacy
ModulePass and the new AMDGPUAlwaysInlinePass share one implementation. The
new pass is registered under the name "amdgpu-always-inline" and reports
invalidated analyses whenever the shared implementation reports a change.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -253,6 +253,17 @@
   TargetMachine *TM = nullptr,
   CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
+
+/// New pass manager wrapper for the AMDGPU always-inline transformation.
+/// \p GlobalOpt is forwarded unchanged to the shared implementation.
+struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
+  AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  bool GlobalOpt;
+};
+
 ModulePass *createR600OpenCLImageTypeLoweringPass();
 FunctionPass *createAMDGPUAnnotateUniformValues();
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -17,6 +17,7 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
 using namespace llvm;
@@ -32,8 +33,6 @@
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
-  void recursivelyVisitUsers(GlobalValue &GV,
-                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
 public:
   static char ID;
 
@@ -53,9 +52,9 @@
 
 char AMDGPUAlwaysInline::ID = 0;
 
-void AMDGPUAlwaysInline::recursivelyVisitUsers(
-  GlobalValue &GV,
-  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
+static void
+recursivelyVisitUsers(GlobalValue &GV,
+                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
   SmallVector<User *, 16> Stack;
 
   SmallPtrSet<const Value *, 8> Visited;
@@ -91,7 +90,7 @@
   }
 }
 
-bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
   std::vector<GlobalAlias *> AliasesToRemove;
 
   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -157,7 +156,18 @@
   return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
 }
 
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+  return alwaysInlineImpl(M, GlobalOpt);
+}
+
 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
   return new AMDGPUAlwaysInline(GlobalOpt);
 }
 
+// alwaysInlineImpl returns the same flag the legacy pass hands back from
+// runOnModule, so only claim that all analyses are preserved when it is false.
+PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
+                                              ModuleAnalysisManager &AM) {
+  const bool Changed = alwaysInlineImpl(M, GlobalOpt);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -506,6 +506,10 @@
           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
           return true;
         }
+        if (PassName == "amdgpu-always-inline") {
+          PM.addPass(AMDGPUAlwaysInlinePass());
+          return true;
+        }
         return false;
       });
   PB.registerPipelineParsingCallback(
@@ -565,6 +569,8 @@
         if (InternalizeSymbols) {
           PM.addPass(GlobalDCEPass());
         }
+        if (EarlyInlineAll && !EnableFunctionCalls)
+          PM.addPass(AMDGPUAlwaysInlinePass());
       });
 
   PB.registerCGSCCOptimizerLateEPCallback(
diff --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
--- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
+++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck -check-prefixes=STRESS-CALLS,ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline %s | FileCheck -check-prefixes=STRESS-CALLS,ALL %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -471,7 +471,8 @@
       "amdgpu-propagate-attributes-early",
       "amdgpu-propagate-attributes-late",
       "amdgpu-unify-metadata",
-      "amdgpu-printf-runtime-binding"};
+      "amdgpu-printf-runtime-binding",
+      "amdgpu-always-inline"};
   for (const auto &P : PassNameExactToIgnore)
     if (Pass == P)
       return false;