AMDGPU: thread the AMDGPUTargetMachine into the module-LDS lowering pass.

Summary of what the hunks below do:
  * AMDGPULowerModuleLDS stops deriving from ModulePass and becomes a plain
    implementation class that holds a `const AMDGPUTargetMachine &`.
  * A new legacy-pass-manager wrapper, AMDGPULowerModuleLDSLegacy, takes an
    optional target machine pointer. When it is null the wrapper requires
    TargetPassConfig and fetches the target machine from it in runOnModule().
  * The new-pass-manager AMDGPULowerModuleLDSPass takes the target machine by
    reference in its constructor.
  * The legacy factory, pass ID and initializer are renamed to
    createAMDGPULowerModuleLDSLegacyPass, AMDGPULowerModuleLDSLegacyPassID and
    initializeAMDGPULowerModuleLDSLegacyPass; all users are updated.
  * The target machine is not consumed yet; it is only referenced through a
    `(void)TM;` placeholder carrying a TODO about the target maximum LDS.

Index: llvm/lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.h
+++ llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -16,6 +16,7 @@
 
 namespace llvm {
 
+class AMDGPUTargetMachine;
 class TargetMachine;
 
 // GlobalISel passes
@@ -53,7 +54,8 @@
 FunctionPass *createAMDGPULateCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
-ModulePass *createAMDGPULowerModuleLDSPass();
+ModulePass *
+createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
 FunctionPass *createSIModeRegisterPass();
 FunctionPass *createGCNPreRAOptimizationsPass();
 
@@ -112,10 +114,13 @@
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
-void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
-extern char &AMDGPULowerModuleLDSID;
+void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
+extern char &AMDGPULowerModuleLDSLegacyPassID;
 
 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
+  const AMDGPUTargetMachine &TM;
+  AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
+
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
Index: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -177,6 +177,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDGPUMemoryUtils.h"
 #include "llvm/ADT/BitVector.h"
@@ -186,6 +187,7 @@
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
@@ -252,7 +254,8 @@
   return {std::move(V)};
 }
 
-class AMDGPULowerModuleLDS : public ModulePass {
+class AMDGPULowerModuleLDS {
+  const AMDGPUTargetMachine &TM;
 
   static void
   removeLocalVarsFromUsedLists(Module &M,
@@ -326,11 +329,7 @@
   }
 
 public:
-  static char ID;
-
-  AMDGPULowerModuleLDS() : ModulePass(ID) {
-    initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry());
-  }
+  AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
 
   using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
 
@@ -1089,7 +1088,7 @@
     return KernelToCreatedDynamicLDS;
   }
 
-  bool runOnModule(Module &M) override {
+  bool runOnModule(Module &M) {
     CallGraph CG = CallGraph(M);
     bool Changed = superAlignLDSGlobals(M);
 
@@ -1241,6 +1240,7 @@
       }
 
       if (Offset != 0) {
+        (void)TM; // TODO: Account for target maximum LDS
         std::string Buffer;
         raw_string_ostream SS{Buffer};
         SS << format("%u", Offset);
@@ -1530,21 +1530,51 @@
   }
 };
 
+class AMDGPULowerModuleLDSLegacy : public ModulePass {
+public:
+  const AMDGPUTargetMachine *TM;
+  static char ID;
+
+  AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr)
+      : ModulePass(ID), TM(TM_) {
+    initializeAMDGPULowerModuleLDSLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    if (!TM)
+      AU.addRequired<TargetPassConfig>();
+  }
+
+  bool runOnModule(Module &M) override {
+    if (!TM) {
+      auto &TPC = getAnalysis<TargetPassConfig>();
+      TM = &TPC.getTM<AMDGPUTargetMachine>();
+    }
+
+    return AMDGPULowerModuleLDS(*TM).runOnModule(M);
+  }
+};
+
 } // namespace
-char AMDGPULowerModuleLDS::ID = 0;
+char AMDGPULowerModuleLDSLegacy::ID = 0;
 
-char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID;
+char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID;
 
-INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
-                "Lower uses of LDS variables from non-kernel functions", false,
-                false)
+INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+                      "Lower uses of LDS variables from non-kernel functions",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+                    "Lower uses of LDS variables from non-kernel functions",
+                    false, false)
 
-ModulePass *llvm::createAMDGPULowerModuleLDSPass() {
-  return new AMDGPULowerModuleLDS();
+ModulePass *
+llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
+  return new AMDGPULowerModuleLDSLegacy(TM);
 }
 
 PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
                                                 ModuleAnalysisManager &) {
-  return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
-                                               : PreservedAnalyses::all();
+  return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none()
+                                                 : PreservedAnalyses::all();
 }
Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -185,7 +185,7 @@
                       "AMDGPU promote alloca to vector or LDS", false, false)
 // Move LDS uses from functions to kernels before promote alloca for accurate
 // estimation of LDS available
-INITIALIZE_PASS_DEPENDENCY(AMDGPULowerModuleLDS)
+INITIALIZE_PASS_DEPENDENCY(AMDGPULowerModuleLDSLegacy)
 INITIALIZE_PASS_END(AMDGPUPromoteAlloca, DEBUG_TYPE,
                     "AMDGPU promote alloca to vector or LDS", false, false)
 
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -388,7 +388,7 @@
   initializeAMDGPUCodeGenPreparePass(*PR);
   initializeAMDGPULateCodeGenPreparePass(*PR);
   initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
-  initializeAMDGPULowerModuleLDSPass(*PR);
+  initializeAMDGPULowerModuleLDSLegacyPass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
   initializeAMDGPURewriteUndefForPHIPass(*PR);
   initializeAMDGPUUnifyMetadataPass(*PR);
@@ -595,8 +595,8 @@
 
 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
   PB.registerPipelineParsingCallback(
-      [](StringRef PassName, ModulePassManager &PM,
-         ArrayRef<PassBuilder::PipelineElement>) {
+      [this](StringRef PassName, ModulePassManager &PM,
+             ArrayRef<PassBuilder::PipelineElement>) {
         if (PassName == "amdgpu-unify-metadata") {
           PM.addPass(AMDGPUUnifyMetadataPass());
           return true;
@@ -610,7 +610,7 @@
           return true;
         }
         if (PassName == "amdgpu-lower-module-lds") {
-          PM.addPass(AMDGPULowerModuleLDSPass());
+          PM.addPass(AMDGPULowerModuleLDSPass(*this));
           return true;
         }
         if (PassName == "amdgpu-lower-ctor-dtor") {
@@ -989,7 +989,7 @@
 
   // Runs before PromoteAlloca so the latter can account for function uses
   if (EnableLowerModuleLDS) {
-    addPass(createAMDGPULowerModuleLDSPass());
+    addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
   }
 
   // AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run