diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -340,6 +340,19 @@
 void initializeGCNNSAReassignPass(PassRegistry &);
 extern char &GCNNSAReassignID;
 
+// Lowering of LDS defined within non-kernel device functions: legacy pass
+// manager factory, registration hook and pass ID.
+ModulePass *createAMDGPULowerFunctionLocalLDSPass();
+void initializeAMDGPULowerFunctionLocalLDSPass(PassRegistry &);
+extern char &AMDGPULowerFunctionLocalLDSID;
+
+// New pass manager wrapper around the same lowering.
+struct AMDGPULowerFunctionLocalLDSPass
+    : PassInfoMixin<AMDGPULowerFunctionLocalLDSPass> {
+  AMDGPULowerFunctionLocalLDSPass() {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
 namespace AMDGPU {
 enum TargetIndex {
   TI_CONSTDATA_START,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -118,13 +118,15 @@
   // should only appear when IPO passes manages to move LDs defined in a kernel
   // into a single user function.
 
-  for (GlobalVariable &GV : M.globals()) {
-    // TODO: Region address
-    unsigned AS = GV.getAddressSpace();
-    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
-      continue;
-
-    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
+  if (!AMDGPUTargetMachine::EnableFunctionLocalLDSLowering) {
+    for (GlobalVariable &GV : M.globals()) {
+      // TODO: Region address
+      unsigned AS = GV.getAddressSpace();
+      if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
+        continue;
+
+      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
+    }
   }
 
   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerFunctionLocalLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerFunctionLocalLDS.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerFunctionLocalLDS.cpp
@@ -0,0 +1,95 @@
+//===-- AMDGPULowerFunctionLocalLDS.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the skeleton of a module pass that lowers LDS defined
+// within AMDGPU non-kernel device functions. The lowering itself is not
+// implemented yet, so the pass currently leaves the module unchanged.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include
+#include
+
+#define DEBUG_TYPE "amdgpu-lower-function-local-lds"
+
+using namespace llvm;
+
+namespace {
+
+// Implementation shared by the legacy and new pass manager wrappers.
+class LowerFunctionLocalLDSImpl {
+public:
+  // Constructs a LowerFunctionLocalLDSImpl object for the given Module.
+  explicit LowerFunctionLocalLDSImpl(const Module &M) : M(M) {}
+
+  // Entry point function. Returns true if the module was modified.
+  bool lower();
+
+private:
+  // Module being processed; held by reference, so it must outlive this object.
+  const Module &M;
+};
+
+// Entry point function. The lowering is still a TODO, so this reports that
+// nothing was changed.
+bool LowerFunctionLocalLDSImpl::lower() {
+  // TODO:
+  return false;
+}
+
+// Legacy pass manager wrapper.
+class AMDGPULowerFunctionLocalLDS : public ModulePass {
+public:
+  static char ID;
+
+  AMDGPULowerFunctionLocalLDS() : ModulePass(ID) {
+    initializeAMDGPULowerFunctionLocalLDSPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+char AMDGPULowerFunctionLocalLDS::ID = 0;
+char &llvm::AMDGPULowerFunctionLocalLDSID = AMDGPULowerFunctionLocalLDS::ID;
+
+INITIALIZE_PASS(AMDGPULowerFunctionLocalLDS, "amdgpu-lower-function-local-lds",
+                "Lower LDS Defined Within AMDGPU Non-kernel Device Function",
+                false /*only look at the cfg*/, false /*analysis pass*/)
+
+// Legacy entry point: forwards lower()'s "changed" result.
+bool AMDGPULowerFunctionLocalLDS::runOnModule(Module &M) {
+  LowerFunctionLocalLDSImpl LDSLowerer{M};
+  return LDSLowerer.lower();
+}
+
+ModulePass *llvm::createAMDGPULowerFunctionLocalLDSPass() {
+  return new AMDGPULowerFunctionLocalLDS();
+}
+
+// New pass manager entry point. Analyses are only reported as preserved when
+// lower() left the module untouched.
+PreservedAnalyses
+AMDGPULowerFunctionLocalLDSPass::run(Module &M, ModuleAnalysisManager &AM) {
+  LowerFunctionLocalLDSImpl LDSLowerer{M};
+  return LDSLowerer.lower() ? PreservedAnalyses::none()
+                            : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -34,6 +34,7 @@
   static bool EnableLateStructurizeCFG;
   static bool EnableFunctionCalls;
   static bool EnableFixedFunctionABI;
+  static bool EnableFunctionLocalLDSLowering;
 
   AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                       StringRef FS, TargetOptions Options,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -193,6 +193,13 @@
   cl::desc("Enable workarounds for the StructurizeCFG pass"),
   cl::init(true), cl::Hidden);
 
+static cl::opt<bool, true> EnableFunctionLocalLDSLowering(
+    "amdgpu-enable-function-local-lds-lowering",
+    cl::desc(
+        "Enable lowering of LDS defined within non-kernel device function"),
+    cl::location(AMDGPUTargetMachine::EnableFunctionLocalLDSLowering),
+    cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine X(getTheAMDGPUTarget());
@@ -260,6 +267,7 @@
   initializeGCNRegBankReassignPass(*PR);
   initializeGCNNSAReassignPass(*PR);
   initializeSIAddIMGInitPass(*PR);
+  initializeAMDGPULowerFunctionLocalLDSPass(*PR);
 }
 
 static std::unique_ptr createTLOF(const Triple &TT) {
@@ -389,6 +397,7 @@
 bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
 bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
+bool AMDGPUTargetMachine::EnableFunctionLocalLDSLowering = false;
 
 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
 
@@ -502,6 +511,10 @@
           PM.addPass(AMDGPUAlwaysInlinePass());
           return true;
         }
+        if (PassName == "amdgpu-lower-function-local-lds") {
+          PM.addPass(AMDGPULowerFunctionLocalLDSPass());
+          return true;
+        }
         return false;
       });
   PB.registerPipelineParsingCallback(
@@ -845,6 +858,12 @@
   disablePass(&FuncletLayoutID);
   disablePass(&PatchableFunctionID);
 
+  // We expect to run this pass as a first AMDGPU IR pass so that new
+  // instructions being added in this pass can possibly undergo further
+  // transformations via subsequent passes.
+  if (EnableFunctionLocalLDSLowering)
+    addPass(createAMDGPULowerFunctionLocalLDSPass());
+
   addPass(createAMDGPUPrintfRuntimeBinding());
 
   // This must occur before inlining, as the inliner will not look through
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -50,6 +50,7 @@
   AMDGPUAtomicOptimizer.cpp
   AMDGPUCallLowering.cpp
   AMDGPUCodeGenPrepare.cpp
+  AMDGPULowerFunctionLocalLDS.cpp
   AMDGPUExportClustering.cpp
   AMDGPUFixFunctionBitcasts.cpp
   AMDGPUFrameLowering.cpp