[AMDGPU] Record known LDS addresses in !absolute_symbol metadata

Summary of what the hunks below do:
  * AMDGPUMachineFunction::isKnownAddressLDSGlobal and
    calculateKnownAddressOfLDSGlobal are removed, together with the asserts
    that compared their result against the offset returned by
    allocateLDSGlobal.
  * allocateKnownAddressLDSGlobal now attaches the offset returned by
    allocateLDSGlobal to the global as !absolute_symbol metadata (new static
    helper recordLDSAbsoluteAddress).
  * AMDGPUAsmPrinter::lowerConstant reads that metadata back for globals in
    AMDGPUAS::LOCAL_ADDRESS and lowers the recorded constant when it fits in
    32 bits.
  * allocateKnownAddressLDSGlobal and getKernelLDSGlobalFromFunction take an
    extra `Module *M` parameter; the three call sites pass
    MF.getFunction().getParent().

Review changes applied to the added lines (hunk line counts are unchanged):
  * lowerConstant: mdconst::extract -> mdconst::dyn_extract_or_null, so the
    existing null check on the result is meaningful instead of being preceded
    by an asserting cast.
  * lowerConstant: local `KnownSize` renamed to `KnownAddress`; `auto MD`
    spelled `auto *MD`.
  * recordLDSAbsoluteAddress: local `Type` renamed to `IntTy` and spelled
    `auto *` so it no longer shadows llvm::Type.

NOTE(review): several template argument lists appear to have been stripped
from this copy of the patch (e.g. `dyn_cast(CV)`, `SmallVector ArgLocs`,
`std::optional getLDSKernelIdMetadata`). They are left as found; restore them
from the upstream files before applying.
NOTE(review): LangRef describes !absolute_symbol operands in the same format
as !range (lo/hi pairs). This patch writes and reads a single operand --
confirm that is accepted, or switch both sides to {Address, Address + 1}.
NOTE(review): the new "llvm/IR/IRBuilder.h" include has no user in the hunks
shown here -- confirm it is needed.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -512,7 +512,7 @@
   const SITargetLowering &TLI = *getTLI();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
-  Info->allocateKnownAddressLDSGlobal(F);
+  Info->allocateKnownAddressLDSGlobal(MF.getFunction().getParent(), F);
 
   SmallVector ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
@@ -596,7 +596,7 @@
   const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
-  Info->allocateKnownAddressLDSGlobal(F);
+  Info->allocateKnownAddressLDSGlobal(MF.getFunction().getParent(), F);
 
   SmallVector ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
 //
 
 #include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUTargetMachine.h"
@@ -168,12 +169,17 @@
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
 
   // Intercept LDS variables with known addresses
-  if (const GlobalVariable *GV = dyn_cast(CV)) {
-    if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
-      unsigned offset =
-          AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
-      Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
-      return AsmPrinter::lowerConstant(C);
+  if (const GlobalVariable *GV = dyn_cast(CV)) {
+    if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+      auto *MD = GV->getMetadata(LLVMContext::MD_absolute_symbol);
+      if (MD && MD->getNumOperands() == 1) {
+        if (ConstantInt *KnownAddress =
+                mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(0))) {
+          if (KnownAddress->getZExtValue() <= UINT32_MAX) {
+            return AsmPrinter::lowerConstant(KnownAddress);
+          }
+        }
+      }
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -104,7 +104,7 @@
   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
                              Align Trailing);
 
-  void allocateKnownAddressLDSGlobal(const Function &F);
+  void allocateKnownAddressLDSGlobal(Module *M, const Function &F);
 
   // A kernel function may have an associated LDS allocation, and a kernel-scope
   // LDS allocation must have an associated kernel function
@@ -112,12 +112,8 @@
   // LDS allocation should have an associated kernel function
   static const Function *
   getKernelLDSFunctionFromGlobal(const GlobalVariable &GV);
-  static const GlobalVariable *
-  getKernelLDSGlobalFromFunction(const Function &F);
-
-  // Module or kernel scope LDS variable
-  static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
-  static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
+  static GlobalVariable *getKernelLDSGlobalFromFunction(Module *M,
+                                                        const Function &F);
 
   static std::optional getLDSKernelIdMetadata(const Function &F);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,6 +12,8 @@
 #include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -89,12 +91,6 @@
 
 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
 
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
-  auto name = GV.getName();
-  return (name == ModuleLDSName) ||
-         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
 const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
     const GlobalVariable &GV) {
   const Module &M = *GV.getParent();
@@ -105,9 +101,9 @@
   return nullptr;
 }
 
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
-  const Module *M = F.getParent();
+GlobalVariable *
+AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(Module *M,
+                                                      const Function &F) {
   std::string KernelLDSName = "llvm.amdgcn.kernel.";
   KernelLDSName += F.getName();
   KernelLDSName += ".lds";
@@ -119,39 +115,19 @@
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
-    const GlobalVariable &GV) {
-  // module.lds, then alignment padding, then kernel.lds, then other variables
-  // if any
-
-  assert(isKnownAddressLDSGlobal(GV));
-  unsigned Offset = 0;
-
-  if (GV.getName() == ModuleLDSName) {
-    return 0;
-  }
-
-  const Module *M = GV.getParent();
-  const DataLayout &DL = M->getDataLayout();
-
-  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
-  const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
-  // Account for module.lds if allocated for this function
-  if (GVM && f && !canElideModuleLDS(*f)) {
-    // allocator aligns this to var align, but it's zero to begin with
-    Offset += DL.getTypeAllocSize(GVM->getValueType());
-  }
-
-  // No dynamic LDS alignment done by allocateModuleLDSGlobal
-  Offset = alignTo(
-      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
-  return Offset;
+static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+                                     uint32_t Address) {
+  // Attach Address to GV as !absolute_symbol metadata, typed as the integer
+  // pointer type of the LDS address space; lowerConstant reads it back
+  LLVMContext &Ctx = M->getContext();
+  auto *IntTy = M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+  GV->setMetadata(LLVMContext::MD_absolute_symbol,
+                  MDNode::get(Ctx, ConstantAsMetadata::get(
+                                       ConstantInt::get(IntTy, Address))));
 }
 
-void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
-  const Module *M = F.getParent();
+void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(Module *M,
+                                                          const Function &F) {
 
   // This function is called before allocating any other LDS so that it can
   // reliably put values at known addresses. Consequently, dynamic LDS, if
@@ -176,25 +152,19 @@
     // }
     // other variables, e.g. dynamic lds, allocated after this call
 
-    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
-    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
+    GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
+    GlobalVariable *KV = getKernelLDSGlobalFromFunction(M, F);
 
     if (GV && !canElideModuleLDS(F)) {
-      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
-             "Module LDS expected to be allocated before other LDS");
+      recordLDSAbsoluteAddress(M, GV, Offset);
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
       // before any other non-module LDS variables.
-      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
-             "Kernel LDS expected to be immediately after module LDS");
+      recordLDSAbsoluteAddress(M, KV, Offset);
     }
   }
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2412,7 +2412,7 @@
     return DAG.getEntryNode();
   }
 
-  Info->allocateKnownAddressLDSGlobal(Fn);
+  Info->allocateKnownAddressLDSGlobal(MF.getFunction().getParent(), Fn);
 
   SmallVector Splits;
   SmallVector ArgLocs;