diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1331,11 +1331,18 @@
   if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
     if (!MFI->isModuleEntryFunction()) {
+      // TODO: Should be able to use this from within module entry functions too
       if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
-        if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) {
-          unsigned Offset =
-              AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar);
-          return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+        auto MD = GVar->getMetadata(LLVMContext::MD_absolute_symbol);
+        if (MD && MD->getNumOperands() == 1) {
+          if (ConstantInt *KnownSize =
+                  mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+            uint64_t ZExt = KnownSize->getZExtValue();
+            if (ZExt <= UINT32_MAX) {
+              return DAG.getConstant(static_cast<uint32_t>(ZExt), SDLoc(Op),
+                                     Op.getValueType());
+            }
+          }
         }
       }
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -658,6 +658,17 @@
     return MostUsed.GV;
   }
 
+  static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+                                       uint32_t Address) {
+    // Write the specified address into metadata where it can be retrieved by
+    // the assembler
+    LLVMContext &Ctx = M->getContext();
+    auto Type = M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+    GV->setMetadata(LLVMContext::MD_absolute_symbol,
+                    MDNode::get(Ctx, ConstantAsMetadata::get(
+                                         ConstantInt::get(Type, Address))));
+  }
+
   bool runOnModule(Module &M) override {
     LLVMContext &Ctx = M.getContext();
     CallGraph CG = CallGraph(M);
@@ -758,17 +769,21 @@
           kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                           TableLookupVariables);
 
+    GlobalVariable *MaybeModuleScopeStruct = nullptr;
     if (!ModuleScopeVariables.empty()) {
       LDSVariableReplacement ModuleScopeReplacement =
           createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                        ModuleScopeVariables);
-
+      MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
       appendToCompilerUsed(M,
                            {static_cast<GlobalValue *>(
                                ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                                    cast<Constant>(ModuleScopeReplacement.SGV),
                                    Type::getInt8PtrTy(Ctx)))});
 
+      // module.lds will be allocated at zero in any kernel that allocates it
+      recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
+
       // historic
       removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
@@ -856,6 +871,33 @@
       auto Replacement =
           createLDSVariableReplacement(M, VarName, KernelUsedVariables);
 
+      // This struct is allocated at a predictable address that can be
+      // calculated now, recorded in metadata then used to lower references to
+      // it during codegen.
+      {
+        // frame layout, starting from 0
+        //{
+        //  module.lds
+        //  alignment padding
+        //  kernel instance
+        //}
+
+        if (!MaybeModuleScopeStruct ||
+            Func.hasFnAttribute("amdgpu-elide-module-lds")) {
+          // There's no module.lds for this kernel so this replacement struct
+          // goes first
+          recordLDSAbsoluteAddress(&M, Replacement.SGV, 0);
+        } else {
+          const DataLayout &DL = M.getDataLayout();
+          TypeSize ModuleSize =
+              DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
+          GlobalVariable *KernelStruct = Replacement.SGV;
+          Align KernelAlign = AMDGPU::getAlign(DL, KernelStruct);
+          recordLDSAbsoluteAddress(&M, Replacement.SGV,
+                                   alignTo(ModuleSize, KernelAlign));
+        }
+      }
+
       // remove preserves existing codegen
       removeLocalVarsFromUsedLists(M, KernelUsedVariables);
       KernelToReplacement[&Func] = Replacement;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
 //
 
 #include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUTargetMachine.h"
@@ -168,12 +169,17 @@
 
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
 
   // Intercept LDS variables with known addresses
-  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
-    if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
-      unsigned offset =
-          AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
-      Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
-      return AsmPrinter::lowerConstant(C);
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
+    if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+      auto MD = GV->getMetadata(LLVMContext::MD_absolute_symbol);
+      if (MD && MD->getNumOperands() == 1) {
+        if (ConstantInt *KnownSize =
+                mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+          if (KnownSize->getZExtValue() <= UINT32_MAX) {
+            return AsmPrinter::lowerConstant(KnownSize);
+          }
+        }
+      }
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -115,10 +115,6 @@
   static const GlobalVariable *
   getKernelLDSGlobalFromFunction(const Function &F);
 
-  // Module or kernel scope LDS variable
-  static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
-  static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
-
   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
 
   Align getDynLDSAlign() const { return DynLDSAlign; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -89,12 +89,6 @@
 
 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
 
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
-  auto name = GV.getName();
-  return (name == ModuleLDSName) ||
-         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
 const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
     const GlobalVariable &GV) {
   const Module &M = *GV.getParent();
@@ -119,40 +113,27 @@
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
-    const GlobalVariable &GV) {
-  // module.lds, then alignment padding, then kernel.lds, then other variables
-  // if any
-
-  assert(isKnownAddressLDSGlobal(GV));
-  unsigned Offset = 0;
-
-  if (GV.getName() == ModuleLDSName) {
-    return 0;
-  }
-
-  const Module *M = GV.getParent();
-  const DataLayout &DL = M->getDataLayout();
-
-  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
-  const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
-  // Account for module.lds if allocated for this function
-  if (GVM && f && !canElideModuleLDS(*f)) {
-    // allocator aligns this to var align, but it's zero to begin with
-    Offset += DL.getTypeAllocSize(GVM->getValueType());
+static std::optional<uint32_t> parseSingleOperandMetadata(MDNode *MD) {
+  if (MD && MD->getNumOperands() == 1) {
+    if (ConstantInt *KnownSize =
+            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+      uint64_t ZExt = KnownSize->getZExtValue();
+      if (ZExt <= UINT32_MAX) {
+        return ZExt;
+      }
+    }
   }
+  return {};
+}
 
-  // No dynamic LDS alignment done by allocateModuleLDSGlobal
-  Offset = alignTo(
-      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
-  return Offset;
+static std::optional<uint32_t>
+retrieveLDSAbsoluteAddress(const GlobalVariable *GV) {
+  return parseSingleOperandMetadata(
+      GV->getMetadata(LLVMContext::MD_absolute_symbol));
 }
 
 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
   const Module *M = F.getParent();
-
   // This function is called before allocating any other LDS so that it can
   // reliably put values at known addresses. Consequently, dynamic LDS, if
   // present, will not yet have been allocated
@@ -180,38 +161,28 @@
     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
 
     if (GV && !canElideModuleLDS(F)) {
-      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
-             "Module LDS expected to be allocated before other LDS");
+      std::optional<uint32_t> Expect = retrieveLDSAbsoluteAddress(GV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on module LDS variable");
+      }
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
-      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
-             "Kernel LDS expected to be immediately after module LDS");
+      std::optional<uint32_t> Expect = retrieveLDSAbsoluteAddress(KV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on kernel LDS variable");
+      }
     }
   }
 }
 
 std::optional<uint32_t>
 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
-  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
-  if (MD && MD->getNumOperands() == 1) {
-    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
-    if (KnownSize) {
-      uint64_t V = KnownSize->getZExtValue();
-      if (V <= UINT32_MAX) {
-        return V;
-      }
-    }
-  }
-  return {};
+  return parseSingleOperandMetadata(F.getMetadata("llvm.amdgcn.lds.kernel.id"));
 }
 
 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
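
Note (reviewer sketch, not part of the patch): recordLDSAbsoluteAddress above emits a single-operand !absolute_symbol node whose operand is a ConstantInt of the LDS (addrspace(3)) pointer-sized integer type, so with a 32-bit LDS pointer in the data layout the textual IR annotation would look roughly like !absolute_symbol !{i32 <address>}. The snippet below is a minimal, self-contained illustration of reading that address back out of the metadata; readAbsoluteSymbolAddress is a hypothetical name used only for this sketch, and the in-tree equivalent is the static retrieveLDSAbsoluteAddress helper added to AMDGPUMachineFunction.cpp.

// Reviewer sketch only; mirrors the parsing done by retrieveLDSAbsoluteAddress.
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cstdint>
#include <optional>

using namespace llvm;

static std::optional<uint32_t>
readAbsoluteSymbolAddress(const GlobalVariable &GV) {
  // The lowering pass stores exactly one operand: a ConstantInt holding the
  // byte offset of the struct within the kernel's LDS frame.
  MDNode *MD = GV.getMetadata(LLVMContext::MD_absolute_symbol);
  if (!MD || MD->getNumOperands() != 1)
    return std::nullopt;
  ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
  if (!CI || CI->getZExtValue() > UINT32_MAX)
    return std::nullopt;
  return static_cast<uint32_t>(CI->getZExtValue());
}

A missing or malformed node yields std::nullopt in this sketch; the allocateKnownAddressLDSGlobal changes above instead treat that case as a hard error via report_fatal_error, since by that point the metadata is expected to have been written by the lowering pass.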