diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1332,10 +1332,11 @@ if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { if (!MFI->isModuleEntryFunction()) { if (const GlobalVariable *GVar = dyn_cast(GV)) { - if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) { - unsigned Offset = - AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); + auto MD = GVar->getMetadata(LLVMContext::MD_absolute_symbol); + std::optional Address = + AMDGPUMachineFunction::parseSingleOperandMetadata(MD); + if (Address) { + return DAG.getConstant(Address.value(), SDLoc(Op), Op.getValueType()); } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -658,6 +658,17 @@ return MostUsed.GV; } + static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV, + uint32_t Address) { + // Write the specified address into metadata where it can be retrieved by + // the assembler + LLVMContext &Ctx = M->getContext(); + auto Type = M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS); + GV->setMetadata(LLVMContext::MD_absolute_symbol, + MDNode::get(Ctx, ConstantAsMetadata::get( + ConstantInt::get(Type, Address)))); + } + bool runOnModule(Module &M) override { LLVMContext &Ctx = M.getContext(); CallGraph CG = CallGraph(M); @@ -758,17 +769,21 @@ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo, TableLookupVariables); + GlobalVariable *MaybeModuleScopeStruct = nullptr; if (!ModuleScopeVariables.empty()) { LDSVariableReplacement ModuleScopeReplacement = createLDSVariableReplacement(M, "llvm.amdgcn.module.lds", ModuleScopeVariables); - + MaybeModuleScopeStruct = ModuleScopeReplacement.SGV; appendToCompilerUsed(M, {static_cast( ConstantExpr::getPointerBitCastOrAddrSpaceCast( cast(ModuleScopeReplacement.SGV), Type::getInt8PtrTy(Ctx)))}); + // module.lds will be allocated at zero in any kernel that allocates it + recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0); + // historic removeLocalVarsFromUsedLists(M, ModuleScopeVariables); @@ -856,6 +871,33 @@ auto Replacement = createLDSVariableReplacement(M, VarName, KernelUsedVariables); + // This struct is allocated at a predictable address that can be + // calculated now, recorded in metadata then used to lower references to + // it during codegen. + { + // frame layout, starting from 0 + //{ + // module.lds + // alignment padding + // kernel instance + //} + + if (!MaybeModuleScopeStruct || + Func.hasFnAttribute("amdgpu-elide-module-lds")) { + // There's no module.lds for this kernel so this replacement struct + // goes first + recordLDSAbsoluteAddress(&M, Replacement.SGV, 0); + } else { + const DataLayout &DL = M.getDataLayout(); + TypeSize ModuleSize = + DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType()); + GlobalVariable *KernelStruct = Replacement.SGV; + Align KernelAlign = AMDGPU::getAlign(DL, KernelStruct); + recordLDSAbsoluteAddress(&M, Replacement.SGV, + alignTo(ModuleSize, KernelAlign)); + } + } + // remove preserves existing codegen removeLocalVarsFromUsedLists(M, KernelUsedVariables); KernelToReplacement[&Func] = Replacement; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -13,6 +13,7 @@ // #include "AMDGPUMCInstLower.h" +#include "AMDGPU.h" #include "AMDGPUAsmPrinter.h" #include "AMDGPUMachineFunction.h" #include "AMDGPUTargetMachine.h" @@ -168,12 +169,16 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) { // Intercept LDS variables with known addresses - if (const GlobalVariable *GV = dyn_cast(CV)) { - if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) { - unsigned offset = - AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV); - Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset)); - return AsmPrinter::lowerConstant(C); + if (const GlobalVariable *GV = dyn_cast(CV)) { + if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + auto MD = GV->getMetadata(LLVMContext::MD_absolute_symbol); + std::optional Address = + AMDGPUMachineFunction::parseSingleOperandMetadata(MD); + if (Address) { + LLVMContext &Ctx = CV->getContext(); + return AsmPrinter::lowerConstant( + ConstantInt::get(Type::getInt32Ty(Ctx), Address.value())); + } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -106,19 +106,7 @@ void allocateKnownAddressLDSGlobal(const Function &F); - // A kernel function may have an associated LDS allocation, and a kernel-scope - // LDS allocation must have an associated kernel function - - // LDS allocation should have an associated kernel function - static const Function * - getKernelLDSFunctionFromGlobal(const GlobalVariable &GV); - static const GlobalVariable * - getKernelLDSGlobalFromFunction(const Function &F); - - // Module or kernel scope LDS variable - static bool isKnownAddressLDSGlobal(const GlobalVariable &GV); - static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV); - + static std::optional parseSingleOperandMetadata(MDNode *MD); static std::optional getLDSKernelIdMetadata(const Function &F); Align getDynLDSAlign() const { return DynLDSAlign; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -12,6 +12,7 @@ #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -89,24 +90,7 @@ static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds"; -bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) { - auto name = GV.getName(); - return (name == ModuleLDSName) || - (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds")); -} - -const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal( - const GlobalVariable &GV) { - const Module &M = *GV.getParent(); - StringRef N(GV.getName()); - if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) { - return M.getFunction(N); - } - return nullptr; -} - -const GlobalVariable * -AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { +static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) { const Module *M = F.getParent(); std::string KernelLDSName = "llvm.amdgcn.kernel."; KernelLDSName += F.getName(); @@ -114,45 +98,33 @@ return M->getNamedGlobal(KernelLDSName); } +std::optional +AMDGPUMachineFunction::parseSingleOperandMetadata(MDNode *MD) { + if (MD && MD->getNumOperands() == 1) { + if (ConstantInt *KnownSize = + mdconst::extract(MD->getOperand(0))) { + uint64_t ZExt = KnownSize->getZExtValue(); + if (ZExt <= UINT32_MAX) { + return ZExt; + } + } + } + return {}; +} + // This kernel calls no functions that require the module lds struct static bool canElideModuleLDS(const Function &F) { return F.hasFnAttribute("amdgpu-elide-module-lds"); } -unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal( - const GlobalVariable &GV) { - // module.lds, then alignment padding, then kernel.lds, then other variables - // if any - - assert(isKnownAddressLDSGlobal(GV)); - unsigned Offset = 0; - - if (GV.getName() == ModuleLDSName) { - return 0; - } - - const Module *M = GV.getParent(); - const DataLayout &DL = M->getDataLayout(); - - const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName); - const Function *f = getKernelLDSFunctionFromGlobal(GV); - - // Account for module.lds if allocated for this function - if (GVM && f && !canElideModuleLDS(*f)) { - // allocator aligns this to var align, but it's zero to begin with - Offset += DL.getTypeAllocSize(GVM->getValueType()); - } - - // No dynamic LDS alignment done by allocateModuleLDSGlobal - Offset = alignTo( - Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType())); - - return Offset; +static std::optional +retrieveLDSAbsoluteAddress(const GlobalVariable *GV) { + return AMDGPUMachineFunction::parseSingleOperandMetadata( + GV->getMetadata(LLVMContext::MD_absolute_symbol)); } void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { const Module *M = F.getParent(); - // This function is called before allocating any other LDS so that it can // reliably put values at known addresses. Consequently, dynamic LDS, if // present, will not yet have been allocated @@ -180,38 +152,28 @@ const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); if (GV && !canElideModuleLDS(F)) { - assert(isKnownAddressLDSGlobal(*GV)); unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); - (void)Offset; - assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) && - "Module LDS expected to be allocated before other LDS"); + std::optional Expect = retrieveLDSAbsoluteAddress(GV); + if (!Expect || (Offset != Expect)) { + report_fatal_error("Inconsistent metadata on module LDS variable"); + } } if (KV) { // The per-kernel offset is deterministic because it is allocated // before any other non-module LDS variables. - assert(isKnownAddressLDSGlobal(*KV)); unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); - (void)Offset; - assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) && - "Kernel LDS expected to be immediately after module LDS"); + std::optional Expect = retrieveLDSAbsoluteAddress(KV); + if (!Expect || (Offset != Expect)) { + report_fatal_error("Inconsistent metadata on kernel LDS variable"); + } } } } std::optional AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { - auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); - if (MD && MD->getNumOperands() == 1) { - ConstantInt *KnownSize = mdconst::extract(MD->getOperand(0)); - if (KnownSize) { - uint64_t V = KnownSize->getZExtValue(); - if (V <= UINT32_MAX) { - return V; - } - } - } - return {}; + return parseSingleOperandMetadata(F.getMetadata("llvm.amdgcn.lds.kernel.id")); } void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,