diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1329,15 +1329,10 @@
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = G->getGlobal();
 
-  if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-    if (!MFI->isModuleEntryFunction()) {
-      if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
-        if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) {
-          unsigned Offset =
-              AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar);
-          return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
-        }
-      }
+  if (!MFI->isModuleEntryFunction()) {
+    if (std::optional<uint32_t> Address =
+            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+      return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -608,6 +608,19 @@
     return MostUsed.GV;
   }
 
+  static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+                                       uint32_t Address) {
+    // Write the specified address into metadata where it can be retrieved by
+    // the assembler. Format is a half open range, [Address Address+1)
+    LLVMContext &Ctx = M->getContext();
+    auto *IntTy =
+        M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+    GV->setMetadata(LLVMContext::MD_absolute_symbol,
+                    MDNode::get(Ctx, {MinC, MaxC}));
+  }
+
   bool runOnModule(Module &M) override {
     LLVMContext &Ctx = M.getContext();
     CallGraph CG = CallGraph(M);
@@ -708,17 +721,21 @@
         kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                         TableLookupVariables);
 
+    GlobalVariable *MaybeModuleScopeStruct = nullptr;
     if (!ModuleScopeVariables.empty()) {
       LDSVariableReplacement ModuleScopeReplacement =
           createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                        ModuleScopeVariables);
-
+      MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
       appendToCompilerUsed(M,
                            {static_cast<GlobalValue *>(
                                ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                                    cast<Constant>(ModuleScopeReplacement.SGV),
                                    Type::getInt8PtrTy(Ctx)))});
 
+      // module.lds will be allocated at zero in any kernel that allocates it
+      recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
+
       // historic
       removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
 
@@ -806,6 +823,33 @@
       auto Replacement =
           createLDSVariableReplacement(M, VarName, KernelUsedVariables);
 
+      // This struct is allocated at a predictable address that can be
+      // calculated now, recorded in metadata then used to lower references to
+      // it during codegen.
+      {
+        // frame layout, starting from 0
+        //{
+        //  module.lds
+        //  alignment padding
+        //  kernel instance
+        //}
+
+        if (!MaybeModuleScopeStruct ||
+            Func.hasFnAttribute("amdgpu-elide-module-lds")) {
+          // There's no module.lds for this kernel so this replacement struct
+          // goes first
+          recordLDSAbsoluteAddress(&M, Replacement.SGV, 0);
+        } else {
+          const DataLayout &DL = M.getDataLayout();
+          TypeSize ModuleSize =
+              DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
+          GlobalVariable *KernelStruct = Replacement.SGV;
+          Align KernelAlign = AMDGPU::getAlign(DL, KernelStruct);
+          recordLDSAbsoluteAddress(&M, Replacement.SGV,
+                                   alignTo(ModuleSize, KernelAlign));
+        }
+      }
+
       // remove preserves existing codegen
       removeLocalVarsFromUsedLists(M, KernelUsedVariables);
 
       KernelToReplacement[&Func] = Replacement;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
 //
 
 #include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUTargetMachine.h"
@@ -168,12 +169,11 @@
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
 
   // Intercept LDS variables with known addresses
-  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
-    if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
-      unsigned offset =
-          AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
-      Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
-      return AsmPrinter::lowerConstant(C);
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
+    if (std::optional<uint32_t> Address =
+            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+      auto *IntTy = Type::getInt32Ty(CV->getContext());
+      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address));
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -106,20 +106,8 @@
 
   void allocateKnownAddressLDSGlobal(const Function &F);
 
-  // A kernel function may have an associated LDS allocation, and a kernel-scope
-  // LDS allocation must have an associated kernel function
-
-  // LDS allocation should have an associated kernel function
-  static const Function *
-  getKernelLDSFunctionFromGlobal(const GlobalVariable &GV);
-  static const GlobalVariable *
-  getKernelLDSGlobalFromFunction(const Function &F);
-
-  // Module or kernel scope LDS variable
-  static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
-  static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
-
   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+  static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
 
   Align getDynLDSAlign() const { return DynLDSAlign; }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,7 +11,9 @@
 #include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -89,24 +91,7 @@
 
 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
 
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
-  auto name = GV.getName();
-  return (name == ModuleLDSName) ||
-         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
-    const GlobalVariable &GV) {
-  const Module &M = *GV.getParent();
-  StringRef N(GV.getName());
-  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
-    return M.getFunction(N);
-  }
-  return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
   const Module *M = F.getParent();
   std::string KernelLDSName = "llvm.amdgcn.kernel.";
   KernelLDSName += F.getName();
@@ -119,40 +104,8 @@
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
-    const GlobalVariable &GV) {
-  // module.lds, then alignment padding, then kernel.lds, then other variables
-  // if any
-
-  assert(isKnownAddressLDSGlobal(GV));
-  unsigned Offset = 0;
-
-  if (GV.getName() == ModuleLDSName) {
-    return 0;
-  }
-
-  const Module *M = GV.getParent();
-  const DataLayout &DL = M->getDataLayout();
-
-  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
-  const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
-  // Account for module.lds if allocated for this function
-  if (GVM && f && !canElideModuleLDS(*f)) {
-    // allocator aligns this to var align, but it's zero to begin with
-    Offset += DL.getTypeAllocSize(GVM->getValueType());
-  }
-
-  // No dynamic LDS alignment done by allocateModuleLDSGlobal
-  Offset = alignTo(
-      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
-  return Offset;
-}
-
 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
   const Module *M = F.getParent();
-
   // This function is called before allocating any other LDS so that it can
   // reliably put values at known addresses. Consequently, dynamic LDS, if
   // present, will not yet have been allocated
@@ -180,40 +133,60 @@
     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
 
     if (GV && !canElideModuleLDS(F)) {
-      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
-             "Module LDS expected to be allocated before other LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on module LDS variable");
+      }
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
-      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
-             "Kernel LDS expected to be immediately after module LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on kernel LDS variable");
+      }
     }
   }
 }
 
 std::optional<uint32_t>
 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
-  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+  // TODO: Would be more consistent with the abs symbols to use a range
+  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
   if (MD && MD->getNumOperands() == 1) {
-    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
-    if (KnownSize) {
-      uint64_t V = KnownSize->getZExtValue();
-      if (V <= UINT32_MAX) {
-        return V;
+    if (ConstantInt *KnownSize =
+            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+      uint64_t ZExt = KnownSize->getZExtValue();
+      if (ZExt <= UINT32_MAX) {
+        return ZExt;
       }
     }
   }
   return {};
 }
 
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+    return {};
+
+  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+  if (!AbsSymRange)
+    return {};
+
+  if (const APInt *V = AbsSymRange->getSingleElement()) {
+    std::optional<uint64_t> ZExt = V->tryZExtValue();
+    if (ZExt && (*ZExt <= UINT32_MAX)) {
+      return *ZExt;
+    }
+  }
+
+  return {};
+}
+
 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
                                            const GlobalVariable &GV) {
   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -14,19 +14,19 @@
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [4 x i8] }
 
 ;.
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
+; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8, !absolute_symbol !0
 ; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2
-; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4, !absolute_symbol !0
 ;.
 define amdgpu_kernel void @k0() #0 {
 ; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
 ; CHECK-NEXT: ret void
 
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -41,9 +41,9 @@
 
 define amdgpu_kernel void @k1() #0 {
 ; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
 ; CHECK-NEXT: ret void
 ;
   store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -83,8 +83,8 @@
 
 define void @f0() {
 ; CHECK-LABEL: define void @f0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !23
-; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !23
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
+; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
 ; CHECK-NEXT: ret void
 
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -95,3 +95,5 @@
 attributes #0 = { "amdgpu-elide-module-lds" }
 
 ; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+
+; CHECK: !0 = !{i64 0, i64 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
@@ -11,15 +11,15 @@
 ;.
 ; CHECK: @lds.k2 = addrspace(3) global [1 x i8] undef, align 1
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
 ;.
 define amdgpu_kernel void @k0() {
 ; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
 ; CHECK-NEXT: ret void
 
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -34,9 +34,9 @@
 
 define amdgpu_kernel void @k1() {
 ; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
 ; CHECK-NEXT: ret void
 ;
   store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -61,3 +61,5 @@
 
   ret void
 }
+
+; CHECK: !0 = !{i64 0, i64 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -13,7 +13,7 @@
 @_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1
 
 ;.
-; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4
+; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4, !absolute_symbol !0
 ;.
 define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
 ; GCN-LABEL: test:
@@ -33,13 +33,13 @@
 ; GCN-NEXT: s_endpgm
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
 ; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP4]], 3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
 ; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP9]], 2
 ; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
 ; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP10]] to i8
@@ -66,11 +66,12 @@
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
-; CHECK: [[META0:![0-9]+]] = !{!1}
-; CHECK: [[META1:![0-9]+]] = distinct !{!1, !2}
-; CHECK: [[META2:![0-9]+]] = distinct !{!2}
-; CHECK: [[META3:![0-9]+]] = !{!4}
-; CHECK: [[META4:![0-9]+]] = distinct !{!4, !2}
-; CHECK: [[META5:![0-9]+]] = !{!4, !1}
-; CHECK: [[META6:![0-9]+]] = !{}
+; CHECK: [[META0:![0-9]+]] = !{i64 0, i64 1}
+; CHECK: [[META1:![0-9]+]] = !{!2}
+; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
+; CHECK: [[META3:![0-9]+]] = distinct !{!3}
+; CHECK: [[META4:![0-9]+]] = !{!5}
+; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3}
+; CHECK: [[META6:![0-9]+]] = !{!5, !2}
+; CHECK: [[META7:![0-9]+]] = !{}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
@@ -5,10 +5,10 @@
 @b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !0, !noalias !5
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !0, !noalias !5
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !1, !noalias !6
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !noalias !6
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !1, !noalias !6
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !noalias !6
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -34,9 +34,9 @@
 !8 = !{!"omnipotent char", !9, i64 0}
 !9 = !{!"Simple C++ TBAA"}
 
-; CHECK:!0 = !{!1, !2, i64 0}
-; CHECK:!1 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !2, i64 0}
-; CHECK:!2 = !{!"int", !3, i64 0}
-; CHECK:!3 = !{!"omnipotent char", !4, i64 0}
-; CHECK:!4 = !{!"Simple C++ TBAA"}
-; CHECK:!5 = !{}
+; CHECK:!0 = !{i64 0, i64 1}
+; CHECK:!1 = !{!2, !3, i64 0}
+; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0}
+; CHECK:!3 = !{!"int", !4, i64 0}
+; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
+; CHECK:!5 = !{!"Simple C++ TBAA"}
+; CHECK:!6 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
@@ -17,10 +17,10 @@
 ; GCN: ds_read_b32
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x2
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !0, !noalias !3
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !0, !noalias !3
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !3, !noalias !0
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !3, !noalias !0
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !1, !noalias !4
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !4, !noalias !1
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -44,13 +44,13 @@
 ; GCN-DAG: ds_read_b32
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x3
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !5, !noalias !8
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9
 ; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !5, !noalias !8
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !11, !noalias !12
-; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !13, !noalias !14
-; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !13, !noalias !14
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !6, !noalias !9
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !12, !noalias !13
+; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15
+; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !14, !noalias !15
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -69,18 +69,19 @@
   ret void
 }
 
-; CHECK: !0 = !{!1}
-; CHECK: !1 = distinct !{!1, !2}
-; CHECK: !2 = distinct !{!2}
-; CHECK: !3 = !{!4}
-; CHECK: !4 = distinct !{!4, !2}
-; CHECK: !5 = !{!6}
-; CHECK: !6 = distinct !{!6, !7}
-; CHECK: !7 = distinct !{!7}
-; CHECK: !8 = !{!9, !10}
-; CHECK: !9 = distinct !{!9, !7}
-; CHECK: !10 = distinct !{!10, !7}
-; CHECK: !11 = !{!9}
-; CHECK: !12 = !{!6, !10}
-; CHECK: !13 = !{!10}
-; CHECK: !14 = !{!6, !9}
+; CHECK: !0 = !{i64 0, i64 1}
+; CHECK: !1 = !{!2}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK: !3 = distinct !{!3}
+; CHECK: !4 = !{!5}
+; CHECK: !5 = distinct !{!5, !3}
+; CHECK: !6 = !{!7}
+; CHECK: !7 = distinct !{!7, !8}
+; CHECK: !8 = distinct !{!8}
+; CHECK: !9 = !{!10, !11}
+; CHECK: !10 = distinct !{!10, !8}
+; CHECK: !11 = distinct !{!11, !8}
+; CHECK: !12 = !{!10}
+; CHECK: !13 = !{!7, !11}
+; CHECK: !14 = !{!11}
+; CHECK: !15 = !{!7, !10}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
@@ -28,9 +28,9 @@
 @f0.lds = addrspace(3) global i16 undef
 define void @f0() {
 ; MODULE-LABEL: @f0(
-; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
 ; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
-; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
 ; MODULE-NEXT: ret void
 ;
 ; TABLE-LABEL: @f0(
@@ -60,7 +60,7 @@
 
 define amdgpu_kernel void @k_f0() {
 ; MODULE-LABEL: @k_f0(
-; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
+; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope !5, !noalias !1
 ; MODULE-NEXT: call void @f0()
 ; MODULE-NEXT: ret void
 ;
@@ -82,9 +82,9 @@
 @both.lds = addrspace(3) global i32 undef
 define void @f_both() {
 ; MODULE-LABEL: @f_both(
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
 ; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
 ; MODULE-NEXT: ret void
 ;
 ; TABLE-LABEL: @f_both(
@@ -115,9 +115,9 @@
 define amdgpu_kernel void @k0_both() {
 ; MODULE-LABEL: @k0_both(
 ; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
 ; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
 ; MODULE-NEXT: call void @f_both()
 ; MODULE-NEXT: ret void
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -11,12 +11,12 @@
 @v3 = addrspace(3) global i8 undef
 @unused = addrspace(3) global i16 undef
 
-; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
+; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16, !absolute_symbol !0
 ; OPT: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4, !absolute_symbol !1
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8, !absolute_symbol !2
 ; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
 ;.
@@ -234,9 +234,9 @@
 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
 ; OPT-NEXT: call void @f1()
-; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
 ; OPT-NEXT: %mul = mul i8 %ld, 8
-; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
 ; OPT-NEXT: call void @f2()
 ; OPT-NEXT: ret void
 ;
@@ -289,13 +289,16 @@
 ; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
 ; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; OPT: !0 = !{i32 1}
-; OPT: !1 = !{i32 0}
-; OPT: !2 = !{!3}
-; OPT: !3 = distinct !{!3, !4}
-; OPT: !4 = distinct !{!4}
+; OPT: !0 = !{i64 0, i64 1}
+; OPT: !1 = !{i64 4, i64 5}
+; OPT: !2 = !{i64 8, i64 9}
+; OPT: !3 = !{i32 1}
+; OPT: !4 = !{i32 0}
 ; OPT: !5 = !{!6}
-; OPT: !6 = distinct !{!6, !4}
+; OPT: !6 = distinct !{!6, !7}
+; OPT: !7 = distinct !{!7}
+; OPT: !8 = !{!9}
+; OPT: !9 = distinct !{!9, !7}
 ;.
 
 ; Table size length number-kernels * number-variables * sizeof(uint16_t)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -15,10 +15,10 @@
 ; OPT: %llvm.amdgcn.kernel.k23.lds.t = type { i64, i8 }
 ; OPT: %llvm.amdgcn.kernel.k123.lds.t = type { i16, i8, [5 x i8], i64 }
 
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16, !absolute_symbol !0
 
 ; Salient parts of the IR lookup table check:
 ; It has (top level) size 3 as there are 3 kernels that call functions which use lds
@@ -218,7 +218,7 @@
 
 ; Access two variables, will allocate those two
 define amdgpu_kernel void @k01() {
-; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !0 {
+; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !1 {
 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
 ; OPT-NEXT: call void @f0()
 ; OPT-NEXT: call void @f1()
@@ -256,7 +256,7 @@
 }
 
 define amdgpu_kernel void @k23() {
-; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !1 {
+; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !2 {
 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
 ; OPT-NEXT: call void @f2()
 ; OPT-NEXT: call void @f3()
@@ -295,12 +295,12 @@
 
 ; Access and allocate three variables
 define amdgpu_kernel void @k123() {
-; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !2 {
+; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !3 {
 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
 ; OPT-NEXT: call void @f1()
-; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
 ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
-; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
 ; OPT-NEXT: call void @f2()
 ; OPT-NEXT: ret void
 ;
@@ -346,9 +346,10 @@
 
 ; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()
 
-!0 = !{i32 0}
-!1 = !{i32 2}
-!2 = !{i32 1}
+!0 = !{i64 0, i64 1}
+!1 = !{i32 0}
+!2 = !{i32 2}
+!3 = !{i32 1}
 
 ; Table size length number-kernels * number-variables * sizeof(uint16_t)
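
Not part of the patch, but for reviewers unfamiliar with the encoding: the sketch below shows the IR shape the pass now produces. The kernel name @k and the struct contents are hypothetical; only the metadata shape mirrors the CHECK lines in the tests above. Each replacement struct carries an !absolute_symbol half-open range [Address, Address+1), and getLDSAbsoluteAddress only folds a reference when that range contains exactly one address, which ISel and lowerConstant then emit directly.

; Illustrative sketch only -- @k and the struct layouts are made up.
%llvm.amdgcn.module.lds.t = type { double }  ; 8 bytes, always placed at LDS address 0
%llvm.amdgcn.kernel.k.lds.t = type { i32 }   ; placed at alignTo(8, align 4) = 8

@llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8, !absolute_symbol !0
@llvm.amdgcn.kernel.k.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k.lds.t undef, align 4, !absolute_symbol !1

!0 = !{i64 0, i64 1} ; half-open range [0, 1) => address 0
!1 = !{i64 8, i64 9} ; half-open range [8, 9) => address 8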