diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -498,7 +498,7 @@ static GlobalVariable *buildLookupTable( Module &M, ArrayRef<GlobalVariable *> Variables, ArrayRef<Function *> kernels, - DenseMap<Function *, LDSVariableReplacement> &KernelToReplacement) { + const DenseMap<Function *, LDSVariableReplacement> &KernelToReplacement) { if (Variables.empty()) { return nullptr; } @@ -513,11 +513,15 @@ ArrayType *AllKernelsOffsetsType = ArrayType::get(KernelOffsetsType, NumberKernels); - std::vector<Constant *> overallConstantExprElts(NumberKernels); + std::vector<Constant *> overallConstantExprElts( + NumberKernels, PoisonValue::get(KernelOffsetsType)); for (size_t i = 0; i < NumberKernels; i++) { - LDSVariableReplacement Replacement = KernelToReplacement[kernels[i]]; + Function *Kernel = kernels[i]; + auto Replacement = KernelToReplacement.find(Kernel); + if (Replacement == KernelToReplacement.end()) + continue; overallConstantExprElts[i] = getAddressesOfVariablesInKernel( - Ctx, Variables, Replacement.LDSVarsToConstantGEP); + Ctx, Variables, Replacement->second.LDSVarsToConstantGEP); } Constant *init = @@ -1141,7 +1145,7 @@ GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables( M, ModuleScopeVariables, KernelsThatAllocateModuleLDS); - DenseMap<Function *, LDSVariableReplacement> KernelToReplacement = + const DenseMap<Function *, LDSVariableReplacement> KernelToReplacement = lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables, KernelsThatAllocateModuleLDS, MaybeModuleScopeStruct); @@ -1150,8 +1154,8 @@ for (auto &GV : KernelAccessVariables) { auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV]; assert(funcs.size() == 1); // Only one kernel can access it - LDSVariableReplacement Replacement = - KernelToReplacement[*(funcs.begin())]; + const LDSVariableReplacement &Replacement = + KernelToReplacement.at(*funcs.begin()); DenseSet<GlobalVariable *> Vec; Vec.insert(GV); @@ -1172,9 +1176,11 @@ LLVMContext &Ctx = M.getContext(); IRBuilder<> Builder(Ctx); - for 
(size_t i = 0; i < OrderedKernels.size(); i++) { - markUsedByKernel(Builder, OrderedKernels[i], - KernelToReplacement[OrderedKernels[i]].SGV); + for (Function *Kernel : OrderedKernels) { + auto Replacement = KernelToReplacement.find(Kernel); + if (Replacement == KernelToReplacement.end()) + continue; + markUsedByKernel(Builder, Kernel, Replacement->second.SGV); } // The order must be consistent between lookup table and accesses to diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=hybrid < %s | FileCheck %s + +@A = external addrspace(3) global [8 x ptr] +@B = external addrspace(3) global [0 x i32] + +define amdgpu_kernel void @kernel_0() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_0() !llvm.amdgcn.lds.kernel.id !1 { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ] +; CHECK-NEXT: call void @call_store_A() +; CHECK-NEXT: ret void +; + call void @call_store_A() + ret void +} + +define amdgpu_kernel void @kernel_1() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define amdgpu_kernel void @kernel_2() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_2() !llvm.amdgcn.lds.kernel.id !3 { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ] +; CHECK-NEXT: call 
void @store_A() +; CHECK-NEXT: ret void +; + call void @store_A() + ret void +} + +define amdgpu_kernel void @kernel_3() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define private void @call_store_A() { +; CHECK-LABEL: define private void @call_store_A() { +; CHECK-NEXT: call void @store_A() +; CHECK-NEXT: ret void +; + call void @store_A() + ret void +} + +define private void @store_A() { +; CHECK-LABEL: define private void @store_A() { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4 +; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr +; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8 +; CHECK-NEXT: ret void +; + store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null + ret void +} + +define private ptr @get_B_ptr() { +; CHECK-LABEL: define private ptr @get_B_ptr() { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4 +; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr +; CHECK-NEXT: ret ptr [[TMP3]] +; + ret ptr addrspacecast (ptr addrspace(3) @B to ptr) +}