diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h @@ -26,10 +26,6 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV); -/// \returns true if a given global variable \p GV (or its global users) appear -/// as an use within some instruction (either from kernel or from non-kernel). -bool hasUserInstruction(const GlobalValue *GV); - std::vector findVariablesToLower(Module &M, const Function *F = nullptr); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp @@ -62,29 +62,6 @@ } } -bool hasUserInstruction(const GlobalValue *GV) { - SmallPtrSet Visited; - SmallVector Stack(GV->users()); - - while (!Stack.empty()) { - const User *U = Stack.pop_back_val(); - - if (!Visited.insert(U).second) - continue; - - if (isa(U)) - return true; - - append_range(Stack, U->users()); - } - - return false; -} - -/// \returns true if an LDS global requires lowering to a module LDS structure -/// if \p F is not given. If \p F is given it must be a kernel and function -/// \returns true if an LDS global is directly used from that kernel and it -/// is safe to replace its uses with a kernel LDS structure member. static bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) { // We are not interested in kernel LDS lowering for module LDS itself. @@ -94,7 +71,6 @@ bool Ret = false; SmallPtrSet Visited; SmallVector Stack(GV.users()); - SmallPtrSet GlobalUsers; assert(!F || isKernelCC(F)); @@ -102,15 +78,10 @@ const User *V = Stack.pop_back_val(); Visited.insert(V); - if (auto *G = dyn_cast(V)) { - StringRef GName = G->getName(); - if (F && GName != "llvm.used" && GName != "llvm.compiler.used") { - // For kernel LDS lowering, if G is not a compiler.used list, then we - // cannot lower the lds GV since we cannot replace the use of GV within - // G. - return false; - } - GlobalUsers.insert(G); + if (isa(V)) { + // This use of the LDS variable is the initializer of a global variable. + // This is ill formed. The address of an LDS variable is kernel dependent + // and unknown until runtime. It can't be written to a global variable. continue; } @@ -132,15 +103,6 @@ append_range(Stack, V->users()); } - if (!F && !Ret) { - // For module LDS lowering, we have not yet decided if we should lower GV or - // not. Explore all global users of GV, and check if atleast one of these - // global users appear as an use within an instruction (possibly nested use - // via constant expression), if so, then conservately lower LDS. - for (auto *G : GlobalUsers) - Ret |= hasUserInstruction(G); - } - return Ret; } diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; Kernel LDS lowering. -;. -; @lds.1: is part of @llvm.used list, and also it is used within kernel, hence it is lowered. -; @lds.2: is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered. -; @lds.3: is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel. -; @lds.4: is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel, -; irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ). -; @lds.5: is part of @llvm.used list, but is not used within kernel, hence it is not lowered. -; @lds.6: is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered. -;. - -; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 } - -; CHECK-NOT: @lds.1 -; CHECK-NOT: @lds.2 -; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 -; CHECK: @lds.4 = addrspace(3) global float undef, align 4 -; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2 -; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4 -@lds.1 = addrspace(3) global i16 undef, align 2 -@lds.2 = addrspace(3) global i32 undef, align 4 -@lds.3 = addrspace(3) global i64 undef, align 8 -@lds.4 = addrspace(3) global float undef, align 4 -@lds.5 = addrspace(3) global i16 undef, align 2 -@lds.6 = addrspace(3) global i32 undef, align 4 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 - -; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4 - -; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" - -; CHECK-LABEL: @k0() -; CHECK: %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 4 -; CHECK: %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4 -; CHECK: %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4 -; CHECK: %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4 -; CHECK: ret void -define amdgpu_kernel void @k0() { - %ld.lds.1 = load i16, i16 addrspace(3)* @lds.1 - %ld.lds.2 = load i32, i32 addrspace(3)* @lds.2 - %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3 - %ld.lds.4 = load float, float addrspace(3)* @lds.4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; @lds.1: is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0. -; Hence, @lds.1 is lowered. -; @lds.2: is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0, -; Hence, @lds.2 is lowered. -; @lds.3: is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3, -; and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered. -; @lds.4: is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4, -; and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered. -; @lds.5: is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere. -; Hence, @lds.5 is not lowered. -; @lds.6: is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6. -; But none of them are used anywhere. Hence, @lds.6 is not lowered. -;. - -; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8] } - -; CHECK-NOT: @lds.1 -; CHECK-NOT: @lds.2 -; CHECK-NOT: @lds.3 -; CHECK-NOT: @lds.4 -; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 -; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 -@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1 -@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2 -@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4 -@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4 -@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 -@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8 -@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8 -@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8 - -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4 -; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata" - -; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) -; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3) -; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 -; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 -; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 -; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 -@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1 -@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2 -@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 -@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 -@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 -@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 - -; CHECK-LABEL: @f1 -; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8 -; CHECK: ret void -define void @f1() { - %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4 - ret void -} - -; CHECK-LABEL: @f0 -; CHECK: %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* -; CHECK: store i8 1, i8 addrspace(3)* %bc, align 2 -; CHECK: ret void -define void @f0() { - %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %bc, align 2 - ret void -} - -; CHECK-LABEL: @k1 -; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK-LABEL: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8 -; CHECK-LABEL: ret void -define amdgpu_kernel void @k1() { - %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3 - ret void -} - -; CHECK-LABEL: @k0 -; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK-LABEL: %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* -; CHECK-LABEL: store i8 1, i8 addrspace(3)* %bc, align 1 -; CHECK-LABEL: ret void -define amdgpu_kernel void @k0() { - %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %bc, align 1 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; @lds.1: is part of @llvm.used list, and is no-where used. Hence it is not lowered. -; @lds.2: is part of @llvm.compiler.used list, and is no-where used. Hence it is not lowered. -; @lds.3: is used as initializer to @gptr.3, and is no-where used. @gptr.3 itself is also not -; used anywhere else, hence @lds.3 is not lowered. -; @lds.4: is used as initializer to @gptr.4, and is no-where used. @gptr.4 is part of -; @llvm.compiler.used list, but is no-where else used. hence @lds.4 is not lowered. -; -; @lds.5: is used as initializer to @gptr.5, and is no-where used. @gptr.5 is part of -; @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered. -; @lds.6: is used as initializer to @gptr.6, and is no-where used. @gptr.6 is part of -; @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered. -; @lds.7: is used as initializer to @gptr.7, and is no-where used. @gptr.7 is used as initializer to @gptr.8, -; and @gptr.8 is used within non-kernel function @f1. Hence @lds.7 is lowered. -;. - -; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [1 x float], [2 x float] } - -; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2 -; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4 -; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 -; CHECK: @lds.4 = addrspace(3) global float undef, align 4 -; CHECK-NOT: @lds.5 -; CHECK-NOT: @lds.6 -; CHECK-NOT: @lds.7 -@lds.1 = addrspace(3) global i16 undef, align 2 -@lds.2 = addrspace(3) global i32 undef, align 4 -@lds.3 = addrspace(3) global i64 undef, align 8 -@lds.4 = addrspace(3) global float undef, align 4 -@lds.5 = addrspace(3) global [1 x float] undef, align 4 -@lds.6 = addrspace(3) global [2 x float] undef, align 8 -@lds.7 = addrspace(3) global [3 x float] undef, align 16 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 -@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8 -@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8 -@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8 -@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 - -; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16 -; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata" - -; CHECK-LABEL: @f1() -; CHECK: %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8 -; CHECK: ret void -define void @f1() { - %ld = load i64**, i64** addrspace(1)* @gptr.8 - ret void -} - -; CHECK-LABEL: @f0() -; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 -; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 -; CHECK: ret void -define void @f0() { - %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 - ret void -} - -; CHECK-LABEL: @k0() -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 -; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 -; CHECK: ret void -define amdgpu_kernel void @k0() { - %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 - ret void -} - -; CHECK-LABEL: @k1() -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK: ret void -define amdgpu_kernel void @k1() { - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -; CHECK: %llvm.amdgcn.module.lds.t = type { double, float } - -; CHECK: @function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to float*), align 8 - -; CHECK: @kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to double*), align 8 - -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8 - -@function_target = addrspace(3) global float undef, align 4 -@function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* @function_target to float*), align 8 - -@kernel_target = addrspace(3) global double undef, align 8 -@kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* @kernel_target to double*), align 8 - -; CHECK-LABEL: @function(float %x) -; CHECK: %0 = load float*, float* addrspace(1)* @function_indirect, align 8 -define void @function(float %x) local_unnamed_addr #5 { -entry: - %0 = load float*, float* addrspace(1)* @function_indirect, align 8 - store float %x, float* %0, align 4 - ret void -} - -; CHECK-LABEL: @kernel(double %x) -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK: %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 -define amdgpu_kernel void @kernel(double %x) local_unnamed_addr #5 { -entry: - %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 - store double %x, double* %0, align 8 - ret void -} - - - -