diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -110,6 +110,25 @@ namespace { +static bool hasStaticAllocaAfterCall(Function *K) { + auto &EBB = K->getEntryBlock(); + auto *TI = EBB.getTerminator(); + BasicBlock::iterator Iter(&(*(EBB.getFirstInsertionPt()))); + + while (!isa(*Iter) && (&*Iter != TI)) + ++Iter; + if (&*Iter == TI) + return false; + + while (&*Iter != TI) { + if (isa(*Iter) && cast(&*Iter)->isStaticAlloca()) + return true; + ++Iter; + } + + return false; +} + class ReplaceLDSUseImpl { Module &M; LLVMContext &Ctx; @@ -182,6 +201,27 @@ return LDSPointer; } + // We need to split the entry block of kernel K just after all the static + // allocas in the entry block. Backward traverse the entry block till a static + // alloca is encountered, and return immediate previous instruction (in + // backward direction) as the split point. + Instruction *findEntryBlockSplitPoint(Function *K) { + auto &EBB = K->getEntryBlock(); + auto *EI = &(*(EBB.getFirstInsertionPt())); + BasicBlock::reverse_iterator RIT(EBB.getTerminator()); + + while (&*RIT != EI) { + if (isa(*RIT) && cast(&*RIT)->isStaticAlloca()) + break; + ++RIT; + } + + if (&*RIT != EI) + --RIT; + + return &*RIT; + } + // Split entry basic block in such a way that only lane 0 of each wave does // the LDS pointer initialization, and return newly created basic block. BasicBlock *activateLaneZero(Function *K) { @@ -191,9 +231,8 @@ if (!BasicBlockEntry.second) return BasicBlockEntry.first->second; - // Split entry basic block of kernel K. - auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt())); - IRBuilder<> Builder(EI); + // Split entry basic block of kernel K just after all static allocas. + IRBuilder<> Builder(findEntryBlockSplitPoint(K)); Value *Mbcnt = Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, @@ -402,6 +441,21 @@ return false; } + // FIXME: At the moment, it is not *very clear* if LLVM IR with static alloca + // after call is legal or not. + // + // In this pass, since we need to split the entry block before any call, any + // static alloca after the call will be converted into dynamic alloca which we + // want to avoid. + // + // So, we skip running this pass, if there a kernel which has static alloca + // after call. Revisit this code later. + for (auto KI = KernelToCallees.begin(), KE = KernelToCallees.end(); KI != KE; + ++KI) { + if (hasStaticAllocaAfterCall(KI->first)) + return false; + } + // For every LDS from collected LDS globals set, replace its non-kernel // function scope use by pointer. bool Changed = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -293,7 +293,7 @@ static cl::opt EnableLDSReplaceWithPointer( "amdgpu-enable-lds-replace-with-pointer", - cl::desc("Enable LDS replace with pointer pass"), cl::init(false), + cl::desc("Enable LDS replace with pointer pass"), cl::init(true), cl::Hidden); static cl::opt EnableLowerModuleLDS( diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -42,6 +42,7 @@ ; GCN-O0-NEXT: Inliner for always_inline functions ; GCN-O0-NEXT: A No-Op Barrier Pass ; GCN-O0-NEXT: Lower OpenCL enqueued blocks +; GCN-O0-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Expand Atomic instructions @@ -178,6 +179,7 @@ ; GCN-O1-NEXT: Inliner for always_inline functions ; GCN-O1-NEXT: A No-Op Barrier Pass ; GCN-O1-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces @@ -430,6 +432,7 @@ ; GCN-O1-OPTS-NEXT: Inliner for always_inline functions ; GCN-O1-OPTS-NEXT: A No-Op Barrier Pass ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-OPTS-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Infer address spaces @@ -715,6 +718,7 @@ ; GCN-O2-NEXT: Inliner for always_inline functions ; GCN-O2-NEXT: A No-Op Barrier Pass ; GCN-O2-NEXT: Lower OpenCL enqueued blocks +; GCN-O2-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Infer address spaces @@ -1002,6 +1006,7 @@ ; GCN-O3-NEXT: Inliner for always_inline functions ; GCN-O3-NEXT: A No-Op Barrier Pass ; GCN-O3-NEXT: Lower OpenCL enqueued blocks +; GCN-O3-NEXT: Replace within non-kernel function use of LDS with pointer ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Infer address spaces diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-diamond-shape.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-selected_functions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-to-declare-only-func.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-to-declare-only-func.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-to-declare-only-func.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-call-to-declare-only-func.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-global-scope-use.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-inline-asm-call.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-kernel-only-used-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-not-reachable-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-ignore-small-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION ; ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-diamond-shape.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-selected_functions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-indirect-call-signature-match.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-static-alloca.ll copy from llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll copy to llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-static-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-static-alloca.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; @@ -7,6 +7,9 @@ ; reachable from kernel. Hence nested constant expression should to be converted into a ; series of instructons and pointer replacement should take place. ; +; Further the entry basic block of the kernel @k0 contains alloca instruction. Hence the +; entry basic splitting for pointer initialization should happen after alloca. +; ; Original LDS should exist. ; CHECK: @used_only_within_func = addrspace(3) global [4 x i32] undef, align 4 @@ -36,19 +39,31 @@ ; Pointer initialization code shoud be added define amdgpu_kernel void @k0() { ; CHECK-LABEL: entry: -; CHECK: %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) -; CHECK: %1 = icmp eq i32 %0, 0 -; CHECK: br i1 %1, label %2, label %3 -; -; CHECK-LABEL: 2: +; CHECK: %0 = alloca i64, align 8, addrspace(5) +; CHECK: %1 = addrspacecast i64 addrspace(5)* %0 to i64* +; CHECK: %2 = alloca i64, align 8, addrspace(5) +; CHECK: %3 = addrspacecast i64 addrspace(5)* %2 to i64* +; CHECK: %4 = alloca i64, align 8, addrspace(5) +; CHECK: %5 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; CHECK: %6 = icmp eq i32 %5, 0 +; CHECK: br i1 %6, label %7, label %8 + +; CHECK-LABEL: 7: ; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @used_only_within_func to i16), i16 addrspace(3)* @used_only_within_func.ptr, align 2 -; CHECK: br label %3 -; -; CHECK-LABEL: 3: +; CHECK: br label %8 + +; CHECK-LABEL: 8: ; CHECK: call void @llvm.amdgcn.wave.barrier() +; CHECK: %9 = addrspacecast i64 addrspace(5)* %4 to i64* ; CHECK: call void @f0(i32 0) ; CHECK: ret void entry: + %0 = alloca i64, align 8, addrspace(5) + %1 = addrspacecast i64 addrspace(5)* %0 to i64* + %2 = alloca i64, align 8, addrspace(5) + %3 = addrspacecast i64 addrspace(5)* %2 to i64* + %4 = alloca i64, align 8, addrspace(5) + %5 = addrspacecast i64 addrspace(5)* %4 to i64* call void @f0(i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-multiple-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-same-lds.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr1.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-const-expr2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; There is one lds global defined here, and this lds is used within a single non-kernel diff --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll --- a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll +++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s ; DESCRIPTION: ; diff --git a/llvm/test/CodeGen/replace-lds-by-ptr-ignore-static-alloca-after-call.ll b/llvm/test/CodeGen/replace-lds-by-ptr-ignore-static-alloca-after-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/replace-lds-by-ptr-ignore-static-alloca-after-call.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer < %s | FileCheck %s + +; DESCRIPTION: +; +; There is one lds global defined here, and this lds is used within a single non-kernel +; function, as an operand of nested constant expression, and this non-kernel function is +; reachable from kernel. But, there is static alloca after call. Hence, pointer replacement +; is not triggered. +; +; Original LDS should exist. +; CHECK: @used_only_within_func = addrspace(3) global [4 x i32] undef, align 4 +@used_only_within_func = addrspace(3) global [4 x i32] undef, align 4 + +; Pointer should not be created. +; CHECK-NOT: @used_only_within_func.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 + +define void @f0(i32 %x) { +; CHECK-LABEL: entry: +; CHECK: store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @used_only_within_func, i32 0, i32 0) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @used_only_within_func, i32 0, i32 0) to i32*) to i64)) to i32*), align 4 +; CHECK: ret void +entry: + store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast ([4 x i32] addrspace(3)* @used_only_within_func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast ([4 x i32] addrspace(3)* @used_only_within_func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4 + ret void +} + +define amdgpu_kernel void @k0() { +; CHECK-LABEL: entry: +; CHECK: call void @f0(i32 0) +; CHECK: %a = alloca i64, align 8, addrspace(5) +; CHECK: ret void +entry: + call void @f0(i32 0) + %a = alloca i64, align 8, addrspace(5) + ret void +}