Index: lib/Analysis/CaptureTracking.cpp =================================================================== --- lib/Analysis/CaptureTracking.cpp +++ lib/Analysis/CaptureTracking.cpp @@ -240,7 +240,8 @@ // Not captured if the callee is readonly, doesn't return a copy through // its return value and doesn't unwind (a readonly function can leak bits // by throwing an exception or not depending on the input value). - if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) + if (CS.onlyReadsMemory() && CS.doesNotThrow() && + (I->getType()->isVoidTy() || I->use_empty())) break; // Volatile operations effectively capture the memory location that they Index: test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll +++ test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll @@ -1,7 +1,8 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +target datalayout = "A5" -declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) #0 -declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) #0 +declare {}* @llvm.invariant.start.p5i8(i64, i8 addrspace(5)* nocapture) #0 +declare void @llvm.invariant.end.p5i8({}*, i64, i8 addrspace(5)* nocapture) #0 declare i8* @llvm.invariant.group.barrier(i8*) #1 ; GCN-LABEL: {{^}}use_invariant_promotable_lds: @@ -9,14 +10,15 @@ ; GCN: ds_write_b32 define amdgpu_kernel void @use_invariant_promotable_lds(i32 addrspace(1)* %arg) #2 { bb: - %tmp = alloca i32, align 4 - %tmp1 = bitcast i32* %tmp to i8* + %tmp = alloca i32, align 4, addrspace(5) + %tmp1 = bitcast i32 addrspace(5)* %tmp to i8 addrspace(5)* %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 %tmp3 = load i32, i32 addrspace(1)* %tmp2 - store i32 %tmp3, i32* %tmp - %tmp4 = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %tmp1) #0 - call void @llvm.invariant.end.p0i8({}* %tmp4, i64 4, i8* %tmp1) #0 - %tmp5 = call i8* @llvm.invariant.group.barrier(i8* %tmp1) #1 + store i32 %tmp3, i32 addrspace(5)* %tmp + %tmp4 = call {}* @llvm.invariant.start.p5i8(i64 4, i8 addrspace(5)* %tmp1) #0 + call void @llvm.invariant.end.p5i8({}* %tmp4, i64 4, i8 addrspace(5)* %tmp1) #0 + %tmp5 = addrspacecast i8 addrspace(5)* %tmp1 to i8* + %tmp6 = call i8* @llvm.invariant.group.barrier(i8* %tmp5) #1 ret void }