Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -162,12 +162,16 @@ bool SufficientLDS = hasSufficientLocalMem(F); bool Changed = false; BasicBlock &EntryBB = *F.begin(); - for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) { - AllocaInst *AI = dyn_cast(I); - ++I; - if (AI) - Changed |= handleAlloca(*AI, SufficientLDS); + SmallVector Allocas; + for (Instruction &I : EntryBB) { + if (AllocaInst *AI = dyn_cast(&I)) + Allocas.push_back(AI); + } + + for (AllocaInst *AI : Allocas) { + if (handleAlloca(*AI, SufficientLDS)) + Changed = true; } return Changed; Index: test/CodeGen/AMDGPU/promote-alloca-lifetime.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-lifetime.ll +++ test/CodeGen/AMDGPU/promote-alloca-lifetime.ll @@ -1,7 +1,9 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca %s | FileCheck -check-prefix=OPT %s -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + +declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #0 +declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #0 ; OPT-LABEL: @use_lifetime_promotable_lds( ; OPT-NOT: alloca i32 @@ -9,13 +11,25 @@ ; OPT: store i32 %tmp3, i32 addrspace(3)* define amdgpu_kernel void @use_lifetime_promotable_lds(i32 addrspace(1)* %arg) #2 { bb: - %tmp = alloca i32, align 4 - %tmp1 = bitcast i32* %tmp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp1) + %tmp = alloca i32, align 4, addrspace(5) + %tmp1 = bitcast i32 addrspace(5)* %tmp to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %tmp1) %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 %tmp3 = load i32, i32 addrspace(1)* %tmp2 - store i32 %tmp3, i32* %tmp - call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp1) + store i32 %tmp3, i32 addrspace(5)* %tmp + call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* %tmp1) + ret void +} + +; After handling the alloca, the lifetime was erased. This was the +; next iterator to be checked as an alloca, crashing. + +; OPT-LABEL: @iterator_erased_lifetime( +; OPT-NOT: alloca +define amdgpu_kernel void @iterator_erased_lifetime() { +entry: + %alloca = alloca i8, align 1, addrspace(5) + call void @llvm.lifetime.start.p5i8(i64 1, i8 addrspace(5)* %alloca) ret void }