diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -882,6 +882,7 @@
   addPass(createSILowerI1CopiesPass());
   addPass(createSIFixupVectorISelPass());
   addPass(createSIAddIMGInitPass());
+  addPass(&UnreachableMachineBlockElimID);
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=amdgcn -O0 -o - %s | FileCheck %s
+
+; Check that llc at -O0 compiles a kernel containing a loop whose bound is the
+; workitem id (so the trip count differs between work-items) and that code
+; generation runs through to the end of the kernel.
+
+; CHECK-LABEL: {{^}}non_uniform_loop:
+; CHECK: s_endpgm
+define amdgpu_kernel void @non_uniform_loop(float addrspace(1)* %array) {
+entry:
+  %w = tail call i32 @llvm.amdgcn.workitem.id.x()
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [0, %entry], [%i.next, %for.inc]
+  %cmp = icmp ult i32 %i, %w
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %i.next = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()