diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -522,9 +522,11 @@ auto *UseBlock = UseMI->getParent(); // Replace uses in Endif block if (UseBlock == Endif) { - if (UseMI->isPHI()) { + if (UseMI->isPHI()) O.setReg(NewReg); - } else { + else if (UseMI->isDebugInstr()) + continue; + else { // DetectDeadLanes may mark register uses as undef without removing // them, in which case a non-phi instruction using the original register // may exist in the Endif block even though the register is not live diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s + +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined(i1 %arg) { +; GCN-LABEL: __omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined: +; GCN: .Lfunc_begin0: +; GCN-NEXT: .cfi_sections .debug_frame +; GCN-NEXT: .cfi_startproc +; GCN-NEXT: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: global_load_dwordx2 v[1:2], v[1:2], off +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; 
GCN-NEXT: s_cbranch_execnz .LBB0_3 +; GCN-NEXT: ; %bb.1: ; %Flow +; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GCN-NEXT: s_cbranch_execnz .LBB0_4 +; GCN-NEXT: .LBB0_2: ; %bb3 +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: .LBB0_3: ; %bb2 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_mov_b32_e32 v4, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: flat_store_dwordx2 v[1:2], v[3:4] +; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GCN-NEXT: s_cbranch_execz .LBB0_2 +; GCN-NEXT: .LBB0_4: ; %bb1 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_mov_b32_e32 v4, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: flat_store_dwordx2 v[1:2], v[3:4] +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +bb: + %i = load ptr, ptr addrspace(1) null, align 8 + br i1 %arg, label %bb1, label %bb2 + +bb1: ; preds = %bb + store double 0.000000e+00, ptr %i, align 8 + br label %bb3 + +bb2: ; preds = %bb + store double 0.000000e+00, ptr %i, align 8 + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + call void @llvm.dbg.value(metadata !DIArgList(ptr %i, i64 0), metadata !4, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value)), !dbg !9 + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) +!1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") +!2 = !{} +!3 = !{i32 1, !"Debug Info Version", i32 3} +!4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8) +!5 = distinct 
!DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!6 = !DISubroutineType(types: !7) +!7 = !{!8} +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !DILocation(line: 0, scope: !5) diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir @@ -0,0 +1,111 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-opt-vgpr-liverange %s -o - | FileCheck -check-prefix=GCN %s + +# SIOptimizeVGPRLiveRange shouldn't try to modify use of %5 in DBG_VALUE_LIST + +--- | + define void @dbg_instr_use(i1 %arg) #1 { + ret void + } + + attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" } + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) + !1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") + !2 = !{} + !3 = !{i32 1, !"Debug Info Version", i32 3} + !4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8) + !5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: 
DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) + !6 = !DISubroutineType(types: !7) + !7 = !{!8} + !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !9 = !DILocation(line: 0, scope: !5) +... + +--- +name: dbg_instr_use +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: dbg_instr_use + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, killed [[COPY]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, killed [[V_AND_B32_e32_]], implicit $exec + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 killed [[V_CMP_EQ_U32_e64_]], -1, implicit-def dead $scc + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec + ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[V_MOV_B]], 0, 0, implicit $exec + ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[S_XOR_B64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.0, undef %13:vreg_64, %bb.3 + ; GCN-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.4(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2 killed [[PHI]], killed 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: S_BRANCH %bb.4 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_1]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2 killed [[GLOBAL_LOAD_DWORDX2_]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: DBG_VALUE_LIST + ; GCN-SAME: [[GLOBAL_LOAD_DWORDX2_]] + ; GCN-NEXT: SI_RETURN + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_AND_B32_e32 1, %0, implicit $exec + %2:sreg_64 = V_CMP_EQ_U32_e64 1, killed %1, implicit $exec + %3:sreg_64 = S_XOR_B64 killed %2, -1, implicit-def dead $scc + %4:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec + %5:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %4, 0, 0, implicit $exec + %6:sreg_64 = SI_IF killed %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.1: + successors: %bb.2(0x40000000), %bb.4(0x40000000) + + %7:sreg_64 = SI_ELSE %6, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.4(0x80000000) + + %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %8, %subreg.sub1 + FLAT_STORE_DWORDX2 %5, killed %9, 0, 0, implicit $exec, implicit $flat_scr + S_BRANCH %bb.4 + + bb.3: + successors: %bb.1(0x80000000) + + %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %11:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %10, %subreg.sub1 + FLAT_STORE_DWORDX2 %5, killed %11, 0, 0, implicit $exec, implicit $flat_scr + S_BRANCH %bb.1 
+ + bb.4: + SI_END_CF %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + DBG_VALUE_LIST !4, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value), %5, 0, debug-location !9 + SI_RETURN +...