Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -100,8 +100,13 @@ = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx); const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx); - assert(SrcBB.front().getOpcode() == AMDGPU::S_GETPC_B64 && - ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4); + // FIXME: The first half of this assert should be removed. This should + // probably be PC relative instead of using the source block symbol, and + // therefore the indirect branch expansion should use a bundle. + assert( + skipDebugInstructionsForward(SrcBB.begin(), SrcBB.end())->getOpcode() == + AMDGPU::S_GETPC_B64 && + ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4); // s_getpc_b64 returns the address of next instruction. const MCConstantExpr *One = MCConstantExpr::create(4, Ctx); Index: test/CodeGen/AMDGPU/branch-relaxation-debug-info.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/branch-relaxation-debug-info.ll @@ -0,0 +1,199 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-s-branch-bits=4 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +; Make sure there is no assertion due to dbg_value instructions +; present in the block used for the branch expansion. 
+define amdgpu_kernel void @long_branch_dbg_value(float addrspace(1)* nocapture %arg, float %arg1) #1 !dbg !5 {
+; GCN-LABEL: long_branch_dbg_value:
+; GCN: .Lfunc_begin0:
+; GCN-NEXT: .file 1 "/tmp/test_debug_value.cl"
+; GCN-NEXT: .loc 1 2 0 ; /tmp/test_debug_value.cl:2:0
+; GCN-NEXT: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
+; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e64 v0, s6, s6
+; GCN-NEXT: .Ltmp0:
+; GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value] $sgpr8_sgpr9
+; GCN-NEXT: v_mov_b32_e32 v1, s8
+; GCN-NEXT: v_mov_b32_e32 v2, s9
+; GCN-NEXT: .Ltmp1:
+; GCN-NEXT: .loc 1 1 42 prologue_end ; /tmp/test_debug_value.cl:1:42
+; GCN-NEXT: global_store_dword v[1:2], v0, off offset:12
+; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT: v_mov_b32_e32 v4, 0
+; GCN-NEXT: s_mov_b64 s[4:5], 0
+; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GCN-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s7 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s7 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s7 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s7 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT: s_cbranch_vccnz BB0_4
+; GCN-NEXT: s_branch BB0_1
+; GCN-NEXT: .Ltmp2:
+; GCN-NEXT: BB0_4: ; %bb
+; GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value] $sgpr8_sgpr9
+; GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, BB0_3-(BB0_4+4)
+; GCN-NEXT: s_addc_u32 s5, s5, 0
+; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: .Ltmp3:
+; GCN-NEXT: BB0_1: ; %bb7
+; GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value] $sgpr8_sgpr9
+; GCN-NEXT: s_mov_b32 s4, 0x7f800000
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s7 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_neq_f32_e64 s[8:9], s4, v0
+; GCN-NEXT: .Ltmp4:
+; GCN-NEXT: s_and_b64 vcc, exec, s[8:9]
+; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s7 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s7 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT: s_cbranch_vccz BB0_2
+; GCN-NEXT: BB0_5: ; %bb7
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, BB0_3-(BB0_5+4)
+; GCN-NEXT: s_addc_u32 s5, s5, 0
+; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: BB0_2: ; %bb9
+; GCN-NEXT: s_mov_b32 s4, 0x1e800000
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v1, s4, v0
+; GCN-NEXT: s_mov_b32 s4, 0x7fc00000
+; GCN-NEXT: ; implicit-def: $vgpr2
+; GCN-NEXT: ; implicit-def: $vgpr3
+; GCN-NEXT: v_mov_b32_e32 v3, s4
+; GCN-NEXT: v_mov_b32_e32 v4, s4
+; GCN-NEXT: v_mov_b32_e32 v5, v3
+; GCN-NEXT: v_mov_b32_e32 v6, v4
+; GCN-NEXT: v_mov_b32_e32 v7, v1
+; GCN-NEXT: v_mov_b32_e32 v8, v2
+; GCN-NEXT: ; implicit-def: $vgpr1
+; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s7 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s7 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s7 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT: BB0_3: ; %bb12
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s7 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s7 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s7 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s7 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b32 s4, 0x71800000
+; GCN-NEXT: v_mul_f32_e32 v0, s4, v0
+; GCN-NEXT: s_mov_b32 s4, 0x58800000
+; GCN-NEXT: v_mul_f32_e32 v0, s4, v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v5, v1
+; GCN-NEXT: v_mul_f32_e32 v0, v5, v0
+; GCN-NEXT: s_mov_b64 s[8:9], 0
+; GCN-NEXT: v_mov_b32_e32 v6, s8
+; GCN-NEXT: v_mov_b32_e32 v7, s9
+; GCN-NEXT: global_store_dword v[6:7], v0, off
+; GCN-NEXT: s_endpgm
+; GCN-NEXT: .section .rodata,#alloc
+; GCN-NEXT: .p2align 6
+; GCN-NEXT: .amdhsa_kernel long_branch_dbg_value
+; GCN-NEXT: .amdhsa_group_segment_fixed_size 0
+; GCN-NEXT: .amdhsa_private_segment_fixed_size 32
+; GCN-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; GCN-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; GCN-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; GCN-NEXT: .amdhsa_next_free_vgpr 9
+; GCN-NEXT: .amdhsa_next_free_sgpr 10
+; GCN-NEXT: .amdhsa_reserve_flat_scratch 0
+; GCN-NEXT: .amdhsa_float_round_mode_32 0
+; GCN-NEXT: .amdhsa_float_round_mode_16_64 0
+; GCN-NEXT: .amdhsa_float_denorm_mode_32 0
+; GCN-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; GCN-NEXT: .amdhsa_dx10_clamp 1
+; GCN-NEXT: .amdhsa_ieee_mode 1
+; GCN-NEXT: .amdhsa_fp16_overflow 0
+; GCN-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; GCN-NEXT: .amdhsa_exception_fp_denorm_src 0
+; GCN-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; GCN-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; GCN-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; GCN-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; GCN-NEXT: .amdhsa_exception_int_div_zero 0
+; GCN-NEXT: .end_amdhsa_kernel
+; GCN-NEXT: .text
+bb:
+  %tmp = fmul float %arg1, %arg1
+  %tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 3
+  call void @llvm.dbg.value(metadata float addrspace(1)* %tmp2, metadata !11, metadata !DIExpression()) #2, !dbg !12
+  store float %tmp, float addrspace(1)* %tmp2, align 4, !dbg !12
+  %tmp3 = tail call float @llvm.fmuladd.f32(float undef, float undef, float undef) #2
+  %tmp5 = fadd float %tmp3, 1.000000e+00
+  %tmp6 = fcmp olt float %tmp5, 0x3810000000000000
+  br i1 %tmp6, label %bb12, label %bb7
+
+bb7:                                              ; preds = %bb
+  %tmp8 = fcmp oeq float %tmp5, 0x7FF0000000000000
+  br i1 %tmp8, label %bb9, label %bb12
+
+bb9:                                              ; preds = %bb7
+  %tmp4 = insertelement <3 x float> undef, float %tmp3, i32 2
+  %tmp10 = fmul <3 x float> %tmp4, <float 0x3BD0000000000000, float 0x3BD0000000000000, float 0x3BD0000000000000> ; NOTE(review): operand restored; lane 2 matches the checked 0x1e800000, lanes 0-1 assumed splat - confirm
+  %tmp11 = shufflevector <3 x float> %tmp10, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  br label %bb12
+
+bb12:                                             ; preds = %bb9, %bb7, %bb
+  %tmp13 = phi <4 x float> [ %tmp11, %bb9 ], [ undef, %bb7 ], [ undef, %bb ]
+  %tmp14 = phi float [ undef, %bb9 ], [ %tmp5, %bb7 ], [ 0.000000e+00, %bb ]
+  %tmp16 = fmul float %tmp14, 0x4630000000000000
+  %tmp17 = fmul float %tmp16, 0x4310000000000000
+  %0 = extractelement <4 x float> %tmp13, i32 0
+  %tmp20 = fmul float %0, %tmp17
+  store float %tmp20, float addrspace(1)* null, align 536870912
+  ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind writeonly }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !6, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !10)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 32)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11}
+!11 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !5, file: !1, line: 1, type: !8)
+!12 = !DILocation(line: 1, column: 42, scope: !5)