diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -707,14 +707,13 @@
                                      MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
-                                     bool IsProlog) {
+                                     const DebugLoc &DL, bool IsProlog) {
   Register ScratchExecCopy;
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  DebugLoc DL;
 
   initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
 
@@ -762,6 +761,8 @@
   LivePhysRegs LiveRegs;
 
   MachineBasicBlock::iterator MBBI = MBB.begin();
+  // DebugLoc must be unknown since the first instruction with DebugLoc is used
+  // to determine the end of the prologue.
   DebugLoc DL;
 
   bool HasFP = false;
@@ -782,7 +783,7 @@
       continue;
 
     if (!ScratchExecCopy)
-      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
                                              /*IsProlog*/ true);
 
     buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
@@ -792,7 +793,7 @@
   for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
     if (!ScratchExecCopy)
       ScratchExecCopy =
-          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
+          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ true);
 
     buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                      std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
@@ -966,9 +967,18 @@
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
-  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   LivePhysRegs LiveRegs;
+  // Insert the epilogue at the first terminator, or at the block end if there
+  // is none; DL is taken from the block's last non-debug instruction, if any.
+  MachineBasicBlock::iterator MBBI = MBB.end();
   DebugLoc DL;
+  if (!MBB.empty()) {
+    MBBI = MBB.getLastNonDebugInstr();
+    if (MBBI != MBB.end())
+      DL = MBBI->getDebugLoc();
+
+    MBBI = MBB.getFirstTerminator();
+  }
 
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   uint32_t NumBytes = MFI.getStackSize();
@@ -1051,7 +1061,7 @@
 
     if (!ScratchExecCopy)
       ScratchExecCopy =
-          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
+          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
 
     buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                        Reg.VGPR, *Reg.FI);
@@ -1060,7 +1070,7 @@
   for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
     if (!ScratchExecCopy)
       ScratchExecCopy =
-          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
+          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
 
     buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                        std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
diff --git a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-dwarfdump --debug-line - | FileCheck --check-prefix=DWARFLINE %s
+
+; Test that the prologue_end line directive is emitted after all the prologue instructions
+; and before the first epilogue instruction in a trivial function.
+
+; Function Attrs: nounwind
+define hidden void @_Z9base_casev() #0 !dbg !6 {
+; CHECK-LABEL: _Z9base_casev:
+; CHECK: .Lfunc_begin0:
+; CHECK-NEXT: .file 0 "dir" "file.cpp"
+; CHECK-NEXT: .loc 0 5 0 ; file.cpp:5:0
+; CHECK-NEXT: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .loc 0 7 3 prologue_end ; file.cpp:7:3
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .Ltmp1:
+
+; DWARFLINE: file format elf64-amdgpu
+; DWARFLINE: .debug_line contents
+; DWARFLINE: Address Line Column File ISA Discriminator Flags
+; DWARFLINE: 0x0000000000000000 5 0 0 0 0 is_stmt
+; DWARFLINE-NEXT: 0x0000000000000004 7 3 0 0 0 is_stmt prologue_end
+; DWARFLINE-NEXT: 0x0000000000000008 7 3 0 0 0 is_stmt end_sequence
+
+entry:
+  ret void, !dbg !7
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!4, !5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: !1, isOptimized: false, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "file.cpp", directory: "dir")
+!2 = !DISubroutineType(types: !3)
+!3 = !{null}
+!4 = !{i32 7, !"Dwarf Version", i32 5}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "base_case", linkageName: "_Z9base_casev", scope: !1, file: !1, line: 5, type: !2, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!7 = !DILocation(line: 7, column: 3, scope: !6)