diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -125,8 +125,8 @@ const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, Register SpillReg, - int FI) { + MachineBasicBlock::iterator I, const DebugLoc &DL, + Register SpillReg, int FI) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -136,7 +136,7 @@ PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); LiveRegs.addReg(SpillReg); - TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true, + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true, FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); LiveRegs.removeReg(SpillReg); @@ -147,8 +147,8 @@ const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, Register SpillReg, - int FI) { + MachineBasicBlock::iterator I, + const DebugLoc &DL, Register SpillReg, int FI) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; @@ -157,7 +157,7 @@ MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); - TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false, + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); } @@ -776,7 +776,7 @@ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR, *Reg.FI); } @@ -791,7 +791,8 @@ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, + *FI); } if (ScratchExecCopy) { @@ -817,7 +818,7 @@ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(FramePtrReg); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, FramePtrFI); } @@ -835,7 +836,7 @@ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(BasePtrReg); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, BasePtrFI); } @@ -1031,8 +1032,8 @@ MRI, LiveRegs, AMDGPU::VGPR_32RegClass); if (!TmpVGPR) report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, - FramePtrFI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + TmpVGPR, FramePtrFI); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) .addReg(TmpVGPR, RegState::Kill); } else { @@ -1057,8 +1058,8 @@ MRI, LiveRegs, AMDGPU::VGPR_32RegClass); if (!TmpVGPR) report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, - BasePtrFI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + TmpVGPR, BasePtrFI); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) .addReg(TmpVGPR, RegState::Kill); } else { @@ -1083,8 +1084,8 @@ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, - *Reg.FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + Reg.VGPR, *Reg.FI); } for (const auto &Reg : FuncInfo->WWMReservedRegs) { @@ -1097,7 +1098,8 @@ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, + *FI); } if (ScratchExecCopy) { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -371,10 +371,11 @@ // For creating spill instructions during frame lowering, where no scavenger // is available, LiveRegs can be used. void buildSpillLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, unsigned LoadStoreOp, - int Index, Register ValueReg, bool ValueIsKill, - MCRegister ScratchOffsetReg, int64_t InstrOffset, - MachineMemOperand *MMO, RegScavenger *RS, + MachineBasicBlock::iterator MI, const DebugLoc &DL, + unsigned LoadStoreOp, int Index, Register ValueReg, + bool ValueIsKill, MCRegister ScratchOffsetReg, + int64_t InstrOffset, MachineMemOperand *MMO, + RegScavenger *RS, LivePhysRegs *LiveRegs = nullptr) const; }; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1073,7 +1073,7 @@ } void SIRegisterInfo::buildSpillLoadStore( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs) const { @@ -1085,7 +1085,6 @@ const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const MCInstrDesc *Desc = &TII->get(LoadStoreOp); - const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); bool IsStore = Desc->mayStore(); bool IsFlat = TII->isFLATScratch(LoadStoreOp); @@ -1349,12 +1348,12 @@ if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, - Offset * SB.EltSize, MMO, SB.RS); + buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false, + FrameReg, Offset * SB.EltSize, MMO, SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, + buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, Offset * SB.EltSize, MMO, SB.RS); // This only ever adds one VGPR spill SB.MFI.addToSpilledVGPRs(1); @@ -1747,7 +1746,7 @@ : AMDGPU::BUFFER_STORE_DWORD_OFFSET; auto *MBB = MI->getParent(); buildSpillLoadStore( - *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, + *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); @@ -1783,7 +1782,7 @@ : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; auto *MBB = MI->getParent(); buildSpillLoadStore( - *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, + *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); MI->eraseFromParent(); diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs < %s | FileCheck %s + +; Test that source locations (.loc directives) are not added to the code within the prologue. + +; Function Attrs: convergent mustprogress nounwind +define hidden void @_ZL3barv() #0 !dbg !1644 { +; CHECK-LABEL: _ZL3barv: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .file 0 "/tmp" "lane-info.cpp" md5 0x4ab9b75a30baffdf0f6f536a80e3e382 +; CHECK-NEXT: .loc 0 30 0 ; lane-info.cpp:30:0 +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: v_writelane_b32 v40, s33, 2 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_add_i32 s32, s32, 0x400 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, _ZL13sleep_foreverv@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, _ZL13sleep_foreverv@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] +; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; CHECK-NEXT: v_readlane_b32 s33, v40, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: .Ltmp2: +entry: + call void @_ZL13sleep_foreverv(), !dbg !1646 + ret void, !dbg !1647 +} + +; Function Attrs: convergent nounwind +declare void @_ZL13sleep_foreverv() #0 + +attributes #0 = { nounwind "frame-pointer"="all" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!1638, !1639, !1640, !1641} +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: !1, producer: "clang version 13.0.0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "lane-info.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "4ab9b75a30baffdf0f6f536a80e3e382") +!371 = !DISubroutineType(types: !372) +!372 = !{null} +!1638 = !{i32 7, !"Dwarf Version", i32 5} +!1639 = !{i32 2, !"Debug Info Version", i32 3} +!1640 = !{i32 1, !"wchar_size", i32 4} +!1641 = !{i32 7, !"PIC Level", i32 1} +!1644 = distinct !DISubprogram(name: "bar", linkageName: "_ZL3barv", scope: !1, file: !1, line: 29, type: !371, scopeLine: 30, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !1645) +!1645 = !{} +!1646 = !DILocation(line: 31, column: 3, scope: !1644) +!1647 = !DILocation(line: 32, column: 1, scope: !1644)