diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -69,12 +69,22 @@
                           Register PreloadedPrivateBufferReg,
                           Register ScratchRsrcReg,
                           Register ScratchWaveOffsetReg) const;

+  void emitPrologueEntryCFI(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            const DebugLoc &DL) const;
+
 public:
   bool hasFP(const MachineFunction &MF) const override;

   /// Create a CFI index for CFIInst and build a MachineInstr around it.
   void buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 const DebugLoc &DL, const MCCFIInstruction &CFIInst) const;
+
+  /// Create a CFI index describing a spill of an SGPR to a single lane of
+  /// a VGPR and build a MachineInstr around it.
+  void buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MBBI,
+                                  const DebugLoc &DL, const Register SGPR,
+                                  const Register VGPR, const int Lane) const;
 };

 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -13,6 +13,7 @@
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -20,12 +21,12 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/LEB128.h"

 using namespace llvm;

 #define DEBUG_TYPE "frame-info"

-
 // Find a scratch register that we can use at the start of the prologue to
 // re-align the stack pointer. We avoid using callee-save registers since they
 // may appear to be free when this is called from canUseAsPrologue (during
@@ -700,6 +701,55 @@
   return ScratchExecCopy;
 }

+void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           const DebugLoc &DL) const {
+  const MachineFunction &MF = *MBB.getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+  Register StackPtrReg =
+      MF.getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg();
+
+  // DW_ASPACE_AMDGPU_private_wave FIXME: should be defined elsewhere
+  buildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createLLVMDefAspaceCfa(
+               nullptr, MCRI->getDwarfRegNum(StackPtrReg, false), 0, 6));
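+  // Note: this lowers to the `.cfi_llvm_def_aspace_cfa 64, 0, 6` directive
+  // checked in debug-frame.ll: at entry the CFA is the value of the incoming
+  // stack pointer (SGPR32 is DWARF register 64) in the wave-private scratch
+  // address space (address space 6).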
+
+  static const char PCEncodedInst[] = {
+      dwarf::DW_CFA_expression,
+      16, // PC 64
+      8,  // length
+      static_cast<char>(dwarf::DW_OP_regx),
+      62, // SGPR30
+      static_cast<char>(dwarf::DW_OP_piece),
+      4, // 32 bits
+      static_cast<char>(dwarf::DW_OP_regx),
+      63, // SGPR31
+      static_cast<char>(dwarf::DW_OP_piece),
+      4 // 32 bits
+  };
+  buildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createEscape(
+               nullptr, StringRef(PCEncodedInst, sizeof(PCEncodedInst))));
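+  // Assembled, these bytes are the `.cfi_escape 0x10, 0x10, 0x08, 0x90,
+  // 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04` directive checked in
+  // debug-frame.ll: the 64-bit PC (return address) is described as SGPR30
+  // (low 4 bytes) concatenated with SGPR31 (high 4 bytes).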
+
+  BitVector IsCalleeSaved(TRI.getNumRegs());
+  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+  for (unsigned I = 0; CSRegs[I]; ++I) {
+    IsCalleeSaved.set(CSRegs[I]);
+  }
+  auto ProcessReg = [&](MCPhysReg Reg) {
+    if (IsCalleeSaved.test(Reg) || !MRI.isPhysRegModified(Reg))
+      return;
+    MCRegister DwarfReg = MCRI->getDwarfRegNum(Reg, false);
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createUndefined(nullptr, DwarfReg));
+  };
+  for_each(TRI.getAllVGPR32(MF), ProcessReg);
+  for_each(TRI.getAllSGPR32(MF), ProcessReg);
+}
+
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
@@ -713,6 +763,7 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();

   Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -731,6 +782,8 @@
   // turn on all lanes before doing the spill to memory.
   Register ScratchExecCopy;

+  emitPrologueEntryCFI(MBB, MBBI, DL);
+
   bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
   bool SpillFPToMemory = false;
   // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
@@ -754,6 +807,11 @@
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
             FuncInfo->SGPRForFPSaveRestoreCopy)
         .addReg(FramePtrReg)
         .setMIFlag(MachineInstr::FrameSetup);
+    buildCFI(
+        MBB, MBBI, DL,
+        MCCFIInstruction::createRegister(
+            nullptr, MCRI->getDwarfRegNum(FramePtrReg, false),
+            MCRI->getDwarfRegNum(FuncInfo->SGPRForFPSaveRestoreCopy, false)));
   }

   // Emit the copy if we need a BP, and are using a free SGPR to save it.
@@ -762,6 +820,11 @@
             FuncInfo->SGPRForBPSaveRestoreCopy)
         .addReg(BasePtrReg)
         .setMIFlag(MachineInstr::FrameSetup);
+    buildCFI(
+        MBB, MBBI, DL,
+        MCCFIInstruction::createRegister(
+            nullptr, MCRI->getDwarfRegNum(BasePtrReg, false),
+            MCRI->getDwarfRegNum(FuncInfo->SGPRForBPSaveRestoreCopy, false)));
   }

   // If a copy has been emitted for FP and/or BP, Make the SGPRs
@@ -790,14 +853,21 @@
     if (!ScratchExecCopy)
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

+    int FI = Reg.FI.getValue();
+
     buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
-                     FuncInfo->getScratchRSrcReg(),
-                     StackPtrReg,
-                     Reg.FI.getValue());
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+
+    // We spill the entire VGPR, so we can get away with just cfi_offset
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createOffset(
+                 nullptr, MCRI->getDwarfRegNum(Reg.VGPR, false),
+                 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
   }

   if (HasFPSaveIndex && SpillFPToMemory) {
-    assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue()));
+    const int FI = *FuncInfo->FramePointerSaveIndex;
+    assert(!MFI.isDeadObjectIndex(FI));
     if (!ScratchExecCopy)
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

@@ -809,12 +879,16 @@
         .addReg(FramePtrReg);

     buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
-                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
-                     FuncInfo->FramePointerSaveIndex.getValue());
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createOffset(
+                 nullptr, MCRI->getDwarfRegNum(FramePtrReg, false),
+                 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
   }

   if (HasBPSaveIndex && SpillBPToMemory) {
-    assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));
+    const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
+    assert(!MFI.isDeadObjectIndex(BasePtrFI));
     if (!ScratchExecCopy)
       ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

@@ -826,8 +900,11 @@
         .addReg(BasePtrReg);

     buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
-                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
-                     *FuncInfo->BasePointerSaveIndex);
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createOffset(
+                 nullptr, MCRI->getDwarfRegNum(BasePtrReg, false),
+                 MFI.getObjectOffset(BasePtrFI) * ST.getWavefrontSize()));
   }

   if (ScratchExecCopy) {
@@ -841,7 +918,7 @@

   // In this case, spill the FP to a reserved VGPR.
   if (HasFPSaveIndex && !SpillFPToMemory) {
-    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    const int FI = *FuncInfo->FramePointerSaveIndex;
     assert(!MFI.isDeadObjectIndex(FI));

     assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
@@ -853,9 +930,12 @@
     // FIXME: This should respect spillSGPRToVGPR;
     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
             Spill[0].VGPR)
-      .addReg(FramePtrReg)
-      .addImm(Spill[0].Lane)
-      .addReg(Spill[0].VGPR, RegState::Undef);
+        .addReg(FramePtrReg)
+        .addImm(Spill[0].Lane)
+        .addReg(Spill[0].VGPR, RegState::Undef);
+
+    buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, FramePtrReg, Spill[0].VGPR,
+                               Spill[0].Lane);
   }

   // In this case, spill the BP to a reserved VGPR.
@@ -875,6 +955,8 @@
         .addReg(BasePtrReg)
         .addImm(Spill[0].Lane)
         .addReg(Spill[0].VGPR, RegState::Undef);
+    buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, BasePtrReg, Spill[0].VGPR,
+                               Spill[0].Lane);
   }

   if (TRI.needsStackRealignment(MF)) {
@@ -921,6 +1003,12 @@
       .setMIFlag(MachineInstr::FrameSetup);
   }

+  if (HasFP) {
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createDefCfaRegister(
+                 nullptr, MCRI->getDwarfRegNum(FramePtrReg, false)));
+  }
+
   if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
       .addReg(StackPtrReg)
@@ -954,6 +1042,7 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   LivePhysRegs LiveRegs;
@@ -1004,7 +1093,7 @@

   Register ScratchExecCopy;
   if (HasFPSaveIndex) {
-    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    const int FI = *FuncInfo->FramePointerSaveIndex;
     assert(!MFI.isDeadObjectIndex(FI));
     if (SpillFPToMemory) {
       if (!ScratchExecCopy)
@@ -1029,6 +1118,11 @@
     }
   }

+  if (hasFP(MF))
+    buildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createDefCfaRegister(
+                 nullptr, MCRI->getDwarfRegNum(StackPtrReg, false)));
+
   if (HasBPSaveIndex) {
     const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
     assert(!MFI.isDeadObjectIndex(BasePtrFI));
@@ -1314,3 +1408,56 @@
       .addCFIIndex(MF.addFrameInst(CFIInst))
       .setMIFlag(MachineInstr::FrameSetup);
 }
+
+static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) {
+  if (DwarfReg < 32) {
+    OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg);
+  } else {
+    OS << uint8_t(dwarf::DW_OP_regx);
+    encodeULEB128(DwarfReg, OS);
+  }
+}
+
+void SIFrameLowering::buildCFIForSGPRToVGPRSpill(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL, const Register SGPR, const Register VGPR,
+    const int Lane) const {
+  MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getMMI().getContext().getRegisterInfo();
+  int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false);
+  int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false);
+
+  // CFI for an SGPR spilled to a single lane of a VGPR is implemented as an
+  // expression(E) rule where E is a register location description referencing
+  // a VGPR register location storage at a byte offset of the lane index
+  // multiplied by the size of an SGPR (4 bytes). In other words we generate
+  // the following DWARF:
+  //
+  // DW_CFA_expression: <SGPR>,
+  //    (DW_OP_regx <VGPR>) (DW_OP_LLVM_offset_uconst <Lane>*4)
+  //
+  // The memory location description for the current CFA is pushed on the
+  // stack before E is evaluated, but we choose not to drop it as it would
+  // require a longer expression E and DWARF defines the result of the
+  // evaluation to be the location description on the top of the stack (i.e.
+  // the implicitly pushed one is just ignored.)
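+  //
+  // For example (this is the encoding checked in debug-frame.ll below), for
+  // SGPR33 (DWARF 65) spilled to lane 2 of VGPR40 (DWARF 2600 in wave64),
+  // the escape bytes are:
+  //   0x10, 0x41, 0x05, 0x90, 0xa8, 0x14, 0xe4, 0x08
+  //   i.e. DW_CFA_expression, SGPR33, block length 5,
+  //   DW_OP_regx, ULEB128(2600), DW_OP_LLVM_offset_uconst, ULEB128(2*4)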
+  SmallString<20> CFIInst;
+  raw_svector_ostream OSCFIInst(CFIInst);
+  SmallString<20> Block;
+  raw_svector_ostream OSBlock(Block);
+
+  OSCFIInst << uint8_t(dwarf::DW_CFA_expression);
+  encodeULEB128(DwarfSGPR, OSCFIInst);
+
+  encodeDwarfRegisterLocation(DwarfVGPR, OSBlock);
+  OSBlock << uint8_t(dwarf::DW_OP_LLVM_offset_uconst);
+  // FIXME:
+  const unsigned SGPRByteSize = 4;
+  encodeULEB128(Lane * SGPRByteSize, OSBlock);
+
+  encodeULEB128(Block.size(), OSCFIInst);
+  OSCFIInst << Block;
+
+  buildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createEscape(nullptr, OSCFIInst.str()));
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
@@ -258,8 +258,8 @@
 ; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_add_u32 s4, s32, 0x7c0
 ; GFX9-NEXT:    s_mov_b32 s6, s33
+; GFX9-NEXT:    s_add_u32 s4, s32, 0x7c0
 ; GFX9-NEXT:    s_and_b32 s33, s4, 0xfffff800
 ; GFX9-NEXT:    s_add_u32 s32, s32, 0x1000
 ; GFX9-NEXT:    s_getpc_b64 s[4:5]
@@ -286,14 +286,16 @@
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_add_u32 s4, s32, 0x3e0
 ; GFX10-NEXT:    s_mov_b32 s6, s33
+; GFX10-NEXT:    s_add_u32 s4, s32, 0x3e0
 ; GFX10-NEXT:    s_and_b32 s33, s4, 0xfffffc00
 ; GFX10-NEXT:    s_add_u32 s32, s32, 0x800
 ; GFX10-NEXT:    s_getpc_b64 s[4:5]
 ; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
 ; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+4
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_mov_b32 s33, s6
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_mov_b32 s33, s6
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -166,8 +166,8 @@
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s8, s33
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_cbranch_execz BB2_3
@@ -231,8 +231,8 @@
 ; GCN-LABEL: func_non_entry_block_static_alloca_align64:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; GCN-NEXT:    s_mov_b32 s8, s33
+; GCN-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; GCN-NEXT:    s_and_b32 s33, s4, 0xfffff000
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x2000
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -197,7 +197,7 @@
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: v_readlane_b32 s42, v0, 0
 ; GCN-NEXT: s_setpc_b64
-define void @spill_only_csr_sgpr() {
+define void @spill_only_csr_sgpr() #0 {
   call void asm sideeffect "; clobber s42", "~{s42}"()
   ret void
 }
@@ -296,8 +296,8 @@
 ; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
 ; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
 ; GCN-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000
 ; GCN-NEXT: s_add_u32 s32, s32, 0x100000
 ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
@@ -315,14 +315,14 @@
 ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
 ; GCN: s_waitcnt
 ; GCN-NEXT: v_writelane_b32 v1, s33, 2
-; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; GCN: v_writelane_b32 v1, s31, 1
 ; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
 ; GCN: ;;#ASMSTART
-; GCN: v_readlane_b32 s4, v1, 0
-; GCN-NEXT: s_add_u32 s32, s32, 0x200
+; GCN: s_add_u32 s32, s32, 0x200
+; GCN-NEXT: v_readlane_b32 s4, v1, 0
 ; GCN-NEXT: v_readlane_b32 s5, v1, 1
 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200
 ; GCN-NEXT: v_readlane_b32 s33, v1, 2
@@ -349,8 +349,8 @@
 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
-; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0

 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
 ; GCN-DAG: buffer_store_dword
@@ -396,8 +396,8 @@
 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
-; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0
 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}}
 ; GCN-DAG: buffer_store_dword
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -31,8 +31,8 @@
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    v_writelane_b32 v40, s33, 2
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
 ; GCN-NEXT:    s_add_u32 s4, s4, func_v2f32@rel32@lo+4
@@ -65,8 +65,8 @@
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    v_writelane_b32 v40, s33, 2
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
 ; GCN-NEXT:    s_add_u32 s4, s4, func_v3f32@rel32@lo+4
@@ -99,8 +99,8 @@
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    v_writelane_b32 v40, s33, 2
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
 ; GCN-NEXT:    s_add_u32 s4, s4, func_v4f16@rel32@lo+4
@@ -133,8 +133,8 @@
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    v_writelane_b32 v40, s33, 2
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
 ; GCN-NEXT:    s_add_u32 s4, s4, func_struct@rel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll
--- a/llvm/test/CodeGen/AMDGPU/debug-frame.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s

 ; CHECK-LABEL: kern1:
 ; CHECK: .cfi_startproc
@@ -23,7 +24,506 @@
   ret void
 }

+; CHECK-LABEL: func_no_clobber:
+; CHECK: .cfi_startproc
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: %bb.0:
+; SGPR32 = 64
+; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; DW_CFA_expression [0x10]
+; PC_64 ULEB128(16)=[0x10]
+; BLOCK_LENGTH ULEB128(8)=[0x08]
+; DW_OP_regx [0x90]
+; SGPR30 ULEB128(62)=[0x3e]
+; DW_OP_piece [0x93]
+; PIECE_SIZE [0x04]
+; DW_OP_regx [0x90]
+; SGPR31 ULEB128(63)=[0x3f]
+; DW_OP_piece [0x93]
+; PIECE_SIZE [0x04]
+; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: .cfi_endproc
+define hidden void @func_no_clobber() #0 {
+entry:
+  ret void
+}
+
+; CHECK-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
+; CHECK: .cfi_startproc
+
+; SGPR33 = 65
+; CHECK: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
+; CHECK: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; WAVE64: .cfi_offset 65, 29184
+; WAVE32: .cfi_offset 65, 14592
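+; (The second .cfi_offset operand is the frame-index byte offset scaled by
+; the wavefront size, matching MFI.getObjectOffset(FI) * ST.getWavefrontSize()
+; in SIFrameLowering.cpp: 456 * 64 = 29184 for wave64 and 456 * 32 = 14592
+; for wave32.)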
+
+; CHECK: .cfi_endproc
+define void @callee_need_to_spill_fp_to_memory() #1 {
+  call void asm sideeffect "; clobber nonpreserved SGPRs",
+    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
+    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
+    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
+    ,~{vcc}"()
+
+  call void asm sideeffect "; clobber all VGPRs",
+    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+    ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+    ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+    ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+    ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+    ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+    ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+    ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+    ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+    ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+    ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+    ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+    ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+    ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+    ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+    ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+    ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+    ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+    ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+    ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+    ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+    ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+    ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"()
+  ret void
+}
+
+declare hidden void @ex() #0
+
+; CHECK-LABEL: func_call_clobber:
+; CHECK: .cfi_startproc
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: %bb.0:
+; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
+
+; VGPR0_wave64 = 2560
+; WAVE64-NEXT: .cfi_undefined 2560
+; WAVE64-NEXT: .cfi_undefined 2561
+; WAVE64-NEXT: .cfi_undefined 2562
+; WAVE64-NEXT: .cfi_undefined 2563
+; WAVE64-NEXT: .cfi_undefined 2564
+; WAVE64-NEXT: .cfi_undefined 2565
+; WAVE64-NEXT: .cfi_undefined 2566
+; WAVE64-NEXT: .cfi_undefined 2567
+; WAVE64-NEXT: .cfi_undefined 2568
+; WAVE64-NEXT: .cfi_undefined 2569
+; WAVE64-NEXT: .cfi_undefined 2570
+; WAVE64-NEXT: .cfi_undefined 2571
+; WAVE64-NEXT: .cfi_undefined 2572
+; WAVE64-NEXT: .cfi_undefined 2573
+; WAVE64-NEXT: .cfi_undefined 2574
+; WAVE64-NEXT: .cfi_undefined 2575
+; WAVE64-NEXT: .cfi_undefined 2576
+; WAVE64-NEXT: .cfi_undefined 2577
+; WAVE64-NEXT: .cfi_undefined 2578
+; WAVE64-NEXT: .cfi_undefined 2579
+; WAVE64-NEXT: .cfi_undefined 2580
+; WAVE64-NEXT: .cfi_undefined 2581
+; WAVE64-NEXT: .cfi_undefined 2582
+; WAVE64-NEXT: .cfi_undefined 2583
+; WAVE64-NEXT: .cfi_undefined 2584
+; WAVE64-NEXT: .cfi_undefined 2585
+; WAVE64-NEXT: .cfi_undefined 2586
+; WAVE64-NEXT: .cfi_undefined 2587
+; WAVE64-NEXT: .cfi_undefined 2588
+; WAVE64-NEXT: .cfi_undefined 2589
+; WAVE64-NEXT: .cfi_undefined 2590
+; WAVE64-NEXT: .cfi_undefined 2591
+; WAVE64-NEXT: .cfi_undefined 2592
+; WAVE64-NEXT: .cfi_undefined 2593
+; WAVE64-NEXT: .cfi_undefined 2594
+; WAVE64-NEXT: .cfi_undefined 2595
+; WAVE64-NEXT: .cfi_undefined 2596
+; WAVE64-NEXT: .cfi_undefined 2597
+; WAVE64-NEXT: .cfi_undefined 2598
+; WAVE64-NEXT: .cfi_undefined 2599
+
+; VGPR48_wave64 = 2608
+; WAVE64-NEXT: .cfi_undefined 2608
+; WAVE64-NEXT: .cfi_undefined 2609
+; WAVE64-NEXT: .cfi_undefined 2610
+; WAVE64-NEXT: .cfi_undefined 2611
+; WAVE64-NEXT: .cfi_undefined 2612
+; WAVE64-NEXT: .cfi_undefined 2613
+; WAVE64-NEXT: .cfi_undefined 2614
+; WAVE64-NEXT: .cfi_undefined 2615
+
+; WAVE64-NEXT: .cfi_undefined 2624
+; WAVE64-NEXT: .cfi_undefined 2625
+; WAVE64-NEXT: .cfi_undefined 2626
+; WAVE64-NEXT: .cfi_undefined 2627
+; WAVE64-NEXT: .cfi_undefined 2628
+; WAVE64-NEXT: .cfi_undefined 2629
+; WAVE64-NEXT: .cfi_undefined 2630
+; WAVE64-NEXT: .cfi_undefined 2631
+
+; WAVE64-NEXT: .cfi_undefined 2640
+; WAVE64-NEXT: .cfi_undefined 2641
+; WAVE64-NEXT: .cfi_undefined 2642
+; WAVE64-NEXT: .cfi_undefined 2643
+; WAVE64-NEXT: .cfi_undefined 2644
+; WAVE64-NEXT: .cfi_undefined 2645
+; WAVE64-NEXT: .cfi_undefined 2646
+; WAVE64-NEXT: .cfi_undefined 2647
+
+; WAVE64-NEXT: .cfi_undefined 2656
+; WAVE64-NEXT: .cfi_undefined 2657
+; WAVE64-NEXT: .cfi_undefined 2658
+; WAVE64-NEXT: .cfi_undefined 2659
+; WAVE64-NEXT: .cfi_undefined 2660
+; WAVE64-NEXT: .cfi_undefined 2661
+; WAVE64-NEXT: .cfi_undefined 2662
+; WAVE64-NEXT: .cfi_undefined 2663
+
+; WAVE64-NEXT: .cfi_undefined 2672
+; WAVE64-NEXT: .cfi_undefined 2673
+; WAVE64-NEXT: .cfi_undefined 2674
+; WAVE64-NEXT: .cfi_undefined 2675
+; WAVE64-NEXT: .cfi_undefined 2676
+; WAVE64-NEXT: .cfi_undefined 2677
+; WAVE64-NEXT: .cfi_undefined 2678
+; WAVE64-NEXT: .cfi_undefined 2679
+
+; WAVE64-NEXT: .cfi_undefined 2688
+; WAVE64-NEXT: .cfi_undefined 2689
+; WAVE64-NEXT: .cfi_undefined 2690
+; WAVE64-NEXT: .cfi_undefined 2691
+; WAVE64-NEXT: .cfi_undefined 2692
+; WAVE64-NEXT: .cfi_undefined 2693
+; WAVE64-NEXT: .cfi_undefined 2694
+; WAVE64-NEXT: .cfi_undefined 2695
+
+; WAVE64-NEXT: .cfi_undefined 2704
+; WAVE64-NEXT: .cfi_undefined 2705
+; WAVE64-NEXT: .cfi_undefined 2706
+; WAVE64-NEXT: .cfi_undefined 2707
+; WAVE64-NEXT: .cfi_undefined 2708
+; WAVE64-NEXT: .cfi_undefined 2709
+; WAVE64-NEXT: .cfi_undefined 2710
+; WAVE64-NEXT: .cfi_undefined 2711
+
+; WAVE64-NEXT: .cfi_undefined 2720
+; WAVE64-NEXT: .cfi_undefined 2721
+; WAVE64-NEXT: .cfi_undefined 2722
+; WAVE64-NEXT: .cfi_undefined 2723
+; WAVE64-NEXT: .cfi_undefined 2724
+; WAVE64-NEXT: .cfi_undefined 2725
+; WAVE64-NEXT: .cfi_undefined 2726
+; WAVE64-NEXT: .cfi_undefined 2727
+
+; WAVE64-NEXT: .cfi_undefined 2736
+; WAVE64-NEXT: .cfi_undefined 2737
+; WAVE64-NEXT: .cfi_undefined 2738
+; WAVE64-NEXT: .cfi_undefined 2739
+; WAVE64-NEXT: .cfi_undefined 2740
+; WAVE64-NEXT: .cfi_undefined 2741
+; WAVE64-NEXT: .cfi_undefined 2742
+; WAVE64-NEXT: .cfi_undefined 2743
+
+; WAVE64-NEXT: .cfi_undefined 2752
+; WAVE64-NEXT: .cfi_undefined 2753
+; WAVE64-NEXT: .cfi_undefined 2754
+; WAVE64-NEXT: .cfi_undefined 2755
+; WAVE64-NEXT: .cfi_undefined 2756
+; WAVE64-NEXT: .cfi_undefined 2757
+; WAVE64-NEXT: .cfi_undefined 2758
+; WAVE64-NEXT: .cfi_undefined 2759
+
+; WAVE64-NEXT: .cfi_undefined 2768
+; WAVE64-NEXT: .cfi_undefined 2769
+; WAVE64-NEXT: .cfi_undefined 2770
+; WAVE64-NEXT: .cfi_undefined 2771
+; WAVE64-NEXT: .cfi_undefined 2772
+; WAVE64-NEXT: .cfi_undefined 2773
+; WAVE64-NEXT: .cfi_undefined 2774
+; WAVE64-NEXT: .cfi_undefined 2775
+
+; WAVE64-NEXT: .cfi_undefined 2784
+; WAVE64-NEXT: .cfi_undefined 2785
+; WAVE64-NEXT: .cfi_undefined 2786
+; WAVE64-NEXT: .cfi_undefined 2787
+; WAVE64-NEXT: .cfi_undefined 2788
+; WAVE64-NEXT: .cfi_undefined 2789
+; WAVE64-NEXT: .cfi_undefined 2790
+; WAVE64-NEXT: .cfi_undefined 2791
+
+; WAVE64-NEXT: .cfi_undefined 2800
+; WAVE64-NEXT: .cfi_undefined 2801
+; WAVE64-NEXT: .cfi_undefined 2802
+; WAVE64-NEXT: .cfi_undefined 2803
+; WAVE64-NEXT: .cfi_undefined 2804
+; WAVE64-NEXT: .cfi_undefined 2805
+; WAVE64-NEXT: .cfi_undefined 2806
+; WAVE64-NEXT: .cfi_undefined 2807
+
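+; (The AMDGPU DWARF numbering used in these checks: SGPRn is 32+n, wave64
+; VGPRn is 2560+n, and wave32 VGPRn is 1536+n.)
+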
+; VGPR0_wave32 = 1536
+; WAVE32-NEXT: .cfi_undefined 1536
+; WAVE32-NEXT: .cfi_undefined 1537
+; WAVE32-NEXT: .cfi_undefined 1538
+; WAVE32-NEXT: .cfi_undefined 1539
+; WAVE32-NEXT: .cfi_undefined 1540
+; WAVE32-NEXT: .cfi_undefined 1541
+; WAVE32-NEXT: .cfi_undefined 1542
+; WAVE32-NEXT: .cfi_undefined 1543
+; WAVE32-NEXT: .cfi_undefined 1544
+; WAVE32-NEXT: .cfi_undefined 1545
+; WAVE32-NEXT: .cfi_undefined 1546
+; WAVE32-NEXT: .cfi_undefined 1547
+; WAVE32-NEXT: .cfi_undefined 1548
+; WAVE32-NEXT: .cfi_undefined 1549
+; WAVE32-NEXT: .cfi_undefined 1550
+; WAVE32-NEXT: .cfi_undefined 1551
+; WAVE32-NEXT: .cfi_undefined 1552
+; WAVE32-NEXT: .cfi_undefined 1553
+; WAVE32-NEXT: .cfi_undefined 1554
+; WAVE32-NEXT: .cfi_undefined 1555
+; WAVE32-NEXT: .cfi_undefined 1556
+; WAVE32-NEXT: .cfi_undefined 1557
+; WAVE32-NEXT: .cfi_undefined 1558
+; WAVE32-NEXT: .cfi_undefined 1559
+; WAVE32-NEXT: .cfi_undefined 1560
+; WAVE32-NEXT: .cfi_undefined 1561
+; WAVE32-NEXT: .cfi_undefined 1562
+; WAVE32-NEXT: .cfi_undefined 1563
+; WAVE32-NEXT: .cfi_undefined 1564
+; WAVE32-NEXT: .cfi_undefined 1565
+; WAVE32-NEXT: .cfi_undefined 1566
+; WAVE32-NEXT: .cfi_undefined 1567
+; WAVE32-NEXT: .cfi_undefined 1568
+; WAVE32-NEXT: .cfi_undefined 1569
+; WAVE32-NEXT: .cfi_undefined 1570
+; WAVE32-NEXT: .cfi_undefined 1571
+; WAVE32-NEXT: .cfi_undefined 1572
+; WAVE32-NEXT: .cfi_undefined 1573
+; WAVE32-NEXT: .cfi_undefined 1574
+; WAVE32-NEXT: .cfi_undefined 1575
+
+; VGPR48_wave32 = 1584
+; WAVE32-NEXT: .cfi_undefined 1584
+; WAVE32-NEXT: .cfi_undefined 1585
+; WAVE32-NEXT: .cfi_undefined 1586
+; WAVE32-NEXT: .cfi_undefined 1587
+; WAVE32-NEXT: .cfi_undefined 1588
+; WAVE32-NEXT: .cfi_undefined 1589
+; WAVE32-NEXT: .cfi_undefined 1590
+; WAVE32-NEXT: .cfi_undefined 1591
+
+; WAVE32-NEXT: .cfi_undefined 1600
+; WAVE32-NEXT: .cfi_undefined 1601
+; WAVE32-NEXT: .cfi_undefined 1602
+; WAVE32-NEXT: .cfi_undefined 1603
+; WAVE32-NEXT: .cfi_undefined 1604
+; WAVE32-NEXT: .cfi_undefined 1605
+; WAVE32-NEXT: .cfi_undefined 1606
+; WAVE32-NEXT: .cfi_undefined 1607
+
+; WAVE32-NEXT: .cfi_undefined 1616
+; WAVE32-NEXT: .cfi_undefined 1617
+; WAVE32-NEXT: .cfi_undefined 1618
+; WAVE32-NEXT: .cfi_undefined 1619
+; WAVE32-NEXT: .cfi_undefined 1620
+; WAVE32-NEXT: .cfi_undefined 1621
+; WAVE32-NEXT: .cfi_undefined 1622
+; WAVE32-NEXT: .cfi_undefined 1623
+
+; WAVE32-NEXT: .cfi_undefined 1632
+; WAVE32-NEXT: .cfi_undefined 1633
+; WAVE32-NEXT: .cfi_undefined 1634
+; WAVE32-NEXT: .cfi_undefined 1635
+; WAVE32-NEXT: .cfi_undefined 1636
+; WAVE32-NEXT: .cfi_undefined 1637
+; WAVE32-NEXT: .cfi_undefined 1638
+; WAVE32-NEXT: .cfi_undefined 1639
+
+; WAVE32-NEXT: .cfi_undefined 1648
+; WAVE32-NEXT: .cfi_undefined 1649
+; WAVE32-NEXT: .cfi_undefined 1650
+; WAVE32-NEXT: .cfi_undefined 1651
+; WAVE32-NEXT: .cfi_undefined 1652
+; WAVE32-NEXT: .cfi_undefined 1653
+; WAVE32-NEXT: .cfi_undefined 1654
+; WAVE32-NEXT: .cfi_undefined 1655
+
+; WAVE32-NEXT: .cfi_undefined 1664
+; WAVE32-NEXT: .cfi_undefined 1665
+; WAVE32-NEXT: .cfi_undefined 1666
+; WAVE32-NEXT: .cfi_undefined 1667
+; WAVE32-NEXT: .cfi_undefined 1668
+; WAVE32-NEXT: .cfi_undefined 1669
+; WAVE32-NEXT: .cfi_undefined 1670
+; WAVE32-NEXT: .cfi_undefined 1671
+
+; WAVE32-NEXT: .cfi_undefined 1680
+; WAVE32-NEXT: .cfi_undefined 1681
+; WAVE32-NEXT: .cfi_undefined 1682
+; WAVE32-NEXT: .cfi_undefined 1683
+; WAVE32-NEXT: .cfi_undefined 1684
+; WAVE32-NEXT: .cfi_undefined 1685
+; WAVE32-NEXT: .cfi_undefined 1686
+; WAVE32-NEXT: .cfi_undefined 1687
+
+; WAVE32-NEXT: .cfi_undefined 1696
+; WAVE32-NEXT: .cfi_undefined 1697
+; WAVE32-NEXT: .cfi_undefined 1698
+; WAVE32-NEXT: .cfi_undefined 1699
+; WAVE32-NEXT: .cfi_undefined 1700
+; WAVE32-NEXT: .cfi_undefined 1701
+; WAVE32-NEXT: .cfi_undefined 1702
+; WAVE32-NEXT: .cfi_undefined 1703
+
+; WAVE32-NEXT: .cfi_undefined 1712
+; WAVE32-NEXT: .cfi_undefined 1713
+; WAVE32-NEXT: .cfi_undefined 1714
+; WAVE32-NEXT: .cfi_undefined 1715
+; WAVE32-NEXT: .cfi_undefined 1716
+; WAVE32-NEXT: .cfi_undefined 1717
+; WAVE32-NEXT: .cfi_undefined 1718
+; WAVE32-NEXT: .cfi_undefined 1719
+
+; WAVE32-NEXT: .cfi_undefined 1728
+; WAVE32-NEXT: .cfi_undefined 1729
+; WAVE32-NEXT: .cfi_undefined 1730
+; WAVE32-NEXT: .cfi_undefined 1731
+; WAVE32-NEXT: .cfi_undefined 1732
+; WAVE32-NEXT: .cfi_undefined 1733
+; WAVE32-NEXT: .cfi_undefined 1734
+; WAVE32-NEXT: .cfi_undefined 1735
+
+; WAVE32-NEXT: .cfi_undefined 1744
+; WAVE32-NEXT: .cfi_undefined 1745
+; WAVE32-NEXT: .cfi_undefined 1746
+; WAVE32-NEXT: .cfi_undefined 1747
+; WAVE32-NEXT: .cfi_undefined 1748
+; WAVE32-NEXT: .cfi_undefined 1749
+; WAVE32-NEXT: .cfi_undefined 1750
+; WAVE32-NEXT: .cfi_undefined 1751
+
+; WAVE32-NEXT: .cfi_undefined 1760
+; WAVE32-NEXT: .cfi_undefined 1761
+; WAVE32-NEXT: .cfi_undefined 1762
+; WAVE32-NEXT: .cfi_undefined 1763
+; WAVE32-NEXT: .cfi_undefined 1764
+; WAVE32-NEXT: .cfi_undefined 1765
+; WAVE32-NEXT: .cfi_undefined 1766
+; WAVE32-NEXT: .cfi_undefined 1767
+
+; WAVE32-NEXT: .cfi_undefined 1776
+; WAVE32-NEXT: .cfi_undefined 1777
+; WAVE32-NEXT: .cfi_undefined 1778
+; WAVE32-NEXT: .cfi_undefined 1779
+; WAVE32-NEXT: .cfi_undefined 1780
+; WAVE32-NEXT: .cfi_undefined 1781
+; WAVE32-NEXT: .cfi_undefined 1782
+; WAVE32-NEXT: .cfi_undefined 1783
+
+
+; SGPR0 = 32
+; CHECK-NEXT: .cfi_undefined 32
+; CHECK-NEXT: .cfi_undefined 33
+; CHECK-NEXT: .cfi_undefined 34
+; CHECK-NEXT: .cfi_undefined 35
+; CHECK-NEXT: .cfi_undefined 36
+; CHECK-NEXT: .cfi_undefined 37
+; CHECK-NEXT: .cfi_undefined 38
+; CHECK-NEXT: .cfi_undefined 39
+; CHECK-NEXT: .cfi_undefined 40
+; CHECK-NEXT: .cfi_undefined 41
+; CHECK-NEXT: .cfi_undefined 42
+; CHECK-NEXT: .cfi_undefined 43
+; CHECK-NEXT: .cfi_undefined 44
+; CHECK-NEXT: .cfi_undefined 45
+; CHECK-NEXT: .cfi_undefined 46
+; CHECK-NEXT: .cfi_undefined 47
+; CHECK-NEXT: .cfi_undefined 48
+; CHECK-NEXT: .cfi_undefined 49
+; CHECK-NEXT: .cfi_undefined 50
+; CHECK-NEXT: .cfi_undefined 51
+; CHECK-NEXT: .cfi_undefined 52
+; CHECK-NEXT: .cfi_undefined 53
+; CHECK-NEXT: .cfi_undefined 54
+; CHECK-NEXT: .cfi_undefined 55
+; CHECK-NEXT: .cfi_undefined 56
+; CHECK-NEXT: .cfi_undefined 57
+; CHECK-NEXT: .cfi_undefined 58
+; CHECK-NEXT: .cfi_undefined 59
+; CHECK-NEXT: .cfi_undefined 60
+; CHECK-NEXT: .cfi_undefined 61
+; CHECK-NEXT: .cfi_undefined 62
+; CHECK-NEXT: .cfi_undefined 63
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; WAVE64: s_or_saveexec_b64 s[4:5], -1
+; WAVE32: s_or_saveexec_b32 s4, -1
+; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; VGPR40_wave64 = 2600
+; WAVE64-NEXT: .cfi_offset 2600, 0
+; VGPR40_wave32 = 1576
+; WAVE32-NEXT: .cfi_offset 1576, 0
+; CHECK-NOT: .cfi_{{.*}}
+; WAVE64: s_mov_b64 exec, s[4:5]
+; WAVE32: s_mov_b32 exec_lo, s4
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: v_writelane_b32 v40, s33, 2
+
+; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41]
+; BLOCK_LENGTH ULEB128(5)=[0x05]
+; DW_OP_regx [0x90]
+; VGPR40_wave64 ULEB128(2600)=[0xa8, 0x14]
+; DW_OP_LLVM_offset_uconst [0xe4]
+; OFFSET ULEB128(0x08) [0x08]
+; WAVE64-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa8, 0x14, 0xe4, 0x08
+
+; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41]
+; BLOCK_LENGTH ULEB128(5)=[0x05]
+; DW_OP_regx [0x90]
+; VGPR40_wave32 ULEB128(1576)=[0xa8, 0x0c]
+; DW_OP_LLVM_offset_uconst [0xe4]
+; OFFSET ULEB128(0x08) [0x08]
+; WAVE32-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa8, 0x0c, 0xe4, 0x08
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: s_mov_b32 s33, s32
+; SGPR33 = 65
+; CHECK-NEXT: .cfi_def_cfa_register 65
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: s_sub_u32 s32, s32,
+; CHECK-NEXT: v_readlane_b32 s33, v40, 2
+; SGPR32 = 64
+; CHECK-NEXT: .cfi_def_cfa_register 64
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: .cfi_endproc
+define hidden void @func_call_clobber() #0 {
+entry:
+  call void @ex() #0
+  ret void
+}
+
 attributes #0 = { nounwind }
+attributes #1 = { nounwind "frame-pointer"="all" }

 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -192,6 +192,7 @@
 ; GFX9-NEXT:    v_writelane_b32 v43, s33, 4
 ; GFX9-NEXT:    s_mov_b32 s33, s32
 ; GFX9-NEXT:    s_add_u32 s32, s32, 0x800
+; GFX9-NEXT:    s_nop 0
 ; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -151,8 +151,8 @@
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s7, s33
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_cbranch_execz BB2_3
@@ -214,8 +214,8 @@
 ; GCN-LABEL: func_non_entry_block_static_alloca_align64:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; GCN-NEXT:    s_mov_b32 s7, s33
+; GCN-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; GCN-NEXT:    s_and_b32 s33, s4, 0xfffff000
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x2000
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -13,6 +13,8 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp0:
 ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5
@@ -35,6 +37,12 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
+; GCN-NEXT: .cfi_undefined 2560
+; GCN-NEXT: .cfi_undefined 2561
+; GCN-NEXT: .cfi_undefined 2562
+; GCN-NEXT: .cfi_undefined 2563
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp2:
 ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17
@@ -65,6 +73,8 @@
 ; GCN-NEXT: ; %bb.0:
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp8:
 ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5
@@ -83,6 +93,8 @@
 ; GCN-NEXT: ; %bb.0:
 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp10:
 ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5
@@ -103,6 +115,8 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp12:
 ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5
@@ -121,6 +135,8 @@
 ; GCN-NEXT: ; %bb.0:
 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp14:
 ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5
@@ -139,6 +155,8 @@
 ; GCN-NEXT: ; %bb.0:
 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: .Ltmp16:
 ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -124,14 +124,14 @@
 }

 ; GCN-LABEL: {{^}}default_realign_align128:
-; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
 ; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000
-; GCN-NEXT: s_add_u32 s32, s32, 0x4000
-; GCN-NOT: s33
+; GCN: s_add_u32 s32, s32, 0x4000
 ; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
 ; GCN: s_sub_u32 s32, s32, 0x4000
 ; GCN: s_mov_b32 s33, [[FP_COPY]]
+; GCN-NOT: s33
 define void @default_realign_align128(i32 %idx) #0 {
   %alloca.align = alloca i32, align 128, addrspace(5)
   store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
@@ -164,17 +164,17 @@
 ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
 ; GCN: s_mov_b32 s34, s32
-; GCN-NEXT: v_mov_b32_e32 v32, 0
+; GCN: v_mov_b32_e32 v32, 0
+; GCN: s_add_u32 s32, s32, 0x30000
 ; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
 ; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
-; GCN-NEXT: s_add_u32 s32, s32, 0x30000

 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]

-; GCN: v_readlane_b32 s33, [[VGPR_REG]], 2
-; GCN-NEXT: s_sub_u32 s32, s32, 0x30000
+; GCN: s_sub_u32 s32, s32, 0x30000
+; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2
 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
@@ -194,11 +194,11 @@
 ; The BP value will get saved/restored in an SGPR at the prologue/epilogue.
 ; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
-; GCN: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
 ; GCN-NEXT: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
 ; GCN-NEXT: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
+; GCN-NEXT: s_mov_b32 s34, s32
 ; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
 ; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
 ; GCN: s_add_u32 s32, s32, 0x30000
@@ -235,8 +235,8 @@
 ; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy:
 ; GCN: ; %bb.0:
 ; GCN: v_writelane_b32 [[VGPR_REG:v[0-9]+]], s34, 0
-; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
+; GCN: s_mov_b32 s34, s32
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:128
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND