diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -116,6 +116,8 @@
   void emitFunctionBodyEnd() override;
 
+  void emitImplicitDef(const MachineInstr *MI) const override;
+
   void emitFunctionEntryLabel() override;
 
   void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -245,6 +245,21 @@
   Streamer.popSection();
 }
 
+void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+  Register RegNo = MI->getOperand(0).getReg();
+
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  OS << "implicit-def: "
+     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+  if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
+    OS << " : SGPR spill to VGPR lane";
+
+  OutStreamer->AddComment(OS.str());
+  OutStreamer->addBlankLine();
+}
+
 void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
   if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
     AsmPrinter::emitFunctionEntryLabel();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1397,6 +1397,13 @@
 
 } // end namespace AMDGPU
 
+namespace AMDGPU {
+enum AsmComments {
+  // For sgpr to vgpr spill instructions
+  SGPR_SPILL = MachineInstr::TAsmComments
+};
+} // namespace AMDGPU
+
 namespace SI {
 
 namespace KernelInputOffsets {
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -281,6 +281,8 @@
     auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
                        TII->get(AMDGPU::IMPLICIT_DEF), Reg);
     MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+    // Set SGPR_SPILL asm printer flag
+    MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
     if (LIS) {
       LIS->InsertMachineInstrInMaps(*MIB);
     }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s4
-; CHECK-NEXT:    ; implicit-def: $vgpr8
+; CHECK-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_mov_b32_e32 v8, v0
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
 ; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -36,7 +36,7 @@
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
@@ -144,7 +144,7 @@
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
@@ -232,7 +232,7 @@
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
@@ -321,7 +321,7 @@
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
@@ -422,7 +422,7 @@
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -48,7 +48,7 @@
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -221,7 +221,7 @@
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -430,7 +430,7 @@
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -676,7 +676,7 @@
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -931,7 +931,7 @@
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -1080,7 +1080,7 @@
 ; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -117,7 +117,7 @@
 ; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_OPT-NEXT:    ; implicit-def: $vgpr0
+; FLAT_SCR_OPT-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v0, s2, 0
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v0, s3, 1
@@ -240,7 +240,7 @@
 ; FLAT_SCR_ARCH-LABEL: test:
 ; FLAT_SCR_ARCH:       ; %bb.0:
 ; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_ARCH-NEXT:    ; implicit-def: $vgpr0
+; FLAT_SCR_ARCH-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v0, s2, 0
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v0, s3, 1
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; CHECK-NEXT:    s_add_i32 s12, s33, 0x100200
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -144,7 +144,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr5
+; W64-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    v_mov_b32_e32 v5, v3
 ; W64-O0-NEXT:    v_mov_b32_e32 v6, v2
@@ -497,7 +497,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr13
+; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
@@ -1019,7 +1019,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr8
+; W64-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    v_mov_b32_e32 v8, v6
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -143,7 +143,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr5
+; W64-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    v_mov_b32_e32 v6, v2
@@ -511,7 +511,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr13
+; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
@@ -1058,7 +1058,7 @@
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr8
+; W64-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
--- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -71,7 +71,7 @@
 ; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT:    s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -139,7 +139,7 @@
 ; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT:    s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -260,7 +260,7 @@
 ; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT:    s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -17,9 +17,9 @@
 ; GCN-NEXT:    s_mov_b32 s95, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s92, s92, s11
 ; GCN-NEXT:    s_addc_u32 s93, s93, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    ; implicit-def: $vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr2
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
@@ -488,8 +488,8 @@
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s11
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    ; implicit-def: $vgpr1
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
@@ -738,8 +738,8 @@
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s11
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    ; implicit-def: $vgpr0
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
@@ -989,8 +989,8 @@
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s11
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    ; implicit-def: $vgpr0
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[4:5], 0x9
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -8,7 +8,7 @@
 define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-LABEL: kernel0:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; implicit-def: $vgpr23
+; CHECK-NEXT:    ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -9,8 +9,8 @@
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_add_u32 s0, s0, s15
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    ; implicit-def: $vgpr0
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s4, s[8:9], 0x2
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -13,7 +13,7 @@
 ; GCN-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
 ; GCN-NEXT:    s_mov_b32 s38, -1
 ; GCN-NEXT:    s_mov_b32 s39, 0xe00000
-; GCN-NEXT:    ; implicit-def: $vgpr3
+; GCN-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_add_u32 s36, s36, s11
 ; GCN-NEXT:    v_writelane_b32 v3, s4, 0
 ; GCN-NEXT:    s_movk_i32 s32, 0x400
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -715,7 +715,7 @@
 ; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
-; GCN-NEXT:    ; implicit-def: $vgpr0
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
-; CHECK-NEXT:    ; implicit-def: $vgpr0
+; CHECK-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; CHECK-NEXT:  .LBB0_1: ; %bb.1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_3
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -16,7 +16,7 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_mov_b64 s[0:1], 0
 ; CHECK-NEXT:    s_load_dword s2, s[0:1], 0x0
-; CHECK-NEXT:    ; implicit-def: $vgpr2
+; CHECK-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    ; kill: killed $sgpr0_sgpr1
 ; CHECK-NEXT:    s_mov_b32 s7, 0x401c0000
 ; CHECK-NEXT:    s_mov_b32 s5, 0x40280000
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -13,7 +13,7 @@
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_add_u32 s0, s0, s15
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr1
+; CHECK-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v0
 ; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
@@ -16,7 +16,7 @@
 ; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT:    s_or_saveexec_b32 s33, -1
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
@@ -19,7 +19,7 @@
 ; GFX90A-NEXT:    buffer_store_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX90A-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX90A-NEXT:    v_writelane_b32 v40, s16, 2
-; GFX90A-NEXT:    ; implicit-def: $vgpr0
+; GFX90A-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GFX90A-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX90A-NEXT:    s_addk_i32 s32, 0x400
 ; GFX90A-NEXT:    v_writelane_b32 v40, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -27,7 +27,7 @@
 ; GCN-NEXT:    v_writelane_b32 v40, s28, 2
 ; GCN-NEXT:    v_writelane_b32 v40, s29, 3
 ; GCN-NEXT:    v_writelane_b32 v40, s16, 4
-; GCN-NEXT:    ; implicit-def: $vgpr0
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-NEXT:    s_addk_i32 s32, 0x800
 ; GCN-NEXT:    v_writelane_b32 v40, s31, 1
@@ -83,7 +83,7 @@
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s29, 3
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s16, 4
 ; GCN-O0-NEXT:    s_add_i32 s32, s32, 0x400
-; GCN-O0-NEXT:    ; implicit-def: $vgpr0
+; GCN-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s31, 1
 ; GCN-O0-NEXT:    ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -145,7 +145,7 @@
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr3
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -580,7 +580,7 @@
 ; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0x1000
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr0
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    v_writelane_b32 v10, s30, 0
 ; GFX9-O0-NEXT:    v_writelane_b32 v10, s31, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s34, s8
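The patch is a two-step handshake: SILowerSGPRSpills ORs AMDGPU::SGPR_SPILL into the asm-printer flag byte of the IMPLICIT_DEF it builds for the WWM spill register, and the overridden AMDGPUAsmPrinter::emitImplicitDef tests that bit to decide whether to append " : SGPR spill to VGPR lane" to the comment. Below is a minimal, self-contained C++ sketch of the flag scheme only; FakeMachineInstr is a hypothetical stand-in invented for illustration, while the CommentFlag values mirror llvm/include/llvm/CodeGen/MachineInstr.h, where target-specific comment bits start at TAsmComments.

// Sketch (not LLVM code): how a target-specific asm-comment flag is set on an
// instruction by one pass and later queried by the asm printer.
#include <cstdint>
#include <iostream>

// Mirrors MachineInstr::CommentFlag: generic flags occupy the low bits, and
// bits from TAsmComments upward are reserved for target-defined comments.
enum CommentFlag : uint8_t {
  ReloadReuse = 0x1,
  NoSchedComment = 0x2,
  TAsmComments = 0x4 // first target-specific comment bit
};

// Mirrors the new AMDGPU::AsmComments enum above: the target claims the first
// target-reserved bit for SGPR-to-VGPR spill instructions.
enum AsmComments : uint8_t { SGPR_SPILL = TAsmComments };

// Hypothetical stand-in for MachineInstr's asm-printer flag accessors.
struct FakeMachineInstr {
  uint8_t AsmPrinterFlags = 0;
  void setAsmPrinterFlag(uint8_t Flag) { AsmPrinterFlags |= Flag; }
  uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; }
};

int main() {
  FakeMachineInstr MI;
  // Producer side (SILowerSGPRSpills): tag the IMPLICIT_DEF of the spill VGPR.
  MI.setAsmPrinterFlag(SGPR_SPILL);
  // Consumer side (AMDGPUAsmPrinter::emitImplicitDef): extend the comment.
  std::cout << "; implicit-def: $vgpr0";
  if (MI.getAsmPrinterFlags() & SGPR_SPILL)
    std::cout << " : SGPR spill to VGPR lane";
  std::cout << '\n'; // prints: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
}

Because the flag rides on the MachineInstr itself rather than on the register or the frame index, the extra comment survives later scheduling and block placement and shows up exactly on the IMPLICIT_DEF that reserves the lane VGPR, which is what the test updates above check for.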