diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -957,7 +957,6 @@ unsigned SDSTName; switch (MI->getOpcode()) { case AMDGPU::V_READLANE_B32: - case AMDGPU::V_READLANE_B32_gfx10: case AMDGPU::V_READFIRSTLANE_B32: SDSTName = AMDGPU::OpName::vdst; break; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -907,8 +907,7 @@ // Save FP before setting it up. // FIXME: This should respect spillSGPRToVGPR; - BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), - Spill[0].VGPR) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) .addReg(FramePtrReg) .addImm(Spill[0].Lane) .addReg(Spill[0].VGPR, RegState::Undef); @@ -926,8 +925,7 @@ // Save BP before setting it up. // FIXME: This should respect spillSGPRToVGPR; - BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), - Spill[0].VGPR) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) .addReg(BasePtrReg) .addImm(Spill[0].Lane) .addReg(Spill[0].VGPR, RegState::Undef); @@ -1078,8 +1076,7 @@ ArrayRef Spill = FuncInfo->getSGPRToVGPRSpills(FI); assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), - FramePtrReg) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) .addReg(Spill[0].VGPR) .addImm(Spill[0].Lane); } @@ -1104,8 +1101,7 @@ ArrayRef Spill = FuncInfo->getSGPRToVGPRSpills(BasePtrFI); assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), - BasePtrReg) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) .addReg(Spill[0].VGPR) .addImm(Spill[0].Lane); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3502,13 +3502,7 @@ if (SIInstrInfo::isVALU(MI)) { switch (MI.getOpcode()) { case AMDGPU::V_READLANE_B32: - case AMDGPU::V_READLANE_B32_gfx6_gfx7: - case AMDGPU::V_READLANE_B32_gfx10: - case AMDGPU::V_READLANE_B32_vi: case AMDGPU::V_WRITELANE_B32: - case AMDGPU::V_WRITELANE_B32_gfx6_gfx7: - case AMDGPU::V_WRITELANE_B32_gfx10: - case AMDGPU::V_WRITELANE_B32_vi: return false; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1029,12 +1029,12 @@ } else if (!IsKill) { // Restore SGPRs from appropriate VGPR lanes if (!OnlyExecLo) { - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1])) .addReg(VGPR) .addImm(ExecLane + 1); } - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), NumSubRegs == 1 ? SavedExecReg : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane])) @@ -1094,12 +1094,11 @@ // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. - auto MIB = BuildMI(*MBB, MI, DL, - TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), - Spill.VGPR) - .addReg(SubReg, getKillRegState(UseKill)) - .addImm(Spill.Lane) - .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef); + auto MIB = + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) + .addReg(SubReg, getKillRegState(UseKill)) + .addImm(Spill.Lane) + .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef); if (i == 0 && NumSubRegs > 1) { // We may be spilling a super-register which is only partially defined, @@ -1138,9 +1137,7 @@ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); MachineInstrBuilder WriteLane = - BuildMI(*MBB, MI, DL, - TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), - TmpVGPR) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR) .addReg(SubReg, SubKillState) .addImm(i % PerVGPR) .addReg(TmpVGPR, TmpVGPRFlags); @@ -1204,11 +1201,9 @@ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - auto MIB = - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), - SubReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane); + auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); if (NumSubRegs > 1 && i == 0) MIB.addReg(SuperReg, RegState::ImplicitDefine); } @@ -1234,8 +1229,7 @@ bool LastSubReg = (i + 1 == e); auto MIB = - BuildMI(*MBB, MI, DL, - TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) .addReg(TmpVGPR, getKillRegState(LastSubReg)) .addImm(i); if (NumSubRegs > 1 && i == 0) diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -254,19 +254,20 @@ ; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr: ; GCN: s_waitcnt ; GCN-NEXT: v_writelane_b32 v1, s33, 63 -; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-COUNT-60: v_writelane_b32 v1 +; GCN: s_mov_b32 s33, s32 +; GCN-COUNT-2: v_writelane_b32 v1 ; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; GCN-COUNT-63: v_writelane_b32 v1 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8 ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:8 ; GCN: ;;#ASMSTART -; GCN-COUNT-63: v_readlane_b32 s{{[0-9]+}}, v1 +; GCN: v_writelane_b32 v1 ; MUBUF: s_add_u32 s32, s32, 0x300 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300 +; MUBUF: s_sub_u32 s32, s32, 0x300 ; FLATSCR: s_add_u32 s32, s32, 12 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 12 +; FLATSCR: s_sub_u32 s32, s32, 12 ; GCN-NEXT: v_readlane_b32 s33, v1, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 @@ -289,22 +290,22 @@ ; Use a copy to a free SGPR instead of introducing a second CSR VGPR. ; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr: ; GCN: s_waitcnt -; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-COUNT-62: v_writelane_b32 v1, +; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; GCN-COUNT-64: v_writelane_b32 v1, - +; GCN: v_writelane_b32 v1, +; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; MUBUF: buffer_store_dword ; FLATSCR: scratch_store_dword ; GCN: ;;#ASMSTART -; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 - +; GCN: v_writelane_b32 v1, ; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; MUBUF: s_add_u32 s32, s32, 0x300 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300 ; FLATSCR: s_add_u32 s32, s32, 12 +; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 +; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300 ; FLATSCR-NEXT: s_sub_u32 s32, s32, 12 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -398,10 +399,9 @@ ; MUBUF: s_add_u32 s32, s32, 0x300{{$}} ; FLATSCR: s_add_u32 s32, s32, 12{{$}} -; GCN: ;;#ASMSTART - ; GCN: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; GCN-NEXT: v_readlane_b32 s5, [[CSR_VGPR]], 1 +; GCN: ;;#ASMSTART +; GCN: v_readlane_b32 s5, [[CSR_VGPR]], 1 ; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300{{$}} ; FLATSCR-NEXT: s_sub_u32 s32, s32, 12{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 @@ -450,10 +450,9 @@ ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; GCN: ;;#ASMSTART - ; GCN: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; GCN-NEXT: v_readlane_b32 s5, [[CSR_VGPR]], 1 +; GCN: ;;#ASMSTART +; GCN: v_readlane_b32 s5, [[CSR_VGPR]], 1 ; MUBUF-NEXT: s_sub_u32 s32, s32, 0x40300{{$}} ; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x100c{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -142,8 +142,8 @@ ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: v_mov_b32_e32 v1, v4 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll b/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll --- a/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll @@ -3,8 +3,8 @@ ; Make sure new higher SGPRs are callee saved ; GFX10-LABEL: {{^}}callee_new_sgprs: ; GFX10: v_writelane_b32 v0, s104, 0 -; GFX10: v_writelane_b32 v0, s105, 1 -; GFX10: ; clobber s104 +; GFX10-DAG: v_writelane_b32 v0, s105, 1 +; GFX10-DAG: ; clobber s104 ; GFX10: ; clobber s105 ; GFX10: v_readlane_b32 s105, v0, 1 ; GFX10: v_readlane_b32 s104, v0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir @@ -14,10 +14,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec_lo ; CHECK: $sgpr0 = S_MOV_B32 $exec_lo - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_lo @@ -38,10 +38,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec_hi ; CHECK: $sgpr0 = S_MOV_B32 $exec_hi - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_hi @@ -62,13 +62,13 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $exec ; CHECK: $sgpr0_sgpr1 = S_MOV_B64 $exec - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr1, 1, killed $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32_vi $vgpr0, 1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 + ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32_vi killed $vgpr0, 1 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 + ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 ; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec @@ -91,10 +91,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo @@ -113,10 +113,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi @@ -135,13 +135,13 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr1, 1, killed $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32_vi $vgpr0, 1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 + ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK: $sgpr1 = V_READLANE_B32_vi killed $vgpr0, 1 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 + ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 ; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir @@ -15,10 +15,10 @@ ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def $m0 ; CHECK: $sgpr0 = S_MOV_B32 $m0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK: S_NOP 0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec @@ -43,10 +43,10 @@ ; CHECK: liveins: $vgpr0 ; CHECK: S_WAITCNT 0 ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK: S_NOP 0 ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -192,35 +192,35 @@ ; GFX9-NEXT: v_writelane_b32 v43, s33, 4 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_add_u32 s32, s32, 0x800 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s34, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 ; GFX9-NEXT: v_writelane_b32 v43, s35, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, v1 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_writelane_b32 v43, s30, 2 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40 ; GFX9-NEXT: v_writelane_b32 v43, s31, 3 ; GFX9-NEXT: v_and_b32_e32 v42, 0xffffff, v40 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mad_u32_u24 v40, v41, v40, v42 ; GFX9-NEXT: v_mov_b32_e32 v0, v40 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_add_u32_e32 v0, v40, v42 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s4, v43, 2 ; GFX9-NEXT: v_readlane_b32 s5, v43, 3 ; GFX9-NEXT: v_readlane_b32 s35, v43, 1 ; GFX9-NEXT: v_readlane_b32 s34, v43, 0 -; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: s_sub_u32 s32, s32, 0x800 ; GFX9-NEXT: v_readlane_b32 s33, v43, 4 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -25,7 +25,7 @@ ; GFX8-LABEL: name: pei_scavenge_vgpr_spill ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX8: $vgpr2 = V_WRITELANE_B32_vi $sgpr33, 0, undef $vgpr2 + ; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX8: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc ; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc @@ -36,13 +36,13 @@ ; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec ; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; GFX8: $sgpr33 = V_READLANE_B32_vi $vgpr2, 0 + ; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-LABEL: name: pei_scavenge_vgpr_spill ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9: $vgpr2 = V_WRITELANE_B32_vi $sgpr33, 0, undef $vgpr2 + ; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc ; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc @@ -52,20 +52,20 @@ ; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec ; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec ; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; GFX9: $sgpr33 = V_READLANE_B32_vi $vgpr2, 0 + ; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32_vi $sgpr33, 0, undef $vgpr2 + ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9-FLATSCR: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc ; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294959104, implicit-def $scc ; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc ; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc ; GFX9-FLATSCR: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec ; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc - ; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32_vi $vgpr2, 0 + ; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec S_ENDPGM 0, csr_amdgpu_allvgprs diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir @@ -21,8 +21,8 @@ ; CHECK-LABEL: name: sgpr_spill_s64_undef_high32 ; CHECK: liveins: $sgpr4, $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5 - ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) ... @@ -45,8 +45,8 @@ ; CHECK-LABEL: name: sgpr_spill_s64_undef_low32 ; CHECK: liveins: $sgpr5, $vgpr0 - ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5 - ; CHECK: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -214,16 +214,16 @@ ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s34, v42, 0 -; GCN-DAG: v_readlane_b32 s35, v42, 1 - -; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-DAG: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 +; GCN-DAG: v_readlane_b32 s34, v42, 0 +; GCN-DAG: v_readlane_b32 s35, v42, 1 + ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: v_readlane_b32 s33, ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir --- a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir @@ -304,21 +304,6 @@ S_ENDPGM 0 ... -# Workaround since spilling/restoring SGPRs use real opcodes. -# GCN-LABEL: name: hazard_smem_war_readlane_gfx10 -# GCN: S_LOAD_DWORD_IMM -# GCN: $sgpr_null = S_MOV_B32 0 -# GCN-NEXT: V_READLANE_B32_gfx10 ---- -name: hazard_smem_war_readlane_gfx10 -body: | - bb.0: - liveins: $sgpr0, $sgpr1, $sgpr3, $vgpr0 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 - $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, $sgpr3 - S_ENDPGM 0 -... - # GCN-LABEL: name: hazard_smem_war_readfirstlane # GCN: S_LOAD_DWORD_IMM # GCN: $sgpr_null = S_MOV_B32 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -22,10 +22,10 @@ ; GCN-LABEL: name: spill_sgpr128_use_subreg ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: renamable $sgpr1 = COPY $sgpr2 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1 ; GCN: S_ENDPGM 0, implicit $sgpr8 renamable $sgpr1 = COPY $sgpr2 @@ -52,10 +52,10 @@ ; GCN-LABEL: name: spill_sgpr128_use_kill ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: renamable $sgpr1 = COPY $sgpr2 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: S_ENDPGM 0 renamable $sgpr1 = COPY $sgpr2 SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -36,9 +36,8 @@ bb.0: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 - ; CHECK-LABEL: name: check_vcc - ; CHECK: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 - + ; GFX9-LABEL: name: check_vcc + ; GFX9: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX9: $sgpr33 = S_MOV_B32 0 ; GFX9: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 @@ -47,17 +46,17 @@ ; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9: $vcc = IMPLICIT_DEF - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GFX9: $exec = S_MOV_B64 $vcc - ; GFX9: $vcc_hi = V_READLANE_B32_vi $vgpr0, 1 - ; GFX9: $vcc_lo = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $vcc_hi = V_READLANE_B32 $vgpr0, 1 + ; GFX9: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 ; GFX9: $vcc = IMPLICIT_DEF - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) @@ -66,9 +65,10 @@ ; GFX9: $exec = S_MOV_B64 3 ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $vcc - ; GFX9: $vcc_lo = V_READLANE_B32_vi $vgpr0, 0, implicit-def $vcc - ; GFX9: $vcc_hi = V_READLANE_B32_vi killed $vgpr0, 1 - + ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc + ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 + ; GFX10-LABEL: name: check_vcc + ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX10: $sgpr33 = S_MOV_B32 0 ; GFX10: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -77,17 +77,17 @@ ; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10: $vcc = IMPLICIT_DEF - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GFX10: $exec = S_MOV_B64 $vcc - ; GFX10: $vcc_hi = V_READLANE_B32_gfx10 $vgpr0, 1 - ; GFX10: $vcc_lo = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $vcc_hi = V_READLANE_B32 $vgpr0, 1 + ; GFX10: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0 ; GFX10: $vcc = IMPLICIT_DEF - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) @@ -96,8 +96,8 @@ ; GFX10: $exec = S_MOV_B64 3 ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; GFX10: $exec = S_MOV_B64 killed $vcc - ; GFX10: $vcc_lo = V_READLANE_B32_gfx10 $vgpr0, 0, implicit-def $vcc - ; GFX10: $vcc_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc + ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 $vcc = IMPLICIT_DEF SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -30,12 +30,12 @@ ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0 ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) @@ -43,12 +43,12 @@ ; EXPANDED: S_NOP 1 ; EXPANDED: bb.2: ; EXPANDED: liveins: $vgpr0 - ; EXPANDED: $sgpr4 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $sgpr5 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 1 - ; EXPANDED: $sgpr6 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 2 - ; EXPANDED: $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3 - ; EXPANDED: $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4 - ; EXPANDED: $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5 + ; EXPANDED: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED: $sgpr9 = V_READLANE_B32 $vgpr0, 5 ; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 bb.0: S_NOP 0, implicit-def %0:sgpr_192 diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll --- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll @@ -1,10 +1,11 @@ ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -enable-var-scope %s ; CHECK-LABEL: {{^}}spill_more_than_wavesize_csr_sgprs: -; CHECK: v_writelane_b32 v0, s98, 63 -; CHECK-NEXT: v_writelane_b32 v1, s99, 0 -; CHECK: v_readlane_b32 s99, v1, 0 -; CHECK-NEXT: v_readlane_b32 s98, v0, 63 +; CHECK-DAG: v_writelane_b32 v0, s98, 63 +; CHECK-DAG: v_writelane_b32 v1, s99, 0 +; CHECK-NOT: dummy +; CHECK-DAG: v_readlane_b32 s99, v1, 0 +; CHECK-DAG: v_readlane_b32 s98, v0, 63 define void @spill_more_than_wavesize_csr_sgprs() { call void asm sideeffect "", @@ -21,10 +22,11 @@ } ; CHECK-LABEL: {{^}}spill_more_than_wavesize_csr_sgprs_with_stack_object: -; CHECK: v_writelane_b32 v1, s98, 63 -; CHECK-NEXT: v_writelane_b32 v2, s99, 0 -; CHECK: v_readlane_b32 s99, v2, 0 -; CHECK-NEXT: v_readlane_b32 s98, v1, 63 +; CHECK-DAG: v_writelane_b32 v1, s98, 63 +; CHECK-DAG: v_writelane_b32 v2, s99, 0 +; CHECK-NOT: dummy +; CHECK-DAG: v_readlane_b32 s99, v2, 0 +; CHECK-DAG: v_readlane_b32 s98, v1, 63 define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { %alloca = alloca i32, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -159,8 +159,8 @@ ; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2 -; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s34, 3 -; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 +; GCN-DAG: v_writelane_b32 [[VGPR_REG]], s34, 3 +; GCN-DAG: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000 ; GCN: s_mov_b32 s34, s32 ; GCN: v_mov_b32_e32 v32, 0 @@ -171,8 +171,8 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s33, [[VGPR_REG]], 2 -; GCN-NEXT: s_sub_u32 s32, s32, 0x30000 +; GCN: s_sub_u32 s32, s32, 0x30000 +; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload @@ -235,11 +235,11 @@ ; GCN: v_writelane_b32 [[VGPR_REG:v[0-9]+]], s34, 0 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34 +; GCN: v_readlane_b32 s34, [[VGPR_REG:v[0-9]+]], 0 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:128 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN: v_readlane_b32 s34, [[VGPR_REG:v[0-9]+]], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir --- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -90,8 +90,8 @@ # instructions to fix vccz. # CHECK-LABEL: name: reload_vcc_from_vgpr -# CHECK: $vcc_lo = V_READLANE_B32_vi $vgpr0, 8, implicit-def $vcc -# CHECK: $vcc_hi = V_READLANE_B32_vi $vgpr0, 9 +# CHECK: $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc +# CHECK: $vcc_hi = V_READLANE_B32 $vgpr0, 9 # SI: $vcc = S_MOV_B64 $vcc # GFX9: $vcc = S_MOV_B64 $vcc # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc @@ -99,8 +99,8 @@ name: reload_vcc_from_vgpr body: | bb.0: - $vcc_lo = V_READLANE_B32_vi $vgpr0, 8, implicit-def $vcc - $vcc_hi = V_READLANE_B32_vi $vgpr0, 9 + $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc + $vcc_hi = V_READLANE_B32 $vgpr0, 9 S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -14,22 +14,20 @@ ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9: v_writelane_b32 v44, s30, 0 -; GFX9-NEXT: v_mov_b32_e32 v36, v16 +; GFX9: v_mov_b32_e32 v36, v16 ; GFX9-NEXT: v_mov_b32_e32 v35, v15 ; GFX9-NEXT: v_mov_b32_e32 v34, v14 ; GFX9-NEXT: v_mov_b32_e32 v33, v13 ; GFX9-NEXT: v_mov_b32_e32 v32, v12 - ; GFX9: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND - ; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[4:7] dmask:0x1 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload @@ -60,9 +58,9 @@ ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 +; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10: buffer_load_dword v43, off, s[0:3], s33 @@ -107,7 +105,7 @@ ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -136,8 +134,9 @@ ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_mov_b32_e32 v41, v15 +; GFX10-NEXT: v_mov_b32_e32 v40, v16 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v41, v15 ; GFX10-NEXT: v_mov_b32_e32 v42, v14 ; GFX10-NEXT: v_mov_b32_e32 v43, v13 ; GFX10-NEXT: v_mov_b32_e32 v44, v12