diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2994,7 +2994,8 @@ } else if (MO.isReg()) { auto Reg = MO.getReg(); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; + auto PReg = mc2PseudoReg(Reg); + return isSGPR(PReg, TRI) && PReg != SGPR_NULL; } else { return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -35,6 +35,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" @@ -321,7 +322,7 @@ return RegNo % NUM_VGPR_BANKS; } - unsigned RegNo = TRI->getEncodingValue(Reg) / 2; + unsigned RegNo = TRI->getEncodingValue(AMDGPU::getMCReg(Reg, *ST)) / 2; return RegNo % NUM_SGPR_BANKS + SGPR_BANK_OFFSET; } @@ -366,7 +367,7 @@ } // SGPRs have 8 banks holding 2 consequitive registers each. - unsigned RegNo = TRI->getEncodingValue(Reg) / 2; + unsigned RegNo = TRI->getEncodingValue(AMDGPU::getMCReg(Reg, *ST)) / 2; unsigned StartBit = AMDGPU::VGPR_32RegClass.getNumRegs(); if (RegNo + StartBit >= RegsUsed.size()) return 0; @@ -818,9 +819,10 @@ MaxNumSGPRs = std::min(ST->getMaxNumSGPRs(Occupancy, true), MaxNumSGPRs); CSRegs = MRI->getCalleeSavedRegs(); - - RegsUsed.resize(AMDGPU::VGPR_32RegClass.getNumRegs() + - TRI->getEncodingValue(AMDGPU::SGPR_NULL) / 2 + 1); + unsigned NumRegBanks = AMDGPU::VGPR_32RegClass.getNumRegs() + + // Not a tight bound + AMDGPU::SReg_32RegClass.getNumRegs() / 2 + 1; + RegsUsed.resize(NumRegBanks); LLVM_DEBUG(dbgs() << "=== RegBanks reassign analysis on function " << MF.getName() << '\n'); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -487,7 +487,7 @@ RegInterval Result; - unsigned Reg = TRI->getEncodingValue(Op.getReg()); + unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)); if (TRI->isVGPR(*MRI, Op.getReg())) { assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL); @@ -624,8 +624,9 @@ MachineOperand &DefMO = Inst.getOperand(I); if (DefMO.isReg() && DefMO.isDef() && TRI->isVGPR(*MRI, DefMO.getReg())) { - setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT, - CurrScore); + setRegScore( + TRI->getEncodingValue(AMDGPU::getMCReg(DefMO.getReg(), *ST)), + EXP_CNT, CurrScore); } } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -132,6 +132,7 @@ StringRef getRegAsmName(MCRegister Reg) const override; + // Pseudo regs are not allowed unsigned getHWRegIndex(MCRegister Reg) const { return getEncodingValue(Reg) & 0xff; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll @@ -29,7 +29,7 @@ ; ; GFX10NSA-LABEL: gather4_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -45,7 +45,7 @@ ; GFX10NSA-NEXT: s_mov_b32 s10, s12 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v0, v0, 0xffff, v1 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -84,7 +84,7 @@ ; ; GFX10NSA-LABEL: gather4_cube: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff @@ -103,7 +103,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -142,7 +142,7 @@ ; ; GFX10NSA-LABEL: gather4_2darray: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff @@ -161,7 +161,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -198,7 +198,7 @@ ; ; GFX10NSA-LABEL: gather4_c_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -214,7 +214,7 @@ ; GFX10NSA-NEXT: s_mov_b32 s10, s12 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -253,7 +253,7 @@ ; ; GFX10NSA-LABEL: gather4_cl_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff @@ -272,7 +272,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -311,7 +311,7 @@ ; ; GFX10NSA-LABEL: gather4_c_cl_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff @@ -330,7 +330,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -367,7 +367,7 @@ ; ; GFX10NSA-LABEL: gather4_b_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -383,7 +383,7 @@ ; GFX10NSA-NEXT: s_mov_b32 s10, s12 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -420,7 +420,7 @@ ; ; GFX10NSA-LABEL: gather4_c_b_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -436,7 +436,7 @@ ; GFX10NSA-NEXT: s_mov_b32 s10, s12 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v2, v2, 0xffff, v3 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -475,7 +475,7 @@ ; ; GFX10NSA-LABEL: gather4_b_cl_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff @@ -494,7 +494,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) @@ -533,7 +533,7 @@ ; ; GFX10NSA-LABEL: gather4_c_b_cl_2d: ; GFX10NSA: ; %bb.0: ; %main_body -; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo +; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10NSA-NEXT: v_mov_b32_e32 v5, 0xffff @@ -552,7 +552,7 @@ ; GFX10NSA-NEXT: v_and_or_b32 v2, v2, v5, v3 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 ; GFX10NSA-NEXT: v_and_or_b32 v3, v4, v5, s12 -; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: ; implicit-def: $vcc_hi ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -123,7 +123,7 @@ ; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s12, 0 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s13, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[30:31], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[28:29], vcc ; GFX1064-NEXT: s_cbranch_execz BB0_3 ; GFX1064-NEXT: ; %bb.2: ; GFX1064-NEXT: s_bcnt1_i32_b64 s12, s[12:13] @@ -131,7 +131,7 @@ ; GFX1064-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc ; GFX1064-NEXT: BB0_3: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1064-NEXT: s_or_b64 exec, exec, s[30:31] +; GFX1064-NEXT: s_or_b64 exec, exec, s[28:29] ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s4 @@ -355,14 +355,14 @@ ; GFX1064-NEXT: s_mov_b64 exec, s[10:11] ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: ; implicit-def: $vgpr0 -; GFX1064-NEXT: s_and_saveexec_b64 s[30:31], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[28:29], vcc ; GFX1064-NEXT: s_cbranch_execz BB1_3 ; GFX1064-NEXT: ; %bb.2: ; GFX1064-NEXT: v_mov_b32_e32 v0, s12 ; GFX1064-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; GFX1064-NEXT: BB1_3: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1064-NEXT: s_or_b64 exec, exec, s[30:31] +; GFX1064-NEXT: s_or_b64 exec, exec, s[28:29] ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v3 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -131,10 +131,10 @@ ; GFX9-LABEL: store_load_sindex_kernel: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -469,11 +469,11 @@ ; GFX9-LABEL: store_load_sindex_small_offset_kernel: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -492,10 +492,10 @@ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 @@ -863,11 +863,11 @@ ; GFX9-LABEL: store_load_sindex_large_offset_kernel: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -886,10 +886,10 @@ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 @@ -1149,10 +1149,10 @@ ; GFX9-LABEL: store_load_vidx_sidx_offset: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 15 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -97,11 +97,11 @@ ; ; GFX10-LABEL: image_sample_2d_f16_tfe: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s14, exec_lo +; GFX10-NEXT: s_mov_b32 s28, exec_lo ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, v2 -; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; GFX10-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe d16 ; GFX10-NEXT: v_mov_b32_e32 v0, s12 ; GFX10-NEXT: v_mov_b32_e32 v1, s13 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -77,7 +77,7 @@ ; ; GFX10-LABEL: sample_1d_tfe: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe] +; GFX10-NEXT: s_mov_b32 s28, exec_lo ; encoding: [0x7e,0x03,0x9c,0xbe] ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] @@ -85,7 +85,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] -; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; encoding: [0x7e,0x1c,0x7e,0x87] ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0f,0x81,0xf0,0x05,0x00,0x40,0x00] ; GFX10-NEXT: v_mov_b32_e32 v5, s12 ; encoding: [0x0c,0x02,0x0a,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v6, s13 ; encoding: [0x0d,0x02,0x0c,0x7e] @@ -499,7 +499,7 @@ ; ; GFX10-LABEL: sample_1d_lwe: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe] +; GFX10-NEXT: s_mov_b32 s28, exec_lo ; encoding: [0x7e,0x03,0x9c,0xbe] ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] @@ -507,7 +507,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] -; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s28 ; encoding: [0x7e,0x1c,0x7e,0x87] ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x0f,0x82,0xf0,0x05,0x00,0x40,0x00] ; GFX10-NEXT: v_mov_b32_e32 v5, s12 ; encoding: [0x0c,0x02,0x0a,0x7e] ; GFX10-NEXT: v_mov_b32_e32 v6, s13 ; encoding: [0x0d,0x02,0x0c,0x7e]