Index: llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -177,10 +177,12 @@ return MO.isReg() && TRI->isVectorRegister(MRI, MO.getReg()); })) { - // The only exception allowed here is another indirect V_MOV_B32_e32 + // The only exception allowed here is another indirect vector move // with the same mode. - if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 || - !I->hasRegisterImplicitUseOperand(AMDGPU::M0)) + if (!IdxOn || + !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 && + I->hasRegisterImplicitUseOperand(AMDGPU::M0)) || + I->getOpcode() == AMDGPU::V_MOV_B32_indirect)) return false; } } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -1066,8 +1066,6 @@ ; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 -; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off @@ -1138,8 +1136,6 @@ ; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 -; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off @@ -1422,8 +1418,6 @@ ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 -; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v1, v17 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off @@ -3046,8 +3040,6 @@ ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 ; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 -; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 @@ -3262,8 +3254,6 @@ ; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 ; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 -; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) ; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 Index: llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir +++ llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir @@ -336,3 +336,23 @@ $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 S_SET_GPR_IDX_OFF ... + +--- +name: indirect_mov +body: | + bb.0: + + ; GCN-LABEL: name: indirect_mov + ; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0 + ; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 + ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3) + ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3) + ; GCN: S_SET_GPR_IDX_OFF + S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0 + $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 + V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3) + S_SET_GPR_IDX_OFF + S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0 + V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3) + S_SET_GPR_IDX_OFF +...