Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1600,6 +1600,30 @@ return true; } +/// Return the register to use for the index value, and the subregister to use +/// for the indirectly accessed register. +static std::pair +computeIndirectRegIndex(MachineRegisterInfo &MRI, + const SIRegisterInfo &TRI, + const TargetRegisterClass *SuperRC, + Register IdxReg, + unsigned EltSize) { + Register IdxBaseReg; + int Offset; + MachineInstr *Unused; + + std::tie(IdxBaseReg, Offset, Unused) + = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg); + + ArrayRef SubRegs = TRI.getRegSplitParts(SuperRC, EltSize); + + // Skip out of bounds offsets, or else we would end up using an undefined + // register. + if (static_cast(Offset) >= SubRegs.size()) + return std::make_pair(IdxReg, SubRegs[0]); + return std::make_pair(IdxBaseReg, SubRegs[Offset]); +} + bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); @@ -1631,7 +1655,9 @@ const DebugLoc &DL = MI.getDebugLoc(); const bool Is64 = DstTy.getSizeInBits() == 64; - unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0; + unsigned SubReg; + std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, SrcRC, IdxReg, + DstTy.getSizeInBits() / 8); if (SrcRB->getID() == AMDGPU::SGPRRegBankID) { if (DstTy.getSizeInBits() != 32 && !Is64) Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPURegisterBankInfo.h" + +#include "AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -1704,7 +1706,13 @@ assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty()); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + const LLT S32 = LLT::scalar(32); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + MachineIRBuilder B(MI); const ValueMapping &DstMapping @@ -1712,10 +1720,39 @@ const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank; const RegisterBank *SrcBank = OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank; + const RegisterBank *IdxBank = + OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank; + + Register BaseIdxReg; + unsigned ConstOffset; + MachineInstr *OffsetDef; + std::tie(BaseIdxReg, ConstOffset, OffsetDef) + = AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg()); + + // See if the index is an add of a constant which will be foldable by moving + // the base register of the index later if this is going to be executed in a + // waterfall loop. This is essentially to reassociate the add of a constant + // with the readfirstlane. + bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank && + ConstOffset > 0 && ConstOffset < SrcTy.getNumElements(); + + // Re-insert the constant offset add inside the waterfall loop. 
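+  // ReinsertIndexAdd materializes ConstOffset and adds it to the
+  // readfirstlane'd base index at its use, so the uniform add is rebuilt on
+  // SGPRs inside the waterfall loop instead of being applied to the divergent
+  // index up front.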
+ auto ReinsertIndexAdd = [=, &B, &MRI](MachineInstr &IdxUseInstr, + unsigned OpIdx) { + Register WaterfallIdx = IdxUseInstr.getOperand(OpIdx).getReg(); + B.setInsertPt(*IdxUseInstr.getParent(), IdxUseInstr.getIterator()); + + auto MaterializedOffset = B.buildConstant(S32, ConstOffset); + + auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset); + MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank); + IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0)); + }; - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - Register IdxReg = MI.getOperand(2).getReg(); + // Move the base register. We'll re-insert the add later. + if (ShouldMoveIndexIntoLoop) + MI.getOperand(2).setReg(BaseIdxReg); // If this is a VGPR result only because the index was a VGPR result, the // actual indexing will be done on the SGPR source vector, which will @@ -1739,13 +1776,14 @@ buildVCopy(B, DstReg, TmpReg); } + if (ShouldMoveIndexIntoLoop) + ReinsertIndexAdd(MI, 2); + return; } assert(DstTy.getSizeInBits() == 64); - LLT SrcTy = MRI.getType(SrcReg); - const LLT S32 = LLT::scalar(32); LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); auto CastSrc = B.buildBitcast(Vec32, SrcReg); @@ -1758,7 +1796,7 @@ MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB()); // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). - auto IdxLo = B.buildShl(S32, IdxReg, One); + auto IdxLo = B.buildShl(S32, BaseIdxReg, One); auto IdxHi = B.buildAdd(S32, IdxLo, One); auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); @@ -1799,6 +1837,9 @@ buildVCopy(B, DstRegs[1], TmpReg1); } + if (ShouldMoveIndexIntoLoop) + ReinsertIndexAdd(*IdxLo, 1); + return; } case AMDGPU::G_INSERT_VECTOR_ELT: { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -257,15 +257,10 @@ ; GPRIDX-NEXT: s_mov_b64 s[20:21], exec ; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0 +; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 -; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1 -; GPRIDX-NEXT: s_add_u32 s23, s22, 1 -; GPRIDX-NEXT: s_mov_b32 m0, s22 -; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b32 s22, s4 -; GPRIDX-NEXT: s_mov_b32 m0, s23 -; GPRIDX-NEXT: s_nop 0 -; GPRIDX-NEXT: s_movrels_b32 s23, s4 +; GPRIDX-NEXT: s_movrels_b32 s23, s5 ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB6_1 @@ -289,13 +284,10 @@ ; MOVREL-NEXT: s_mov_b64 s[20:21], exec ; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 +; MOVREL-NEXT: s_lshl_b32 m0, s22, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 -; MOVREL-NEXT: s_lshl_b32 s22, s22, 1 -; MOVREL-NEXT: s_add_u32 s23, s22, 1 -; MOVREL-NEXT: s_mov_b32 m0, s22 ; MOVREL-NEXT: s_movrels_b32 s22, s4 -; MOVREL-NEXT: s_mov_b32 m0, s23 -; MOVREL-NEXT: s_movrels_b32 s23, s4 +; MOVREL-NEXT: s_movrels_b32 s23, s5 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB6_1 @@ -371,15 +363,11 @@ ; GPRIDX-NEXT: s_mov_b64 s[16:17], exec ; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0 +; GPRIDX-NEXT: 
s_lshl_b32 m0, s18, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 -; GPRIDX-NEXT: s_lshl_b32 s18, s18, 1 -; GPRIDX-NEXT: s_add_u32 s19, s18, 1 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b32 s18, s0 -; GPRIDX-NEXT: s_mov_b32 m0, s19 +; GPRIDX-NEXT: s_movrels_b32 s19, s1 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s18 -; GPRIDX-NEXT: s_movrels_b32 s19, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s19 ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc @@ -410,13 +398,10 @@ ; MOVREL-NEXT: s_mov_b64 s[16:17], exec ; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s18, v0 +; MOVREL-NEXT: s_lshl_b32 m0, s18, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 -; MOVREL-NEXT: s_lshl_b32 s18, s18, 1 -; MOVREL-NEXT: s_add_u32 s19, s18, 1 -; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_movrels_b32 s18, s0 -; MOVREL-NEXT: s_mov_b32 m0, s19 -; MOVREL-NEXT: s_movrels_b32 s19, s0 +; MOVREL-NEXT: s_movrels_b32 s19, s1 ; MOVREL-NEXT: v_mov_b32_e32 v1, s18 ; MOVREL-NEXT: v_mov_b32_e32 v2, s19 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc @@ -439,14 +424,13 @@ ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 +; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 -; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 -; GPRIDX-NEXT: s_add_u32 s7, s6, 1 -; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) -; GPRIDX-NEXT: v_mov_b32_e32 v18, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc @@ -463,13 +447,10 @@ ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 +; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 -; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 -; MOVREL-NEXT: s_mov_b32 m0, s6 -; MOVREL-NEXT: s_add_u32 s7, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 -; MOVREL-NEXT: s_mov_b32 m0, s7 -; MOVREL-NEXT: v_movrels_b32_e32 v18, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB9_1 @@ -487,24 +468,20 @@ ; GPRIDX-LABEL: dyn_extract_v8i64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 -; GPRIDX-NEXT: s_add_u32 s1, s0, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off -; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0) -; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v17, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_v_s: ; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_lshl_b32 s0, s2, 1 -; MOVREL-NEXT: s_mov_b32 m0, s0 -; MOVREL-NEXT: s_add_u32 s0, s0, 1 +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 -; MOVREL-NEXT: s_mov_b32 m0, s0 -; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v17, v1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] ; MOVREL-NEXT: s_endpgm entry: @@ -573,30 +550,30 @@ ; GPRIDX-LABEL: 
dyn_extract_v8f32_s_s_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_add_u32 m0, s10, 3 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: s_movrels_b32 s0, s3 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_add_u32 m0, s10, 3 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: s_movrels_b32 s0, s3 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: ; return to shader part epilog entry: @@ -609,38 +586,36 @@ ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8 ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) -; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v3 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB13_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 +; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8 ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 ; MOVREL-NEXT: s_mov_b32 m0, s6 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9 -; MOVREL-NEXT: v_movrels_b32_e32 v8, v0 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 +; MOVREL-NEXT: v_movrels_b32_e32 v9, v3 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB13_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] -; MOVREL-NEXT: v_mov_b32_e32 v0, v8 +; MOVREL-NEXT: v_mov_b32_e32 v0, v9 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 @@ -653,9 +628,9 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 1 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 @@ -668,16 +643,16 @@ ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3] ; GPRIDX-NEXT: 
; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 1 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 @@ -690,7 +665,7 @@ ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 1 @@ -703,11 +678,11 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 2 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 @@ -718,18 +693,18 @@ ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 2 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 @@ -740,7 +715,7 @@ ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 2 @@ -753,13 +728,13 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 @@ -768,20 +743,20 @@ ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 @@ -790,7 +765,7 @@ ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7] ; MOVREL-NEXT: 
; return to shader part epilog entry: %add = add i32 %sel, 3 @@ -803,7 +778,6 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 4 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -812,20 +786,20 @@ ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 4 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -834,13 +808,14 @@ ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 4 @@ -853,7 +828,6 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 5 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -864,18 +838,18 @@ ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 5 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -886,11 +860,12 @@ ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 5 @@ -903,7 +878,6 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 6 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -916,16 +890,16 @@ ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_movrels_b64 
s[0:1], s[12:13] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 6 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -938,9 +912,10 @@ ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 6 @@ -953,7 +928,6 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, 7 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -968,14 +942,15 @@ ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, 7 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -990,7 +965,8 @@ ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 7 @@ -1052,49 +1028,45 @@ ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16 ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 -; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1 -; GPRIDX-NEXT: s_add_u32 s7, s6, 1 -; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) -; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 -; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 +; GPRIDX-NEXT: s_add_u32 s7, s6, 3 +; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc ; GPRIDX-NEXT: s_cbranch_execnz BB22_1 ; GPRIDX-NEXT: ; %bb.2: ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 -; GPRIDX-NEXT: v_mov_b32_e32 v1, v17 +; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16 ; MOVREL-NEXT: s_mov_b64 s[4:5], exec ; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: 
v_readfirstlane_b32 s6, v18 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18 -; MOVREL-NEXT: s_lshl_b32 s6, s6, 1 -; MOVREL-NEXT: s_mov_b32 m0, s6 -; MOVREL-NEXT: s_add_u32 s7, s6, 1 -; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 -; MOVREL-NEXT: s_mov_b32 m0, s7 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 +; MOVREL-NEXT: s_add_u32 s6, s6, 3 +; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc ; MOVREL-NEXT: s_cbranch_execnz BB22_1 ; MOVREL-NEXT: ; %bb.2: ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] -; MOVREL-NEXT: v_mov_b32_e32 v0, v16 -; MOVREL-NEXT: v_mov_b32_e32 v1, v17 +; MOVREL-NEXT: v_mov_b32_e32 v0, v17 +; MOVREL-NEXT: v_mov_b32_e32 v1, v18 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -286,18 +286,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] - ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -352,18 +348,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; GPRIDX: 
[[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] - ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -418,18 +410,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] - ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -451,18 +439,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] - ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -685,18 +669,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -753,18 +733,14 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] - ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; 
GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -436,10 +436,10 @@ ; WAVE64: successors: %bb.1(0x80000000) ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16 ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -447,9 +447,11 @@ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1 ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1 - ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]] + ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32) ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -462,10 +464,10 @@ ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; WAVE32: 
[[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo @@ -473,9 +475,11 @@ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1 ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1 - ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]] + ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32) ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -643,10 +647,10 @@ ; WAVE64: successors: %bb.1(0x80000000) ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16 ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE64: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF @@ -662,12 +666,14 @@ ; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1 ; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1 ; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1 - ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32) - ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] + ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec 
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]] + ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32) + ; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32) + ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32) ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -681,10 +687,10 @@ ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE32: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF @@ -700,12 +706,14 @@ ; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1 ; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1 ; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1 - ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32) - ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] + ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]] + ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32) + ; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32) + ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32) ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; 
WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -736,10 +744,10 @@ ; WAVE64: successors: %bb.1(0x80000000) ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec @@ -747,9 +755,11 @@ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1 ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1 - ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]] + ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32) ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -763,10 +773,10 @@ ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo @@ -774,9 +784,11 @@ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1 ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI 
[[DEF]](s32), %bb.0, %4(s32), %bb.1 - ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]] + ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32) ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc @@ -807,10 +819,10 @@ ; WAVE64: successors: %bb.1(0x80000000) ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE64: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF @@ -826,12 +838,14 @@ ; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1 ; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1 ; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1 - ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32) - ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] + ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]] + ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32) + ; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32) + ; WAVE64: 
[[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32) ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec @@ -847,10 +861,10 @@ ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE32: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF @@ -866,12 +880,14 @@ ; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1 ; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1 ; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1 - ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec - ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32) - ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] + ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]] + ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32) + ; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]] ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32) + ; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32) ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
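
The core of this change is the new computeIndirectRegIndex helper: when the extract-vector-element index is a base plus a constant and the constant is in range, the constant is folded into the subregister chosen for the indirect access (sub1, sub2_sub3, and so on) and only the base remains as the dynamic index; an out-of-range constant keeps the original index and sub0 so no undefined subregister is named. Below is a minimal standalone model of that folding decision, not LLVM code: the IndexExpr struct, the plain int register IDs, and the SubRegs vector are illustrative stand-ins for the MachineRegisterInfo/SIRegisterInfo machinery, while computeIndirectRegIndex and getBaseWithConstantOffset are the names used in the patch.

// Minimal standalone model of the offset-folding decision made by
// computeIndirectRegIndex in this patch. Register IDs are plain ints and
// SubRegs is a plain vector; both are stand-ins, not LLVM types.
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

struct IndexExpr {
  int OrigReg;     // original index register (IdxReg in the patch)
  int BaseReg;     // dynamic part, if OrigReg == BaseReg + ConstOffset
  int ConstOffset; // constant peeled off by getBaseWithConstantOffset
};

// Returns the register to use as the dynamic index and the subregister index
// to access on the super-register.
static std::pair<int, unsigned>
computeIndirectRegIndex(const IndexExpr &Idx,
                        const std::vector<unsigned> &SubRegs) {
  // An out-of-bounds constant would name an undefined subregister, so keep
  // the original index and use the first subregister. The unsigned cast also
  // rejects negative offsets.
  if (static_cast<unsigned>(Idx.ConstOffset) >= SubRegs.size())
    return {Idx.OrigReg, SubRegs[0]};
  // Otherwise drop the add: index with the base and shift the subregister.
  return {Idx.BaseReg, SubRegs[Idx.ConstOffset]};
}

int main() {
  // A 256-bit super-register split into eight 32-bit parts (sub0..sub7).
  std::vector<unsigned> SubRegs = {0, 1, 2, 3, 4, 5, 6, 7};

  // extractelement <8 x i32> %vec, (add i32 %idx, 1): the dynamic index stays
  // %idx and the access moves from sub0 to sub1, so no separate add is needed.
  auto [Reg, Sub] = computeIndirectRegIndex({101, 100, 1}, SubRegs);
  assert(Reg == 100 && Sub == 1);

  // A constant past the end of the vector is not folded.
  auto [Reg2, Sub2] = computeIndirectRegIndex({201, 200, 9}, SubRegs);
  assert(Reg2 == 201 && Sub2 == 0);

  std::printf("in-range: base=%d sub=%u, out-of-range: reg=%d sub=%u\n",
              Reg, Sub, Reg2, Sub2);
  return 0;
}

For divergent (VGPR) indices the patch instead keeps the constant off the index that feeds the waterfall loop's readfirstlane and re-adds it as a uniform SGPR add inside the loop (the ReinsertIndexAdd lambda), which is what eliminates the extra s_add_u32 and the second m0 write in the updated FileCheck lines above.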