Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1671,25 +1671,45 @@ assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + MachineIRBuilder B(MI); + + const ValueMapping &DstMapping + = OpdMapper.getInstrMapping().getOperandMapping(0); + const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank; + const RegisterBank *SrcBank = + OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register IdxReg = MI.getOperand(2).getReg(); + + // If this is a VGPR result only because the index was a VGPR result, the + // actual indexing will be done on the SGPR source vector, which will + // produce a scalar result. We need to copy to the VGPR result inside the + // waterfall loop. + const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank && + SrcBank == &AMDGPU::SGPRRegBank; if (DstRegs.empty()) { applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, { 2 }); + + // We don't want a phi for this temporary reg. + Register TmpReg = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank); + MI.getOperand(0).setReg(TmpReg); + B.setInsertPt(*MI.getParent(), ++MI.getIterator()); + B.buildCopy(DstReg, TmpReg); return; } - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - Register IdxReg = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(DstReg); - (void)DstTy; - assert(DstTy.getSizeInBits() == 64); LLT SrcTy = MRI.getType(SrcReg); const LLT S32 = LLT::scalar(32); LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); - MachineIRBuilder B(MI); auto CastSrc = B.buildBitcast(Vec32, SrcReg); auto One = B.buildConstant(S32, 1); @@ -1702,15 +1722,11 @@ // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). auto IdxLo = B.buildShl(S32, IdxReg, One); auto IdxHi = B.buildAdd(S32, IdxLo, One); - B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); - B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi); - const ValueMapping &DstMapping - = OpdMapper.getInstrMapping().getOperandMapping(0); + auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); + auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi); - // FIXME: Should be getting from mapping or not? - const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); - MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank); + MRI.setRegBank(DstReg, *DstBank); MRI.setRegBank(CastSrc.getReg(0), *SrcBank); MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank); MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank); @@ -1728,6 +1744,22 @@ MI.eraseFromParent(); executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), OpsToWaterfall, MRI); + + if (NeedCopyToVGPR) { + MachineBasicBlock *LoopBB = Extract1->getParent(); + Register TmpReg0 = MRI.createGenericVirtualRegister(S32); + Register TmpReg1 = MRI.createGenericVirtualRegister(S32); + MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank); + MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank); + + Extract0->getOperand(0).setReg(TmpReg0); + Extract1->getOperand(0).setReg(TmpReg1); + + B.setInsertPt(*LoopBB, ++Extract1->getIterator()); + B.buildCopy(DstRegs[0], TmpReg0); + B.buildCopy(DstRegs[1], TmpReg1); + } + return; } case AMDGPU::G_INSERT_VECTOR_ELT: { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -19,13 +19,15 @@ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE64: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[EVEC]](s32) + ; WAVE64: $vgpr0 = COPY [[COPY2]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE32: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[EVEC]](s32) + ; WAVE32: $vgpr0 = COPY [[COPY2]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $sgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -55,7 +57,8 @@ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1 ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -63,7 +66,7 @@ ; WAVE64: successors: %bb.3(0x80000000) ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; WAVE64: .3: - ; WAVE64: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64: $vgpr0 = COPY [[COPY2]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 @@ -78,7 +81,8 @@ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1 ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -86,7 +90,7 @@ ; WAVE32: successors: %bb.3(0x80000000) ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]] ; WAVE32: .3: - ; WAVE32: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE32: $vgpr0 = COPY [[COPY2]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -106,14 +110,16 @@ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE64: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) + ; WAVE64: $vgpr0 = COPY [[COPY2]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vs ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) - ; WAVE32: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) + ; WAVE32: $vgpr0 = COPY [[COPY2]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -143,7 +149,8 @@ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1 ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -151,7 +158,7 @@ ; WAVE64: successors: %bb.3(0x80000000) ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; WAVE64: .3: - ; WAVE64: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE64: $vgpr0 = COPY [[COPY2]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE32: successors: %bb.1(0x80000000) ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 @@ -166,7 +173,8 @@ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1 ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32) + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -174,7 +182,7 @@ ; WAVE32: successors: %bb.3(0x80000000) ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]] ; WAVE32: .3: - ; WAVE32: $vgpr0 = COPY [[EVEC]](s32) + ; WAVE32: $vgpr0 = COPY [[COPY2]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -195,13 +203,15 @@ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32) - ; WAVE64: $sgpr0_sgpr1 = COPY [[EVEC]](s64) + ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY [[EVEC]](s64) + ; WAVE64: $sgpr0_sgpr1 = COPY [[COPY2]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32) - ; WAVE32: $sgpr0_sgpr1 = COPY [[EVEC]](s64) + ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY [[EVEC]](s64) + ; WAVE32: $sgpr0_sgpr1 = COPY [[COPY2]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $sgpr16 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -280,8 +290,10 @@ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32) ; WAVE64: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) + ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) + ; WAVE64: [[EVEC1:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) + ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[EVEC1]](s32) ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -289,7 +301,7 @@ ; WAVE64: successors: %bb.3(0x80000000) ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; WAVE64: .3: - ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32) + ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; WAVE64: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE32: successors: %bb.1(0x80000000) @@ -315,8 +327,10 @@ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32) ; WAVE32: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] - ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) - ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) + ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32) + ; WAVE32: [[EVEC1:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[EVEC]](s32) + ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[EVEC1]](s32) ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -324,7 +338,7 @@ ; WAVE32: successors: %bb.3(0x80000000) ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]] ; WAVE32: .3: - ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32) + ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; WAVE32: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0