Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -52,24 +52,6 @@
 }
 
-static bool isConstant(const MachineOperand &MO, int64_t &C) {
-  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-  const MachineRegisterInfo &MRI = MF->getRegInfo();
-  const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
-  if (!Def)
-    return false;
-
-  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
-    C = Def->getOperand(1).getCImm()->getSExtValue();
-    return true;
-  }
-
-  if (Def->getOpcode() == AMDGPU::COPY)
-    return isConstant(Def->getOperand(1), C);
-
-  return false;
-}
-
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
@@ -816,42 +798,37 @@
   case AMDGPU::G_EXTRACT_VECTOR_ELT: {
-    unsigned IdxOp = 2;
-    int64_t Imm;
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
+    unsigned OutputBankID = isSALUMapping(MI) ?
       AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
+    // The result is one element; size it from operand 0, not the vector.
+    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
-    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
-    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, DstSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
 
     // The index can be either if the source vector is VGPR.
-    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_INSERT_VECTOR_ELT: {
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    int64_t Imm;
-
-    unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
-    unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
-      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
-
+    unsigned OutputBankID = isSALUMapping(MI) ?
+      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
 
-    // TODO: Can do SGPR indexing, which would obviate the need for the
-    // isConstant check.
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
-      OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
-    }
+    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
+    // The element and index can be in either bank if the vector is VGPR.
+    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_UNMERGE_VALUES: {
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
@@ -1,39 +1,76 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: extract_vector_elt_0_v2i32_s
+name: extract_vector_elt_v16i32_ss
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1
-    ; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
     ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
-    %0:_(<2 x s32>) = COPY $sgpr0_sgpr1
-    %1:_(s32) = G_CONSTANT i32 0
+    %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:_(s32) = COPY $sgpr16
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...
+--- +name: extract_vector_elt_v16i32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[EVEC]](s32) + %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_vector_elt_v16i32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[EVEC]](s32) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + $vgpr0 = COPY %2 +... 
--- -name: extract_vector_elt_0_v4i32_s +name: extract_vector_elt_v16i32_vv legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s - ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32) + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) ; CHECK: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 $vgpr0 = COPY %2 ... 
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -1,111 +1,111 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: insert_vector_elt_v4i32_s_s_k +name: insert_vector_elt_v4i32_s_s_s legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $sgpr4 + %2:_(s32) = COPY $sgpr5 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... 
--- -name: insert_vector_elt_v4i32_v_s_k +name: insert_vector_elt_v4i32_v_s_s legalized: true body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... 
--- -name: insert_vector_elt_v4i32_s_v_k +name: insert_vector_elt_v4i32_s_v_s legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr4 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... 
--- -name: insert_vector_elt_var_v4i32_s_s_s +name: insert_vector_elt_v4i32_s_s_v legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6 - ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = COPY $sgpr6 + %1:_(s32) = COPY $sgpr4 + %2:_(s32) = COPY $vgpr0 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... 
--- -name: insert_vector_elt_var_v4i32_s_s_v +name: insert_vector_elt_v4i32_s_v_v legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6 - ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = COPY $vgpr6 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... 
@@ -116,17 +116,38 @@
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr4
+
     ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $vgpr4
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_v_v_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0
+
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = COPY $sgpr0
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
@@ -137,16 +158,17 @@ body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6 + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 + ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr5 - %2:_(s32) = COPY $vgpr6 + %1:_(s32) = COPY $vgpr4 + %2:_(s32) = COPY $vgpr5 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 ...