Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -106,6 +106,21 @@
   setAction({G_LOAD, 1, S64}, Legal);
   setAction({G_STORE, 1, S64}, Legal);
 
+  // The vector operand is type index 1 for G_EXTRACT_VECTOR_ELT but type
+  // index 0 for G_INSERT_VECTOR_ELT (whose type index 1 is the inserted
+  // element). Both opcodes use type index 2 for the element index.
+  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
+    const unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
+    getActionDefinitionsBuilder(Op)
+      .legalIf([=](const LegalityQuery &Query) {
+          const LLT &VecTy = Query.Types[VecTypeIdx];
+          const LLT &IdxTy = Query.Types[2];
+          return VecTy.getSizeInBits() % 32 == 0 &&
+            VecTy.getSizeInBits() <= 512 &&
+            IdxTy.getSizeInBits() == 32;
+        });
+  }
+
   // FIXME: Doesn't handle extract of illegal sizes.
   getActionDefinitionsBuilder(G_EXTRACT)
     .unsupportedIf([=](const LegalityQuery &Query) {
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -50,6 +50,39 @@
 
 }
 
+/// Return true if \p MO is a virtual register defined by a G_CONSTANT,
+/// looking through any chain of COPYs, and store the sign-extended value
+/// in \p C. \p C is left untouched when false is returned.
+static bool isConstant(const MachineOperand &MO, int64_t &C) {
+  if (!MO.isReg())
+    return false;
+
+  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Walk the COPY chain iteratively. Stop at physical registers:
+  // getVRegDef() assumes a single SSA definition, which only holds for
+  // virtual registers.
+  unsigned Reg = MO.getReg();
+  while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    const MachineInstr *Def = MRI.getVRegDef(Reg);
+    if (!Def)
+      return false;
+
+    if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
+      C = Def->getOperand(1).getCImm()->getSExtValue();
+      return true;
+    }
+
+    if (Def->getOpcode() != AMDGPU::COPY)
+      return false;
+
+    Reg = Def->getOperand(1).getReg();
+  }
+
+  return false;
+}
+
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
@@ -430,6 +463,53 @@
     OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
     break;
   }
+
+  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
+    // XXX - Do we really need to fully handle these? The constant case should
+    // be legalized away before RegBankSelect?
+    const unsigned IdxOp = 2;
+    int64_t Imm;
+
+    // Keep the result and source vector on the SGPR bank only when
+    // isSALUMapping(MI) holds and the index is a known constant; otherwise
+    // both go to the VGPR bank.
+    unsigned OutputBankID =
+        isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm)
+            ? AMDGPU::SGPRRegBankID
+            : AMDGPU::VGPRRegBankID;
+    unsigned IdxBank = getRegBankID(MI.getOperand(IdxOp).getReg(), MRI, *TRI);
+
+    OpdsMapping[0] = AMDGPU::getValueMapping(
+        OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
+    OpdsMapping[1] = AMDGPU::getValueMapping(
+        OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+
+    // The index can be in either bank if the source vector is a VGPR, so keep
+    // whatever bank it already has.
+    OpdsMapping[2] = AMDGPU::getValueMapping(
+        IdxBank, MRI.getType(MI.getOperand(IdxOp).getReg()).getSizeInBits());
+    break;
+  }
+  case AMDGPU::G_INSERT_VECTOR_ELT: {
+    // XXX - Do we really need to fully handle these? The constant case should
+    // be legalized away before RegBankSelect?
+    const unsigned IdxOp = 3;
+    int64_t Imm;
+
+    unsigned BankID =
+        isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm)
+            ? AMDGPU::SGPRRegBankID
+            : AMDGPU::VGPRRegBankID;
+
+    // TODO: Can do SGPR indexing, which would obviate the need for the
+    // isConstant check.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
+      OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
+    }
+
+    break;
+  }
   case AMDGPU::G_INTRINSIC: {
     switch(MI.getOperand(1).getIntrinsicID()) {
     default:
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -0,0 +1,180 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+---
+name: extract_vector_elt_0_v2i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: extract_vector_elt_0_v2i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+---
+name: extract_vector_elt_0_v3i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+    ; CHECK-LABEL: name: extract_vector_elt_0_v3i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+---
+name: extract_vector_elt_0_v4i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-LABEL: name: extract_vector_elt_0_v4i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_vector_elt_0_v5i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_0_v5i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[MV:%[0-9]+]]:_(<5 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[MV]](<5 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<5 x s32>) = G_MERGE_VALUES %0, %0, %0, %0, %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_0_v6i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_0_v6i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[MV:%[0-9]+]]:_(<6 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[MV]](<6 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<6 x s32>) = G_MERGE_VALUES %0, %0, %0, %0, %0, %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_0_v7i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_0_v7i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[MV:%[0-9]+]]:_(<7 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[MV]](<7 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<7 x s32>) = G_MERGE_VALUES %0, %0, %0, %0, %0, %0, %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_0_v8i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_0_v8i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[MV:%[0-9]+]]:_(<8 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[MV]](<8 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<8 x s32>) = G_MERGE_VALUES %0, %0, %0, %0, %0, %0, %0, %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_0_v16i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_0_v16i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[MV:%[0-9]+]]:_(<16 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[MV]](<16 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<16 x s32>) = G_MERGE_VALUES %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %1, %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_var_v2i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: extract_vector_elt_var_v2i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_vector_elt_var_v8i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
+    ; CHECK-LABEL: name: extract_vector_elt_var_v8i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(s32) = COPY $vgpr8
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+---
+name: insert_vector_elt_0_v2i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_vector_elt_0_v2i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
@@ -0,0 +1,39 @@
+# NOTE: CHECK lines were written by hand; regenerate them with utils/update_mir_test_checks.py.
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+
+---
+name: extract_vector_elt_0_v2i32_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
+
+---
+name: extract_vector_elt_0_v4i32_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
@@ -0,0 +1,152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+
+---
+name: insert_vector_elt_v4i32_s_s_k
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
+    ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $sgpr5
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_v4i32_v_s_k
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $sgpr5
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_v4i32_s_v_k
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $vgpr5
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_s_s_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $sgpr5
+    %2:_(s32) = COPY $sgpr6
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_s_s_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $sgpr5
+    %2:_(s32) = COPY $vgpr6
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_v_s_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $sgpr5
+    %2:_(s32) = COPY $vgpr6
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_v_v_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $vgpr5
+    %2:_(s32) = COPY $vgpr6
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
+...