Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -125,6 +125,9 @@
   LegalizeResult fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                         LLT NarrowTy);
 
+  LegalizeResult
+  fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
   LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
   MachineRegisterInfo &MRI;
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1386,6 +1386,81 @@
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
+                                           LLT NarrowTy) {
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned CondReg = MI.getOperand(1).getReg();
+
+  unsigned NumParts = 0;
+  LLT NarrowTy0, NarrowTy1;
+
+  LLT DstTy = MRI.getType(DstReg);
+  LLT CondTy = MRI.getType(CondReg);
+  unsigned Size = DstTy.getSizeInBits();
+
+  assert(TypeIdx == 0 || CondTy.isVector());
+
+  if (TypeIdx == 0) {
+    NarrowTy0 = NarrowTy;
+    NarrowTy1 = CondTy;
+
+    unsigned NarrowSize = NarrowTy0.getSizeInBits();
+    NumParts = Size / NarrowSize;
+
+    // FIXME: Don't know how to handle the situation where the small vectors
+    // aren't all the same size yet.
+    if (Size % NarrowSize != 0)
+      return UnableToLegalize;
+
+    // Need to break down the condition type.
+    if (CondTy.isVector()) {
+      if (CondTy.getNumElements() == NumParts)
+        NarrowTy1 = CondTy.getElementType();
+      else
+        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
+                                CondTy.getScalarSizeInBits());
+    }
+  } else {
+    NumParts = CondTy.getNumElements();
+    if (NarrowTy.isVector()) {
+      // TODO: Handle uneven breakdown.
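+      // Splitting the condition vector itself is not implemented yet, so
+      // even an even breakdown falls through to the unconditional bail-out
+      // below.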
+      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
+        return UnableToLegalize;
+
+      return UnableToLegalize;
+    } else {
+      NarrowTy0 = DstTy.getElementType();
+      NarrowTy1 = NarrowTy;
+    }
+  }
+
+  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+  if (CondTy.isVector())
+    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
+
+  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
+  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
+
+  for (unsigned i = 0; i < NumParts; ++i) {
+    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+
+    if (CondTy.isVector())
+      MIRBuilder.buildSelect(DstReg, Src0Regs[i], Src1Regs[i], Src2Regs[i]);
+    else
+      MIRBuilder.buildSelect(DstReg, CondReg, Src1Regs[i], Src2Regs[i]);
+
+    DstRegs.push_back(DstReg);
+  }
+
+  if (NarrowTy0.isVector())
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
@@ -1529,6 +1604,8 @@
   case TargetOpcode::G_ANYEXT:
   case TargetOpcode::G_FPEXT:
     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+  case TargetOpcode::G_SELECT:
+    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
   }
 }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -138,8 +138,7 @@
   // FIXME: Pointer types
   getActionDefinitionsBuilder(G_CONSTANT)
     .legalFor({S1, S32, S64})
-    .clampScalar(0, S32, S64)
-    .widenScalarToNextPow2(0);
+    .clampScalar(0, S32, S64);
 
   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
 
@@ -217,8 +216,6 @@
     .clampMaxNumElements(0, S1, 1)
     .clampMaxNumElements(1, S32, 1);
 
-
-
   setAction({G_CTLZ, S32}, Legal);
   setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
   setAction({G_CTTZ, S32}, Legal);
@@ -312,7 +309,28 @@
   // TODO: Pointer types, any 32-bit or 64-bit vector
   getActionDefinitionsBuilder(G_SELECT)
     .legalFor({{S32, S1}, {S64, S1}, {V2S32, S1}, {V2S16, S1}})
-    .clampScalar(0, S32, S64);
+    .clampScalar(0, S32, S64)
+    .fewerElementsIf(
+      [=](const LegalityQuery &Query) {
+        if (Query.Types[1].isVector())
+          return true;
+
+        LLT Ty = Query.Types[0];
+
+        // FIXME: Hack until odd splits handled
+        return Ty.isVector() &&
+               (Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0);
+      },
+      [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+    // FIXME: Handle 16-bit vectors better
+    .fewerElementsIf(
+      [=](const LegalityQuery &Query) {
+        return Query.Types[0].isVector() &&
+               Query.Types[0].getElementType().getSizeInBits() < 32;
+      },
+      [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+    .clampMaxNumElements(0, S32, 2)
+    .clampMaxNumElements(1, S1, 1);
 
   // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
   // be more flexible with the shift amount type.
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -73,6 +73,91 @@
 
 ...
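+
+# Scalar condition: the odd v3s32 case must be fully scalarized, while v4s32
+# can be split into two of the legal <2 x s32> pieces.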
+---
+name: test_select_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6
+    ; CHECK-LABEL: name: test_select_v3s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]]
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV3]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV4]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV5]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
+    %3:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
+
+    %4:_(s1) = G_ICMP intpred(ne), %0, %1
+    %5:_(<3 x s32>) = G_SELECT %4, %2, %3
+    $vgpr0_vgpr1_vgpr2 = COPY %5
+
+...
+
+---
+name: test_select_v4s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8
+    ; CHECK-LABEL: name: test_select_v4s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]]
+    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(<2 x s32>) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(<2 x s32>) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[SELECT]](<2 x s32>), [[SELECT1]](<2 x s32>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(<4 x s32>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    %3:_(<4 x s32>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8
+
+    %4:_(s1) = G_ICMP intpred(ne), %0, %1
+    %5:_(<4 x s32>) = G_SELECT %4, %2, %3
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5
+
+...
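+
+# A 64-bit element type is wider than the vector rules allow, so v2s64 is
+# broken down into scalar s64 selects.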
+
+---
+name: test_select_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8
+    ; CHECK-LABEL: name: test_select_v2s64
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]]
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(<2 x s64>) = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    %3:_(<2 x s64>) = COPY $vgpr5_vgpr6_vgpr7_vgpr8
+
+    %4:_(s1) = G_ICMP intpred(ne), %0, %1
+    %5:_(<2 x s64>) = G_SELECT %4, %2, %3
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5
+
+...
+
 ---
 name: test_select_s16
 body: |
@@ -179,3 +264,127 @@
   $vgpr0 = COPY %5
 
 ...
+
+---
+name: test_vselect_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
+    ; CHECK-LABEL: name: test_vselect_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    %3:_(<2 x s32>) = COPY $vgpr6_vgpr7
+
+    %4:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1
+    %5:_(<2 x s32>) = G_SELECT %4, %2, %3
+    $vgpr0_vgpr1 = COPY %5
+
+...
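+
+# More vector-condition cases: the compare is unmerged so each resulting s1
+# lane drives its own scalar G_SELECT.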
+
+---
+name: test_vselect_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
+    ; CHECK-LABEL: name: test_vselect_v3s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV6]], [[UV9]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV7]], [[UV10]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[UV8]], [[UV11]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+
+    %3:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1
+    %4:_(<3 x s32>) = G_SELECT %3, %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %4
+
+...
+
+---
+name: test_vselect_v4s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK-LABEL: name: test_vselect_v4s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]]
+    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]]
+    ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]]
+    ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV8]], [[UV12]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV9]], [[UV13]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[UV10]], [[UV14]]
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[UV11]], [[UV15]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+
+    %3:_(<4 x s1>) = G_ICMP intpred(ne), %0, %1
+    %4:_(<4 x s32>) = G_SELECT %3, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+
+...
+
+---
+name: test_vselect_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK-LABEL: name: test_vselect_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[UV2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[UV3]]
+    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV6]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV5]], [[UV7]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+
+    %3:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1
+    %4:_(<2 x s64>) = G_SELECT %3, %1, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+
+...