diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -952,6 +952,7 @@
   SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecRes_VECTOR_DEINTERLEAVE(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3932,6 +3932,10 @@
     Res = WidenVecRes_INSERT_SUBVECTOR(N);
     break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::VECTOR_DEINTERLEAVE:
+    // The helper registers both widened results itself.
+    WidenVecRes_VECTOR_DEINTERLEAVE(N);
+    return;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
@@ -5719,6 +5723,39 @@
                                     Mask);
 }
 
+/// Widen both results of a VECTOR_DEINTERLEAVE node.
+///
+/// The two operands are concatenated, the widened form of that concatenation
+/// is split into two equal halves, and a new VECTOR_DEINTERLEAVE is built on
+/// those halves. Both results are registered with SetWidenedVector here, so
+/// an empty SDValue is returned.
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
+  SDLoc dl(N);
+  LLVMContext &Ctx = *DAG.getContext();
+  EVT InVT = N->getValueType(0);
+  EVT InConcVT = InVT.getDoubleNumVectorElementsVT(Ctx);
+  SDValue WidenVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, InConcVT,
+                                 N->getOperand(0), N->getOperand(1));
+  // NOTE(review): this asks for the widened form of a value created just
+  // above; confirm it is always already recorded as widened at this point.
+  if (getTypeAction(WidenVec.getValueType()) == TargetLowering::TypeWidenVector)
+    WidenVec = GetWidenedVector(WidenVec);
+
+  EVT SplitVT = WidenVec.getValueType().getHalfNumVectorElementsVT(Ctx);
+  // Use the target's vector index type rather than a hard-coded i64.
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, WidenVec,
+                           DAG.getVectorIdxConstant(0, dl));
+  SDValue Hi = DAG.getNode(
+      ISD::EXTRACT_SUBVECTOR, dl, SplitVT, WidenVec,
+      DAG.getVectorIdxConstant(SplitVT.getVectorMinNumElements(), dl));
+
+  SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl,
+                            DAG.getVTList(SplitVT, SplitVT), Lo, Hi);
+  SetWidenedVector(SDValue(N, 0), Res.getValue(0));
+  SetWidenedVector(SDValue(N, 1), Res.getValue(1));
+  return SDValue();
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11604,10 +11604,30 @@
     return;
   }
 
+  // If the operand type has to be widened, widen both operands up front;
+  // otherwise the interleaved result is concatenated in the wrong order.
+  LLVMContext &Ctx = *DAG.getContext();
+  bool IsInVTWiden =
+      (TLI.getTypeAction(Ctx, InVT) == TargetLowering::TypeWidenVector);
+  if (IsInVTWiden) {
+    InVT = TLI.getTypeToTransformTo(Ctx, InVT);
+    InVec0 = widenVectorToPartType(DAG, InVec0, DL, InVT);
+    InVec1 = widenVectorToPartType(DAG, InVec1, DL, InVT);
+    OutVT = InVec0.getValueType().getDoubleNumVectorElementsVT(Ctx);
+  }
+
   SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                             DAG.getVTList(InVT, InVT), InVec0, InVec1);
   Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
                     Res.getValue(1));
+
+  // After widening, extract the leading subvector of the original result type.
+  if (IsInVTWiden) {
+    OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Res,
+                      DAG.getVectorIdxConstant(0, DL));
+  }
+
   setValue(&I, Res);
   return;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -244,6 +244,39 @@
   ret {, } %retval
 }
 
+; Widen illegal type size
+
+define {, } @vector_deinterleave_nxv6i64_nxv12i64( %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv6i64_nxv12i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    addvl sp, sp, #-4
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    uzp2 z3.s, z2.s, z0.s
+; CHECK-NEXT:    uzp2 z4.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uzp1 z1.s, z2.s, z0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    uunpklo z0.d, z1.s
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    st1w { z0.d }, p1, [sp, #6, mul vl]
+; CHECK-NEXT:    uunpklo z0.d, z3.s
+; CHECK-NEXT:    st1w { z4.s }, p0, [sp]
+; CHECK-NEXT:    st1w { z0.d }, p1, [sp, #2, mul vl]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [sp]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #4
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv12i64( %vec)
+  ret {, } %retval
+}
+
 
 ; Floating declarations
 declare {,} @llvm.experimental.vector.deinterleave2.nxv4f16()
@@ -272,3 +305,5 @@
 declare {, } @llvm.experimental.vector.deinterleave2.nxv16i8()
 declare {, } @llvm.experimental.vector.deinterleave2.nxv8i16()
 declare {, } @llvm.experimental.vector.deinterleave2.nxv4i32()
+
+declare {, } @llvm.experimental.vector.deinterleave2.nxv12i64()
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -224,6 +224,20 @@
   ret %retval
 }
 
+; Widen illegal type size
+
+define @interleave2_nxv6i32( %vec0, %vec1) nounwind {
+; CHECK-LABEL: interleave2_nxv6i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call @llvm.experimental.vector.interleave2.nxv6i32( %vec0, %vec1)
+  ret %retval
+}
+
+
 ; Float declarations
 declare @llvm.experimental.vector.interleave2.nxv4f16(, )
 declare @llvm.experimental.vector.interleave2.nxv8f16(, )
@@ -251,3 +265,5 @@
 declare @llvm.experimental.vector.interleave2.nxv16i8(, )
 declare @llvm.experimental.vector.interleave2.nxv8i16(, )
 declare @llvm.experimental.vector.interleave2.nxv4i32(, )
+
+declare @llvm.experimental.vector.interleave2.nxv6i32(, )