Scalable-vector handling for CONCAT_VECTORS operand promotion and for
INSERT_SUBVECTOR on AArch64.

* LegalizeIntegerTypes.cpp: for a scalable result type,
  PromoteIntOp_CONCAT_VECTORS starts from an UNDEF vector and inserts each
  operand with INSERT_SUBVECTOR at index OpIdx * (minimum element count of
  that operand).
* AArch64ISelLowering.cpp: INSERT_SUBVECTOR becomes Custom for nxv8i8,
  nxv4i16 and nxv2i32. The scalable-subvector path only handles a legal
  integer result whose element count is exactly twice the subvector's, at
  index 0 or at the subvector's minimum element count: the subvector is
  any-extended and joined with UUNPKHI/UUNPKLO of the main vector through
  UZP1. Every other case returns SDValue(). performUzpCombine folds
  uzp1(unpklo(uzp1(x, y)), z) and uzp1(x, unpkhi(uzp1(y, z))) to uzp1(x, z).
* sve-split-trunc.ll: llc/FileCheck test for scalable-vector truncates.

NOTE(review): this patch was restored from a copy that had lost its line
breaks, indentation and all <...> text. Hunk line counts were re-checked
against the @@ headers. Assumptions to confirm against the tree:
  - the context line "SmallVector<SDValue, 8> NewOps;" (inline size assumed);
  - leading whitespace of context lines (LLVM 2-space style assumed); apply
    with a whitespace-tolerant option (e.g. patch -l) if context mismatches.

Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4680,8 +4680,23 @@
 
 SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
   SDLoc dl(N);
+
+  EVT ResVT = N->getValueType(0);
   unsigned NumElems = N->getNumOperands();
 
+  if (ResVT.isScalableVector()) {
+    SDValue ResVec = DAG.getUNDEF(ResVT);
+
+    for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
+      SDValue Op = N->getOperand(OpIdx);
+      unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
+      ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
+                           DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
+    }
+
+    return ResVec;
+  }
+
   EVT RetSclrTy = N->getValueType(0).getVectorElementType();
 
   SmallVector<SDValue, 8> NewOps;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -964,8 +964,10 @@
       }
     }
 
-    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+    }
 
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
@@ -9099,9 +9101,34 @@
   EVT InVT = Op.getOperand(1).getValueType();
   unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
 
-  // We don't have any patterns for scalable vector yet.
-  if (InVT.isScalableVector())
+  if (InVT.isScalableVector()) {
+    SDLoc DL(Op);
+    EVT VT = Op.getValueType();
+
+    if (!isTypeLegal(VT) || !VT.isInteger())
+      return SDValue();
+
+    SDValue Vec0 = Op.getOperand(0);
+    SDValue Vec1 = Op.getOperand(1);
+
+    // Ensure the subvector is half the size of the main vector.
+    if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
+      return SDValue();
+
+    // Extend elements of smaller vector...
+    EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+    SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+
+    if (Idx == 0) {
+      SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
+      return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
+    } else if (Idx == InVT.getVectorMinNumElements()) {
+      SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
+      return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
+    }
+
     return SDValue();
+  }
 
   // This will be matched by custom code during ISelDAGToDAG.
   if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
@@ -13001,6 +13028,28 @@
                       S->getMemOperand()->getFlags());
 }
 
+static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  EVT ResVT = N->getValueType(0);
+  // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
+  if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
+    if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+      SDValue UzpOp = Op0.getOperand(0).getOperand(0);
+      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, UzpOp, Op1);
+    }
+  }
+  // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
+  if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
+    if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+      SDValue UzpOp = Op1.getOperand(0).getOperand(1);
+      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, UzpOp);
+    }
+  }
+  return SDValue();
+}
+
 /// Target-specific DAG combine function for post-increment LD1 (lane) and
 /// post-increment LD1R.
 static SDValue performPostLD1Combine(SDNode *N,
@@ -14342,6 +14391,8 @@
     return performPostLD1Combine(N, DCI, false);
   case AArch64ISD::NVCAST:
     return performNVCASTCombine(N);
+  case AArch64ISD::UZP1:
+    return performUzpCombine(N, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return performPostLD1Combine(N, DCI, true);
   case ISD::INTRINSIC_VOID:
Index: llvm/test/CodeGen/AArch64/sve-split-trunc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-split-trunc.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @trunc_i16toi8(<vscale x 16 x i16> %in) {
+; CHECK-LABEL: trunc_i16toi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 16 x i16> %in to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @trunc_i32toi16(<vscale x 8 x i32> %in) {
+; CHECK-LABEL: trunc_i32toi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 8 x i32> %in to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @trunc_i64toi32(<vscale x 4 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 4 x i64> %in to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 8 x i16> @trunc_i64toi16(<vscale x 8 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 8 x i64> %in to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 16 x i8> @trunc_i64toi8(<vscale x 16 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z7.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z5.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z6.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = trunc <vscale x 16 x i64> %in to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %out
+}