Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21485,10 +21485,13 @@
   // Try to move vector bitcast after extract_subv by scaling extraction index:
   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
   if (V.getOpcode() == ISD::BITCAST &&
-      V.getOperand(0).getValueType().isVector() &&
+      V.getOperand(0).getValueType().isFixedLengthVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
     SDValue SrcOp = V.getOperand(0);
     EVT SrcVT = SrcOp.getValueType();
+    // For scalable vectors, we purposely add the bitcasts, and only deal
+    // with integer extract_subvector. So we don't reorder those particular
+    // bitcasts.
     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
     if ((SrcNumElts % DestNumElts) == 0) {
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14928,11 +14928,32 @@
 static SDValue
 performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                SelectionDAG &DAG) {
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT OutVT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(N);
+  // Reorder when the scalable vector's inner type is floating point and the
+  // outer type is not scalable vector. Also, the index should be 0 and all the
+  // input and output types should be legal to deal with.
+  if (InVT.isScalableVector() && InVT.isFloatingPoint() &&
+      DCI.isBeforeLegalize() && !OutVT.isScalableVector() &&
+      isNullConstant(N->getOperand(1)) && TLI.isTypeLegal(OutVT) &&
+      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, InVT)) {
+    // Bitcast the input
+    SDValue VecOp = N->getOperand(0);
+    VecOp = DAG.getNode(ISD::BITCAST, DL, InVT.changeTypeToInteger(), VecOp);
+    // Perform extract in integer type
+    SDValue Extract =
+        DAG.getNode(N->getOpcode(), DL, OutVT.changeTypeToInteger(), VecOp,
+                    N->getOperand(1));
+    // Bitcast back to fp type
+    return DAG.getNode(ISD::BITCAST, DL, OutVT, Extract);
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+  if (!OutVT.isScalableVector() || OutVT.getVectorElementType() != MVT::i1)
     return SDValue();
 
   SDValue V = N->getOperand(0);
@@ -14943,7 +14964,7 @@
   // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
   if (V.getOpcode() == ISD::SPLAT_VECTOR)
     if (isa<ConstantSDNode>(V.getOperand(0)))
-      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
+      return DAG.getNode(ISD::SPLAT_VECTOR, DL, OutVT, V.getOperand(0));
 
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+sve < %s | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
 
 ; == Matching first N elements ==
 
-define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
+define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
@@ -29,4 +29,34 @@
   ret <4 x i1> %v3
 }
 
-attributes #0 = { "target-features"="+sve" }
+; Extract from packed SVE vectors into different sizes of NEON registers.
+
+define <2 x float> @extract_subreg_2f32_unpacked_nx2xf32(<vscale x 2 x float> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_2f32_unpacked_nx2xf32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 2 x float> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 2 x float> %vec, i32 1
+
+  %1 = insertelement <2 x float> undef, float %vec.e0, i32 0
+  %2 = insertelement <2 x float> %1, float %vec.e1, i32 1
+  ret <2 x float> %2
+}
+
+define <4 x half> @extract_subreg_4f16_unpacked_nx4xf16(<vscale x 4 x half> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_4f16_unpacked_nx4xf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 4 x half> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 4 x half> %vec, i32 1
+  %vec.e2 = extractelement <vscale x 4 x half> %vec, i32 2
+  %vec.e3 = extractelement <vscale x 4 x half> %vec, i32 3
+
+  %1 = insertelement <4 x half> undef, half %vec.e0, i32 0
+  %2 = insertelement <4 x half> %1, half %vec.e1, i32 1
+  %3 = insertelement <4 x half> %2, half %vec.e2, i32 2
+  %4 = insertelement <4 x half> %3, half %vec.e3, i32 3
+  ret <4 x half> %4
+}