Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14430,7 +14430,8 @@
 static SDValue performInsertSubvectorCombine(SDNode *N,
                                              TargetLowering::DAGCombinerInfo &DCI,
-                                             SelectionDAG &DAG) {
+                                             SelectionDAG &DAG,
+                                             const AArch64Subtarget *Subtarget) {
   SDLoc DL(N);
   SDValue Vec = N->getOperand(0);
   SDValue SubVec = N->getOperand(1);
   uint64_t IdxVal = N->getConstantOperandVal(2);
@@ -14438,6 +14439,23 @@
   EVT VecVT = Vec.getValueType();
   EVT SubVT = SubVec.getValueType();
 
+  // Check for fixed vector mask splats inserted into scalable vectors.
+  if (VecVT.isScalableVector() &&
+      DAG.getTargetLoweringInfo().isTypeLegal(VecVT) &&
+      SubVT.isFixedLengthVector() && SubVT.getVectorElementType() == MVT::i1) {
+    uint64_t VecEC = VecVT.getVectorElementCount().getKnownMinValue();
+    uint64_t SubVecEC = SubVT.getVectorElementCount().getKnownMinValue();
+
+    bool Negated;
+    uint64_t SplatVal;
+    if (Vec.isUndef() && IdxVal == 0 &&
+        isPow2Splat(SubVec, SplatVal, Negated) &&
+        SplatVal == 1 && !Negated &&
+        VecEC * Subtarget->getVScaleForTuning() == SubVecEC)
+      return DAG.getNode(ISD::SPLAT_VECTOR, DL, VecVT,
+                         DAG.getConstant(1, DL, MVT::i32));
+  }
+
   // Only do this for legal fixed vector types.
   if (!VecVT.isFixedLengthVector() ||
       !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
@@ -17941,7 +17959,7 @@
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
   case ISD::INSERT_SUBVECTOR:
-    return performInsertSubvectorCombine(N, DCI, DAG);
+    return performInsertSubvectorCombine(N, DCI, DAG, Subtarget);
   case ISD::SELECT:
     return performSelectCombine(N, DCI);
   case ISD::VSELECT:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-ptrue.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-ptrue.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x i1> @ptest_v8i1() #0 {
+; CHECK-LABEL: ptest_v8i1:
+; CHECK:       // %bb.0: // %L.entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+L.entry:
+  %0 = call <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 2 x i1> %0
+}
+
+define <vscale x 4 x i1> @ptest_v16i1() #0 {
+; CHECK-LABEL: ptest_v16i1:
+; CHECK:       // %bb.0: // %L.entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+L.entry:
+  %0 = call <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 4 x i1> %0
+}
+
+define <vscale x 8 x i1> @ptest_v32i1() #0 {
+; CHECK-LABEL: ptest_v32i1:
+; CHECK:       // %bb.0: // %L.entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ret
+L.entry:
+  %0 = call <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 16 x i1> @ptest_v64i1() #0 {
+; CHECK-LABEL: ptest_v64i1:
+; CHECK:       // %bb.0: // %L.entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ret
+L.entry:
+  %0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 16 x i1> %0
+}
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1>, <16 x i1>, i64)
+declare <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1>, <32 x i1>, i64)
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1>, <64 x i1>, i64)
+
+attributes #0 = { vscale_range(4,4) "target-features"="+sve" "target-cpu"="a64fx" }
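
For reference, here is a minimal standalone sketch of the element-count guard the new combine relies on. It is illustration only, not part of the patch: coversFullScalableMask is a hypothetical helper that mirrors the VecEC * Subtarget->getVScaleForTuning() == SubVecEC check above, assuming vscale is known exactly (as the test pins with vscale_range(4,4) on A64FX, i.e. 512-bit registers, vscale = 4).

    // Illustration only: hypothetical helper mirroring the patch's guard,
    // not an LLVM API. Assumes an exactly-known vscale.
    #include <cassert>
    #include <cstdint>

    // True when a fixed-length mask of SubVecEC lanes fills exactly one full
    // scalable register whose known-minimum lane count is VecEC, given vscale.
    static bool coversFullScalableMask(uint64_t VecEC, uint64_t SubVecEC,
                                       uint64_t VScale) {
      return VecEC * VScale == SubVecEC;
    }

    int main() {
      // Mirrors the four tests: with vscale == 4, each fixed all-true mask
      // covers the whole predicate register, so the insert folds to a PTRUE.
      assert(coversFullScalableMask(2, 8, 4));   // nxv2i1  <- v8i1  => ptrue p0.d
      assert(coversFullScalableMask(4, 16, 4));  // nxv4i1  <- v16i1 => ptrue p0.s
      assert(coversFullScalableMask(8, 32, 4));  // nxv8i1  <- v32i1 => ptrue p0.h
      assert(coversFullScalableMask(16, 64, 4)); // nxv16i1 <- v64i1 => ptrue p0.b
      return 0;
    }

When the equality fails (for example, a v8i1 mask inserted into nxv4i1 at vscale 4, where 4 * 4 != 8), the splat would only cover part of the scalable register, so the fold is skipped and the generic lowering runs instead.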