Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16214,6 +16214,53 @@
   return false;
 }
 
+static bool isAllInactivePredicate(SDValue N) {
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+    N = N.getOperand(0);
+
+  return ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
+  unsigned NumElts = N.getValueType().getVectorMinNumElements();
+
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
+    N = N.getOperand(0);
+    // When reinterpreting from a type with fewer elements the "new" elements
+    // are not active, so bail if they're likely to be used.
+    if (N.getValueType().getVectorMinNumElements() < NumElts)
+      return false;
+  }
+
+  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+    return true;
+
+  // "ptrue p.<T>, all" can be considered all active when <T> is the same size
+  // or smaller than the implicit element type represented by N.
+  // NOTE: A larger element count implies a smaller element type.
+  if (N.getOpcode() == AArch64ISD::PTRUE &&
+      N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
+    return N.getValueType().getVectorMinNumElements() >= NumElts;
+
+  // If we're compiling for a specific vector-length, we can check if the
+  // pattern's VL equals that of the scalable vector at runtime.
+  if (N.getOpcode() == AArch64ISD::PTRUE) {
+    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+    if (MaxSVESize && MinSVESize == MaxSVESize) {
+      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+      unsigned PatNumElts =
+          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+      return PatNumElts == (NumElts * VScale);
+    }
+  }
+
+  return false;
+}
+
 static SDValue performSVEAndCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
   if (DCI.isBeforeLegalizeOps())
@@ -16260,6 +16307,13 @@
     return DAG.getNode(Opc, DL, N->getValueType(0), And);
   }
 
+  // If one side of the AND is an all-active predicate the AND is a no-op,
+  // so we can just return the other operand.
+  if (isAllActivePredicate(DAG, Src))
+    return N->getOperand(1);
+  if (isAllActivePredicate(DAG, N->getOperand(1)))
+    return Src;
+
   if (!EnableCombineMGatherIntrinsics)
     return SDValue();
 
@@ -17897,53 +17951,6 @@
                      Zero);
 }
 
-static bool isAllInactivePredicate(SDValue N) {
-  // Look through cast.
-  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
-    N = N.getOperand(0);
-
-  return ISD::isConstantSplatVectorAllZeros(N.getNode());
-}
-
-static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
-  unsigned NumElts = N.getValueType().getVectorMinNumElements();
-
-  // Look through cast.
-  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
-    N = N.getOperand(0);
-    // When reinterpreting from a type with fewer elements the "new" elements
-    // are not active, so bail if they're likely to be used.
-    if (N.getValueType().getVectorMinNumElements() < NumElts)
-      return false;
-  }
-
-  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
-    return true;
-
-  // "ptrue p.<T>, all" can be considered all active when <T> is the same size
-  // or smaller than the implicit element type represented by N.
-  // NOTE: A larger element count implies a smaller element type.
-  if (N.getOpcode() == AArch64ISD::PTRUE &&
-      N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
-    return N.getValueType().getVectorMinNumElements() >= NumElts;
-
-  // If we're compiling for a specific vector-length, we can check if the
-  // pattern's VL equals that of the scalable vector at runtime.
-  if (N.getOpcode() == AArch64ISD::PTRUE) {
-    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
-    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
-    if (MaxSVESize && MinSVESize == MaxSVESize) {
-      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
-      unsigned PatNumElts =
-          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
-      return PatNumElts == (NumElts * VScale);
-    }
-  }
-
-  return false;
-}
-
 // If a merged operation has no inactive lanes we can relax it to a predicated
 // or unpredicated operation, which potentially allows better isel (perhaps
 // using immediate forms) or relaxing register reuse requirements.
Index: llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that a no-op 'and' gets removed with vector splat of 1
+
+define <vscale x 16 x i1> @foo() #0 {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %and = and <vscale x 16 x i1> %2, %1
+  ret <vscale x 16 x i1> %and
+}
+
+define <vscale x 16 x i1> @bar() #0 {
+; CHECK-LABEL: bar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+entry:
+  %ins = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 4 x i1> %ins, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %splat)
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %and = and <vscale x 16 x i1> %0, %1
+  ret <vscale x 16 x i1> %and
+}
+
+define <vscale x 16 x i1> @foo1() #0 {
+; CHECK-LABEL: foo1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    ptrue p2.s
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %3 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %2)
+  %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %3)
+  %and = and <vscale x 16 x i1> %4, %1
+  ret <vscale x 16 x i1> %and
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+
+attributes #0 = { "target-features"="+sve" }