Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16214,6 +16214,12 @@
   return false;
 }
 
+static SDValue findRootNonReinterpretNode(SDValue Op) {
+  if (Op.getNode()->getOpcode() != AArch64ISD::REINTERPRET_CAST)
+    return Op;
+  return findRootNonReinterpretNode(Op->getOperand(0));
+}
+
 static SDValue performSVEAndCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
   if (DCI.isBeforeLegalizeOps())
@@ -16260,6 +16266,26 @@
     return DAG.getNode(Opc, DL, N->getValueType(0), And);
   }
 
+  // If both sides of the AND are i1 splat_vectors (or ptrues) then we can
+  // produce just an i1 splat_vector as the result.
+  SDValue Op0 = findRootNonReinterpretNode(Src);
+  SDValue Op1 = findRootNonReinterpretNode(N->getOperand(1));
+  if ((ISD::isConstantSplatVectorAllOnes(Op0.getNode()) ||
+       Op0.getNode()->getOpcode() == AArch64ISD::PTRUE) &&
+      (ISD::isConstantSplatVectorAllOnes(Op1.getNode()) ||
+       Op1.getNode()->getOpcode() == AArch64ISD::PTRUE)) {
+    EVT OpVT0 = Op0.getValueType();
+    EVT OpVT1 = Op1.getValueType();
+    if (OpVT0.getVectorElementType() == MVT::i1 &&
+        OpVT1.getVectorElementType() == MVT::i1) {
+      SDLoc DL(N);
+      return (OpVT0.getVectorElementCount().getKnownMinValue() >
+              OpVT1.getVectorElementCount().getKnownMinValue())
+                 ? N->getOperand(1)
+                 : Src;
+    }
+  }
+
   if (!EnableCombineMGatherIntrinsics)
     return SDValue();
 
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -125,9 +125,7 @@
 define <vscale x 2 x i1> @chained_reinterpret() {
 ; CHECK-LABEL: chained_reinterpret:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ret
   %in = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %cast2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %in)
Index: llvm/test/CodeGen/AArch64/sve-splat-and-ptrue.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-splat-and-ptrue.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O3 | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that the no-op 'and' is removed when it is constructed via a llvm.aarch64.sve.convert.to.svbool.nxv4i1 node.
+define void @bar() #0 {
+; CHECK-LABEL: bar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    b foo
+entry:
+  %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  tail call void @foo(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+  ret void
+}
+
+declare void @foo(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+
+attributes #0 = { "target-features"="+sve" }