Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16319,6 +16319,24 @@
   return false;
 }
 
+/// Combine for AArch64ISD::REINTERPRET_CAST nodes.
+///
+/// Looks through the REINTERPRET_CAST nodes feeding \p N and returns the
+/// nearest value whose type already equals the result type of \p N.
+/// Returns an empty SDValue when no such value is found.
+static SDValue performReinterpretCastCombine(SDNode *N) {
+  SDValue LeafOp = SDValue(N, 0);
+  SDValue Op = N->getOperand(0);
+  // Stop at the first value of the wanted type: walking on to the root of
+  // the chain could end on a different type and lose the fold.
+  while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
+         LeafOp.getValueType() != Op.getValueType())
+    Op = Op->getOperand(0);
+  if (LeafOp.getValueType() == Op.getValueType())
+    return Op;
+  return SDValue();
+}
+
 static SDValue performSVEAndCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
   if (DCI.isBeforeLegalizeOps())
@@ -16365,6 +16383,13 @@
     return DAG.getNode(Opc, DL, N->getValueType(0), And);
   }
 
+  // If either operand is reported as all active by isAllActivePredicate,
+  // the AND does not change the other operand, so return that operand.
+  if (isAllActivePredicate(DAG, N->getOperand(0)))
+    return N->getOperand(1);
+  if (isAllActivePredicate(DAG, N->getOperand(1)))
+    return N->getOperand(0);
+
   if (!EnableCombineMGatherIntrinsics)
     return SDValue();
 
@@ -21392,6 +21417,8 @@
     return performUzpCombine(N, DAG);
   case AArch64ISD::SETCC_MERGE_ZERO:
     return performSetccMergeZeroCombine(N, DCI);
+  case AArch64ISD::REINTERPRET_CAST:
+    return performReinterpretCastCombine(N);
   case AArch64ISD::GLD1_MERGE_ZERO:
   case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -125,9 +125,7 @@
 define @chained_reinterpret() {
 ; CHECK-LABEL: chained_reinterpret:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ret
   %in = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %cast2 = call
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %in)
Index: llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that a no-op 'and' gets removed when an operand is a vector splat of 1 or a ptrue with the proper constant.
+
+define <vscale x 16 x i1> @foo() #0 {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %and = and <vscale x 16 x i1> %2, %1
+  ret <vscale x 16 x i1> %and
+}
+
+define <vscale x 16 x i1> @bar() #0 {
+; CHECK-LABEL: bar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+entry:
+  %ins = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 4 x i1> %ins, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %splat)
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %and = and <vscale x 16 x i1> %0, %1
+  ret <vscale x 16 x i1> %and
+}
+
+; Ensure that one AND operation remains to zero the inactive lanes of the 2 x i1 type (llvm.aarch64.sve.convert.to.svbool.nxv2i1).
+define <vscale x 16 x i1> @foo1() #0 {
+; CHECK-LABEL: foo1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %3 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %2)
+  %4 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %3)
+  %and = and <vscale x 16 x i1> %4, %1
+  ret <vscale x 16 x i1> %and
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+
+attributes #0 = { "target-features"="+sve" }