Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -377,12 +377,75 @@
   return IC.replaceInstUsesWith(II, NPN);
 }
 
+// Predicated binary operations can produce redundant convert.to.svbool calls.
+// When the predicate comes from a convert.to.svbool, narrow the wider operands
+// with convert.from.svbool instead of widening the narrower operands.
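+//
+// For example, mirroring the first test in the new test file below (value
+// names here are illustrative shorthand rather than exact printed IR), the
+// combine rewrites:
+//   %t1 = convert.to.svbool(<vscale x 4 x i1> %a)
+//   %t2 = and.z(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c)
+//   %t3 = convert.from.svbool(<vscale x 16 x i1> %t2)
+// into:
+//   %nb = convert.from.svbool(<vscale x 16 x i1> %b)
+//   %nc = convert.from.svbool(<vscale x 16 x i1> %c)
+//   %t3 = and.z(<vscale x 4 x i1> %a, <vscale x 4 x i1> %nb, <vscale x 4 x i1> %nc)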
+static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
+  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
+  if (!BinOp)
+    return None;
+
+  auto IntrinsicID = BinOp->getIntrinsicID();
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_and_z:
+    break;
+  default:
+    return None;
+  }
+
+  auto BinOpPred = BinOp->getOperand(0);
+  auto BinOpOp1 = BinOp->getOperand(1);
+  auto BinOpOp2 = BinOp->getOperand(2);
+
+  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
+  if (!PredIntr ||
+      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
+    return None;
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto PredOp = PredIntr->getOperand(0);
+  auto PredOpTy = cast<VectorType>(PredOp->getType());
+  // Bail out if narrowing would change the type of the original reinterpret.
+  if (PredOpTy != II.getType())
+    return None;
+
+  SmallVector<Value *> NarrowedBinOpArgs = {PredOp};
+  auto NarrowBinOpOp1 = Builder.CreateIntrinsic(
+      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
+  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
+  if (BinOpOp1 == BinOpOp2)
+    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
+  else
+    NarrowedBinOpArgs.push_back(Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
+
+  auto NarrowedBinOp =
+      Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
+  return IC.replaceInstUsesWith(II, NarrowedBinOp);
+}
+
 static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
                                                             IntrinsicInst &II) {
   // If the reinterpret instruction operand is a PHI Node
   if (isa<PHINode>(II.getArgOperand(0)))
     return processPhiNode(IC, II);
 
+  if (auto BinOpCombine = tryCombineFromSVBoolBinOp(IC, II))
+    return BinOpCombine;
+
   SmallVector<Instruction *, 32> CandidatesForRemoval;
   Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
 
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i1> @remove_redundant_svbool(<vscale x 4 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) {
+; CHECK-LABEL: @remove_redundant_svbool(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[C:%.*]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.and.z.nxv4i1(<vscale x 4 x i1> [[A:%.*]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> [[TMP2]])
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP3]]
+;
+  %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
+  %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c)
+  %t3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %t2)
+  ret <vscale x 4 x i1> %t3
+}
+
+define <vscale x 4 x i1> @remove_redundant_svbool_equal_ops(<vscale x 4 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @remove_redundant_svbool_equal_ops(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.and.z.nxv4i1(<vscale x 4 x i1> [[A:%.*]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP2]]
+;
+  %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
+  %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+  %t3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %t2)
+  ret <vscale x 4 x i1> %t3
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+
+attributes #0 = { "target-features"="+sve" }