Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16123,6 +16123,22 @@ return false; } +// Return true if VSel is a VSELECT whose operand 2 is all-inactive (AND only). +static bool isSwapPredicateZeroing(SDValue VSel, SDNode *N) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); + if (VSel.getOpcode() != ISD::VSELECT) + return false; + + unsigned IID = getIntrinsicID(N); + switch (IID) { + // TODO: Add more intrinsics once we have test coverage. + case Intrinsic::aarch64_sve_and: + return isAllInactivePredicate(VSel.getOperand(2)); + } + + return false; +} + // If a merged operation has no inactive lanes we can relax it to a predicated // or unpredicated operation, which potentially allows better isel (perhaps // using immediate forms) or relaxing register reuse requirements. @@ -16147,6 +16163,34 @@ return SDValue(); } +// Swap the data operands when the 2nd data operand is coming from a VSELECT +// with predicate zeroing. +// NOTE(review): confirm swapping keeps the merged inactive lanes unchanged. +static SDValue tryCombineOpWithPredicateZeroing( + unsigned IID, unsigned Opc, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, bool UnpredOp = false, bool SwapOperands = false) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); + + if (SDValue Res = + convertMergedOpToPredOp(N, Opc, DAG, UnpredOp, SwapOperands)) + return Res; + + if (DCI.isAfterLegalizeDAG()) + return SDValue(); + + // Move a zeroing VSELECT into the 1st data operand (movprfx candidate). 
+ SDValue Pg = N->getOperand(1); + SDValue Op1 = N->getOperand(2); + SDValue Op2 = N->getOperand(3); + if (isSwapPredicateZeroing(Op2, N)) { + SDValue ID = DAG.getTargetConstant(IID, SDLoc(N), MVT::i64); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), + ID, Pg, Op2, Op1); + } + + return SDValue(); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -16302,7 +16346,7 @@ case Intrinsic::aarch64_sve_subr: return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true); case Intrinsic::aarch64_sve_and: - return convertMergedOpToPredOp(N, ISD::AND, DAG, true); + return tryCombineOpWithPredicateZeroing (IID, ISD::AND, N, DCI, DAG, true); case Intrinsic::aarch64_sve_bic: return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true); case Intrinsic::aarch64_sve_eor: Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll @@ -337,6 +337,20 @@ ret %out } +; AND with the zeroing select as its second data operand (commutative case) +define @and_i64_zero_comm( %pg, %a, %b) { +; CHECK-LABEL: and_i64_zero_comm: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: and z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.and.nxv2i64( %pg, + %b, + %a_z) + ret %out +} + ; ; BIC ;