Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16123,6 +16123,22 @@
   return false;
 }
 
+// Return true if the 2nd operand of specific intrinsic is zero.
+static bool isSwapPredicateZeroing(SDValue VSel, SDNode *N) {
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+  if (VSel.getOpcode() != ISD::VSELECT)
+    return false;
+
+  unsigned IID = getIntrinsicID(N);
+  switch (IID) {
+  // TODO: Add more intrinsic once we have test coverage.
+  case Intrinsic::aarch64_sve_and:
+    return isAllInactivePredicate(VSel.getOperand(2));
+  }
+
+  return false;
+}
+
 // If a merged operation has no inactive lanes we can relax it to a predicated
 // or unpredicated operation, which potentially allows better isel (perhaps
 // using immediate forms) or relaxing register reuse requirements.
@@ -16147,6 +16163,34 @@
   return SDValue();
 }
 
+// Swap the operations when the 2nd operation is comming from a VSELECT
+// with predicate zeroing.
+//
+static SDValue tryCombineOpWithPredicateZeroing(
+    unsigned IID, unsigned Opc, SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+    SelectionDAG &DAG, bool UnpredOp = false, bool SwapOperands = false) {
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+
+  if (SDValue Res =
+          convertMergedOpToPredOp(N, Opc, DAG, UnpredOp, SwapOperands))
+    return Res;
+
+  if (DCI.isAfterLegalizeDAG())
+    return SDValue();
+
+  // Swap to candidate for movprfx if it is predicate zeroing.
+  SDValue Pg = N->getOperand(1);
+  SDValue Op1 = N->getOperand(2);
+  SDValue Op2 = N->getOperand(3);
+  if (isSwapPredicateZeroing(Op2, N)) {
+    SDValue ID = DAG.getTargetConstant(IID, SDLoc(N), MVT::i64);
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+                       ID, Pg, Op2, Op1);
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -16302,7 +16346,7 @@
   case Intrinsic::aarch64_sve_subr:
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
   case Intrinsic::aarch64_sve_and:
-    return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
+    return tryCombineOpWithPredicateZeroing (IID, ISD::AND, N, DCI, DAG, true);
   case Intrinsic::aarch64_sve_bic:
     return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
   case Intrinsic::aarch64_sve_eor:
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -337,6 +337,20 @@
   ret <vscale x 2 x i64> %out
 }
 
+; commutative operation
+define <vscale x 2 x i64> @and_i64_zero_comm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: and_i64_zero_comm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %a_z)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; BIC
 ;