diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4613,6 +4613,8 @@
                            const SDLoc &dl, SDValue &Chain,
                            bool IsSignaling = false) const;
 
+  SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) const;
+
   //===--------------------------------------------------------------------===//
   // Instruction Emitting Hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6582,6 +6582,55 @@
   return true;
 }
 
+static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
+                                   SDValue And1_L, SDValue And1_R,
+                                   SDLoc DL, SelectionDAG &DAG) {
+  if (!isBitwiseNot(And0_L, true))
+    return SDValue();
+  SDValue NotOp = And0_L->getOperand(0);
+  if (NotOp == And1_R) {
+    std::swap(And1_R, And1_L);
+  }
+  // (~(NotOp) & And0_R) | (NotOp & And1_R)
+  // --> ((And0_R ^ And1_R) & NotOp) ^ And0_R
+  if (NotOp == And1_L) {
+    EVT VT = And1_L->getValueType(0);
+    SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
+    SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
+    SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+    SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
+    return Xor1;
+  }
+  return SDValue();
+}
+
+SDValue TargetLowering::foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) const {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine, so we only check for OR.
+  assert(Node->getOpcode() == ISD::OR);
+  SDValue N0 = Node->getOperand(0);
+  if (N0->getOpcode() != ISD::AND)
+    return SDValue();
+  SDValue N1 = Node->getOperand(1);
+  if (N1->getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDLoc DL(Node);
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  SDValue N10 = N1->getOperand(0);
+  SDValue N11 = N1->getOperand(1);
+  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+    return Result;
+  return SDValue();
+}
+
 // TODO: Merge with expandFunnelShift.
 bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                SDValue &Result, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1478,6 +1478,10 @@
       LegalFPImmediates.push_back(Imm);
     }
 
+    SDValue combineOr(SDNode *N, SelectionDAG &DAG,
+                      DAGCombinerInfo &DCI,
+                      const X86Subtarget &Subtarget) const;
+
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46382,9 +46382,9 @@
   return Ret;
 }
 
-static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
-                         TargetLowering::DAGCombinerInfo &DCI,
-                         const X86Subtarget &Subtarget) {
+SDValue X86TargetLowering::combineOr(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const X86Subtarget &Subtarget) const {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
@@ -46475,6 +46475,13 @@
     return Res;
   }
 
+  // When `andn` is unavailable, transform:
+  // (x & m) | (y & ~m) --> ((x ^ y) & m) ^ y
+  if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1) {
+    if (SDValue R = foldMaskedMerge(N, DAG))
+      return R;
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge.ll b/llvm/test/CodeGen/X86/fold-masked-merge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-masked-merge.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s
+; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=BMI
+;
+; Test that masked-merge code is generated as an "xor; and; xor" sequence,
+; or as "andn; and; or" when and-not is available.
+
+define i32 @masked_merge(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: masked_merge:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    retq
+;
+; BMI-LABEL: masked_merge:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andl %edi, %esi
+; BMI-NEXT:    andnl %edx, %edi, %eax
+; BMI-NEXT:    orl %esi, %eax
+; BMI-NEXT:    retq
+  %and0 = and i32 %a0, %a1
+  %not = xor i32 %a0, -1
+  %and1 = and i32 %not, %a2
+  %or = or i32 %and0, %and1
+  ret i32 %or
+}
+
+define i32 @masked_merge1(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: masked_merge1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    retq
+;
+; BMI-LABEL: masked_merge1:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andl %edi, %esi
+; BMI-NEXT:    andnl %edx, %edi, %eax
+; BMI-NEXT:    orl %esi, %eax
+; BMI-NEXT:    retq
+  %and0 = and i32 %a0, %a1
+  %not = xor i32 %a0, -1
+  %and1 = and i32 %a2, %not
+  %or = or i32 %and1, %and0
+  ret i32 %or
+}
+
+define i32 @masked_merge2(i32, i32, i32) {
+; CHECK-LABEL: masked_merge2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    notl %esi
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    retq
+;
+; BMI-LABEL: masked_merge2:
+; BMI:       # %bb.0:
+; BMI-NEXT:    notl %edx
+; BMI-NEXT:    andnl %edx, %edi, %ecx
+; BMI-NEXT:    andnl %edi, %esi, %eax
+; BMI-NEXT:    orl %ecx, %eax
+; BMI-NEXT:    retq
+  %v0 = xor i32 %1, -1
+  %v1 = xor i32 %2, -1
+  %not = xor i32 %0, -1
+  %and0 = and i32 %not, %v1
+  %and1 = and i32 %v0, %0
+  %or = or i32 %and0, %and1
+  ret i32 %or
+}
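
The no-BMI path relies on the pure bitwise identity (x & m) | (y & ~m) == ((x ^ y) & m) ^ y, which is what lets the "xor; and; xor" sequence replace the "not; and; and; or" form. As a quick sanity check of that identity, here is a small standalone C++ sketch; it is illustrative only, not part of the patch, and the 8-bit brute force and the names Merged/Folded are mine.

// Illustrative sketch (not part of the patch): exhaustively verifies the
// masked-merge identity that foldMaskedMergeImpl relies on, over all
// 8-bit values of x, y and the mask m:
//   (x & m) | (y & ~m)  ==  ((x ^ y) & m) ^ y
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned m = 0; m < 256; ++m)
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t Merged = (x & m) | (y & ~m); // select x where m is set, else y
        uint8_t Folded = ((x ^ y) & m) ^ y;  // same selection as xor; and; xor
        assert(Merged == Folded);
      }
  return 0;
}

Note that the folded form uses y twice while the original pattern used it only once; this appears to be why the patch freezes that operand (Freeze_And0_R) before duplicating it, since an undef or poison input must not be allowed to take different values at its two uses.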