diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2365,6 +2365,18 @@
         return B;
       }
     }
+
+    // (A & C1) | (A & C2) --> A, when the two masks do not overlap and
+    // together cover every bit of A.
+    // TODO(MML): Handle the corresponding 'and' pattern in InstCombine, where
+    // new instructions may need to be generated.
+    if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
+        match(Op1, m_And(m_Specific(A), m_APInt(C2))) &&
+        (*C1 & *C2).isZero()) {
+      unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+      if ((*C1 | *C2) == APInt::getAllOnes(BitWidth))
+        return A;
+    }
   }
 
   // If the operation is with the result of a phi instruction, check whether
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -371,6 +371,26 @@
   Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI,
                                                  bool IsAnd);
 
+  // Suppose an instruction with `I`'s opcode and operands `V0` and `V1` were
+  // appended to `BB`. If that instruction could be simplified away, return the
+  // simplified value; otherwise return nullptr.
+  //
+  // Note that any simplification or combination enabled by the new instruction
+  // is not performed by the time this function returns; the assumption is that
+  // future InstCombine iterations will perform it. If that assumption does not
+  // hold and the newly created instruction cannot be simplified later,
+  // appending it may be counterproductive.
+  //
+  // The caller must guarantee that it is correct to append the newly created
+  // instruction at the end of `BB` (before its terminator); for instance, see
+  // how `foldBinaryOpIntoPHI` folds a binary op whose operands are two PHI
+  // nodes and calls this helper.
+  Value *couldSimplifyInBasicBlock(BinaryOperator &I, Value *V0, Value *V1,
+                                   BasicBlock *BB);
+
+  Instruction *foldBinaryOpIntoPHI(BinaryOperator &I, PHINode *PN0,
+                                   PHINode *PN1);
+
 public:
   /// Inserts an instruction \p New before instruction \p Old
   ///
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1289,17 +1289,117 @@
   return replaceInstUsesWith(I, NewPN);
 }
 
+Value *InstCombinerImpl::couldSimplifyInBasicBlock(BinaryOperator &I, Value *V0,
+                                                   Value *V1, BasicBlock *BB) {
+  // Only 'or' is handled for now; return nullptr for other opcodes.
+  if (I.getOpcode() != Instruction::Or)
+    return nullptr;
+  if (Value *V = SimplifyOrInst(V0, V1, SQ))
+    return V;
+
+  // FIXME: This function could be enhanced to cover more cases that could be
+  // simplified.
+  return nullptr;
+}
+
+Instruction *InstCombinerImpl::foldBinaryOpIntoPHI(BinaryOperator &I,
+                                                   PHINode *PN0, PHINode *PN1) {
+  const unsigned NumPHIValues = PN0->getNumIncomingValues();
+  // FIXME: Generalize the function when PHI nodes have more than 2 incoming
+  // blocks.
+  if ((NumPHIValues != 2) || (NumPHIValues != PN1->getNumIncomingValues())) {
+    return nullptr;
+  }
+
+  BasicBlock *PN0_BB0 = PN0->getIncomingBlock(0);
+  BasicBlock *PN0_BB1 = PN0->getIncomingBlock(1);
+  BasicBlock *PN1_BB0 = PN1->getIncomingBlock(0);
+  BasicBlock *PN1_BB1 = PN1->getIncomingBlock(1);
+  Value *PN0_VAL0 = PN0->getIncomingValue(0);
+  Value *PN0_VAL1 = PN0->getIncomingValue(1);
+
+  if ((PN0_BB0 == PN1_BB1) && (PN0_BB1 == PN1_BB0)) {
+    std::swap(PN0_BB0, PN0_BB1);
+    std::swap(PN0_VAL0, PN0_VAL1);
+  } else if ((PN0_BB0 != PN1_BB0) || (PN0_BB1 != PN1_BB1)) {
+    return nullptr;
+  }
+
+  Value *PN1_VAL0 = PN1->getIncomingValue(0);
+  Value *PN1_VAL1 = PN1->getIncomingValue(1);
+
+  // Now both PHI nodes have two incoming blocks, and we know
+  // PN0_BB0 == PN1_BB0 and PN0_BB1 == PN1_BB1.
+
+  // For simplicity, require both PHI nodes to have exactly one use.
+  if (!(PN0->hasOneUse() && PN1->hasOneUse())) {
+    return nullptr;
+  }
+
+  if (couldSimplifyInBasicBlock(I, PN0_VAL0, PN1_VAL0, PN0_BB0) &&
+      couldSimplifyInBasicBlock(I, PN0_VAL1, PN1_VAL1, PN0_BB1)) {
+    // If folding the two PHI operands into their predecessor blocks opens up
+    // an opportunity for simplification or combination, then transform
+    //
+    // bb2:
+    //   res0 = phi [val00, bb0], [val01, bb1]
+    //   res1 = phi [val10, bb0], [val11, bb1]
+    //   binaryOp res0, res1
+    //
+    // into
+    //
+    // bb0:
+    //   res_bb0 = binaryOp val00, val10
+    // bb1:
+    //   res_bb1 = binaryOp val01, val11
+    // bb2:
+    //   phi [res_bb0, bb0], [res_bb1, bb1].
+
+    // Step 1: Create the new PHI node.
+    PHINode *NewPN = PHINode::Create(I.getType(), PN0->getNumIncomingValues());
+    InsertNewInstBefore(NewPN, *PN0);
+    NewPN->takeName(PN0);
+
+    // Step 2: Create the binary ops and fold them into the predecessor blocks.
+    // Step 2.1: Do the step for bb0.
+    Builder.SetInsertPoint(PN0_BB0->getTerminator());
+    Value *ValFromBB0 = Builder.CreateBinOp(I.getOpcode(), PN0_VAL0, PN1_VAL0);
+    NewPN->addIncoming(ValFromBB0, PN0_BB0);
+
+    // Step 2.2: Do the same for bb1.
+    Builder.SetInsertPoint(PN0_BB1->getTerminator());
+    Value *ValFromBB1 = Builder.CreateBinOp(I.getOpcode(), PN0_VAL1, PN1_VAL1);
+    NewPN->addIncoming(ValFromBB1, PN0_BB1);
+
+    return replaceInstUsesWith(I, NewPN);
+  }
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
-  if (!isa<Constant>(I.getOperand(1)))
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  if (isa<Constant>(RHS)) {
+    if (auto *Sel = dyn_cast<SelectInst>(LHS)) {
+      if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
+        return NewSel;
+    } else if (auto *PN = dyn_cast<PHINode>(LHS)) {
+      if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
+        return NewPhi;
+    }
     return nullptr;
+  }
 
+  {
+    auto *PN0 = dyn_cast<PHINode>(LHS);
+    auto *PN1 = dyn_cast<PHINode>(RHS);
-  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
-    if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
-      return NewSel;
-  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
-    if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
-      return NewPhi;
+    if (PN0 && PN1) {
+      if (Instruction *NewInst = foldBinaryOpIntoPHI(I, PN0, PN1)) {
+        return NewInst;
+      }
+    }
   }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll b/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Meant to collect test cases for folding PHIs of binary ops in general.
+; Currently it only has a test case for `or`.
+
+; Tests that the two PHI operands of the `or` (in the `return` basic block) are
+; folded into the predecessors and the IR gets simplified.
+define dso_local i64 @_Z5ParsebPF6RetValvE(i1 zeroext %test, i64 ()* nocapture %p) local_unnamed_addr {
+; CHECK-LABEL: @_Z5ParsebPF6RetValvE(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[TEST:%.*]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CALL:%.*]] = call i64 [[P:%.*]]()
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_SROA_3_0:%.*]] = phi i64 [ [[CALL]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL_SROA_3_0]]
+;
+entry:
+  br i1 %test, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call = call i64 %p()
+  %retval.sroa.3.0.extract.shift = and i64 %call, -4294967296
+  %phi.cast = and i64 %call, 4294967295
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  %retval.sroa.0.0 = phi i64 [ %phi.cast, %if.end ], [ 0, %entry ]
+  %retval.sroa.3.0 = phi i64 [ %retval.sroa.3.0.extract.shift, %if.end ], [ 0, %entry ]
+  %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.3.0, %retval.sroa.0.0
+  ret i64 %retval.sroa.0.0.insert.insert
+}
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -46,11 +46,7 @@
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2
@@ -76,11 +72,7 @@
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2
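
Note (illustrative only, not part of the patch): the new code in SimplifyOrInst targets the
pattern (A & C1) | (A & C2), which the patch folds to A when C1 & C2 == 0 and C1 | C2 covers
every bit. A minimal IR sketch of that shape, using the hypothetical function name
@masked_or_roundtrip, is:

define i64 @masked_or_roundtrip(i64 %x) {
  ; %lo keeps the low 32 bits and %hi keeps the high 32 bits; the masks are
  ; disjoint and together cover all 64 bits, so the 'or' folds back to %x.
  %lo = and i64 %x, 4294967295
  %hi = and i64 %x, -4294967296
  %r = or i64 %hi, %lo
  ret i64 %r
}

This is the same shape the fold-phi-of-binary-op.ll test reaches once the two PHI operands are
folded into the if.end predecessor: or(and %call, -4294967296, and %call, 4294967295) simplifies
to %call, while the entry-block pair simplifies to 0 via the existing or-of-zeros fold.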