diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,16 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -964,6 +967,118 @@
   return ShAmt < 4 && ShAmt > 0;
 }
 
+// Returns true if SF is known to be zero after AndInstr executes.
+inline static bool isSFZero(const MachineInstr &AndInstr) {
+  assert(X86::isAND(AndInstr.getOpcode()) && "Caller guaranteed");
+
+  int64_t Mask = 0, Imm = 0;
+  const MachineOperand &MO2 = AndInstr.getOperand(2);
+  // Only AND32* opcodes are handled in the switch below;
+  // that is, {AND8*, AND16*, AND64*} are not handled.
+  //
+  // The rationale is that the caller knows AndInstr produces a 32-bit
+  // value that is zero-extended into a 64-bit register.
+  switch (AndInstr.getOpcode()) {
+  default:
+    return false;
+  case X86::AND32ri8:
+  case X86::AND32mi8:
+    // The imm8 is sign-extended to 32 bits, so SF is zero iff bit 7 of
+    // the immediate is zero.
+    Mask = 0x80;
+    assert(MO2.isImm() && "AndInstr is AND32ri8 or AND32mi8");
+    Imm = MO2.getImm();
+    break;
+  case X86::AND32ri:
+  case X86::AND32i32: // AND32i32 is the EAX, imm32 form.
+  case X86::AND32mi:
+    Mask = 0x80000000;
+    assert(MO2.isImm() && "AndInstr is AND32ri, AND32i32 or AND32mi");
+    Imm = MO2.getImm();
+    break;
+  }
+  // SF mirrors the sign bit of the result; an AND whose immediate has a
+  // clear sign bit always produces a result with a clear sign bit.
+  return (Mask & Imm) == 0;
+}
+
+inline static bool isRedundantFlagInstr(MachineInstr &CmpInstr,
+                                        MachineInstr &CmpValDefInstr,
+                                        const MachineRegisterInfo *MRI,
+                                        MachineInstr **AndInstr,
+                                        const TargetRegisterInfo *TRI) {
+  // FIXME: Handle the case when CmpValDefInstr.getOpcode() ==
+  // X86::INSERT_SUBREG.
+  if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+    return false;
+
+  if (CmpInstr.getOpcode() != X86::TEST64rr)
+    return false;
+
+  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
+  // guarantees that it's analyzable only if the two register operands are
+  // identical.
+  assert(
+      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
+      "requires that the two register operands are the same.");
+
+  assert(
+      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
+      "Caller guaranteed.");
+
+  assert((CmpValDefInstr.getNumOperands() == 4) &&
+         "Guaranteed by the definition of SUBREG_TO_REG.");
+
+  const MachineOperand &MO1 = CmpValDefInstr.getOperand(1);
+
+  assert(MO1.isImm() && "MO1 should be an immediate");
+  // As seen in X86 td files, MO1.getImm() is typically 0.
+  if (MO1.getImm() != 0)
+    return false;
+
+  const MachineOperand &MO3 = CmpValDefInstr.getOperand(3);
+  assert(MO3.isImm() &&
+         "MO3 should be an immediate representing a subregister index");
+  // As seen in X86 td files, MO3 is typically sub_32bit or sub_xmm.
+  if (MO3.getImm() != X86::sub_32bit)
+    return false;
+
+  const MachineOperand &MO2 = CmpValDefInstr.getOperand(2);
+  assert(MO2.isReg() &&
+         "MO2 should be a register by the definition of SUBREG_TO_REG");
+  MachineInstr *VregDefInstr = MRI->getVRegDef(MO2.getReg());
+
+  assert(VregDefInstr && "Must have a definition (SSA)");
+
+  // Require `CmpValDefInstr` and `VregDefInstr` to be in the same MBB
+  // to simplify the subsequent analysis.
+  //
+  // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
+  // `CmpValDefInstr.getParent()`, this could be handled.
+  if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
+    return false;
+
+  if (X86::isAND(VregDefInstr->getOpcode()) && isSFZero(*VregDefInstr)) {
+    // Get a sequence of instructions like
+    //   %reg = and32* %reg1, %reg2
+    //   ...                          // EFLAGS not changed.
+    //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
+    //   test64rr %extended_reg, %extended_reg, implicit-def $eflags
+    //
+    // The TEST64rr can be erased since and32* is known to set SF to zero.
+    for (MachineInstr &Instr :
+         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
+                    MachineBasicBlock::iterator(CmpValDefInstr))) {
+      if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+        // An instruction between `VregDefInstr` and `CmpValDefInstr`
+        // modifies EFLAGS; the TEST64rr cannot be removed.
+        return false;
+      }
+    }
+
+    *AndInstr = VregDefInstr;
+    return true;
+  }
+  return false;
+}
+
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                   unsigned Opc, bool AllowSP, Register &NewSrc,
                                   bool &isKill, MachineOperand &ImplicitOp,
@@ -4197,6 +4312,27 @@
       MI = &Inst;
       break;
     }
+
+    // Look back for an instruction that sets EFLAGS to the same value as
+    // CmpInstr would.
+    //
+    // Example:
+    //   %reg = and32ri %in_reg, 5
+    //   ...                          // EFLAGS not changed.
+    //   %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+    //   test64rr %src_reg, %src_reg, implicit-def $eflags
+    //
+    // The test64rr instruction can be erased.
+    MachineInstr *AndInstr = nullptr;
+    if (IsCmpZero &&
+        isRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI)) {
+      assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
+      // The AND is known to set SF to zero here (see isSFZero) and clears OF.
+      NoSignFlag = false;
+      ClearsOverflowFlag = true;
+      MI = AndInstr;
+      break;
+    }
     // Cannot find other candidates before definition of SrcReg.
     return false;
   }
diff --git a/llvm/test/CodeGen/X86/peephole-test-after-add.mir b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s
+
+# Test that TEST64rr is erased in `test_erased` and kept in
+# `test_not_erased_when_eflags_change`.
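+#
+# At the assembly level, the rewrite checked in `test_erased` corresponds to
+# the following sketch (illustrative only, not compiler output):
+#
+#   andl $3, %eax      # ZF reflects the result; SF = 0 because bit 31 of
+#                      # the immediate is clear
+#   testq %rax, %rax   # redundant: the zero-extended value yields the same
+#                      # ZF/SF
+#   cmoveq %rsi, %rax
+#
+# becoming
+#
+#   andl $3, %eax
+#   cmoveq %rsi, %rax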
+--- | + ; ModuleID = 'tmp.ll' + source_filename = "tmp.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + + define i64 @test_erased(ptr %0, i64 %1, i64 %2) { + %4 = load i64, ptr %0, align 8 + %5 = and i64 %4, 3 + %6 = icmp eq i64 %5, 0 + %7 = select i1 %6, i64 %1, i64 %5 + store i64 %7, ptr %0, align 8 + ret i64 %5 + } + + define void @test_not_erased_when_eflags_change(ptr %0, i64 %1, i64 %2, i64 %3, ptr %4) { + %6 = load i64, ptr %0, align 8 + %7 = and i64 %6, 3 + %8 = xor i64 %3, 5 + %9 = icmp eq i64 %7, 0 + %10 = select i1 %9, i64 %1, i64 %7 + store i64 %10, ptr %0, align 8 + store i64 %8, ptr %4, align 8 + ret void + } + +... +--- +name: test_erased +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr64 } + - { id: 3, class: gr64 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr64 } + - { id: 7, class: gr64 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.3): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_erased + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit + ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit + ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 4, implicit $eflags + ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0) + ; CHECK-NEXT: $rax = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: RET 0, $rax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %3:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + %4:gr32 = COPY %3.sub_32bit + %5:gr32 = AND32ri8 %4, 3, implicit-def dead $eflags + %6:gr64 = SUBREG_TO_REG 0, killed %5, %subreg.sub_32bit + TEST64rr %6, %6, implicit-def $eflags + %7:gr64 = CMOV64rr %6, %1, 4, implicit $eflags + MOV64mr %0, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.0) + $rax = COPY %6 + RET 0, $rax + +... 
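+# In `test_not_erased_when_eflags_change` below, the XOR64ri8 sits between
+# the AND32ri8 and the TEST64rr and clobbers EFLAGS, so the backward walk
+# from the SUBREG_TO_REG to the AND sees an intervening EFLAGS def and the
+# TEST64rr must be kept.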
+--- +name: test_not_erased_when_eflags_change +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr64 } + - { id: 3, class: gr64 } + - { id: 4, class: gr64 } + - { id: 5, class: gr64 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr64 } + - { id: 9, class: gr64 } + - { id: 10, class: gr64 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } + - { reg: '$rcx', virtual-reg: '%3' } + - { reg: '$r8', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.5): + liveins: $rdi, $rsi, $rcx, $r8 + + ; CHECK-LABEL: name: test_not_erased_when_eflags_change + ; CHECK: liveins: $rdi, $rsi, $rcx, $r8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $r8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rcx + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY3]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit + ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY4]], 3, implicit-def dead $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit + ; CHECK-NEXT: [[XOR64ri8_:%[0-9]+]]:gr64 = XOR64ri8 [[COPY1]], 5, implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags + ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY2]], 4, implicit $eflags + ; CHECK-NEXT: MOV64mr [[COPY3]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0) + ; CHECK-NEXT: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, killed [[XOR64ri8_]] :: (store (s64) into %ir.4) + ; CHECK-NEXT: RET 0 + %4:gr64 = COPY $r8 + %3:gr64 = COPY $rcx + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %5:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + %6:gr32 = COPY %5.sub_32bit + %7:gr32 = AND32ri8 %6, 3, implicit-def dead $eflags + %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit + %9:gr64 = XOR64ri8 %3, 5, implicit-def dead $eflags + TEST64rr %8, %8, implicit-def $eflags + %10:gr64 = CMOV64rr %8, %1, 4, implicit $eflags + MOV64mr %0, 1, $noreg, 0, $noreg, killed %10 :: (store (s64) into %ir.0) + MOV64mr %4, 1, $noreg, 0, $noreg, killed %9 :: (store (s64) into %ir.4) + RET 0 + +...
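For readers who want to sanity-check the sign-flag argument outside the backend, here is a minimal standalone model of the `isSFZero` reasoning. The helper name and the `main` driver are hypothetical, written only for this note; they are not LLVM API.

```cpp
#include <cassert>
#include <cstdint>

// Model of the x86 semantics the patch relies on: after `AND r32, imm`,
// SF equals bit 31 of the 32-bit result, and that result is implicitly
// zero-extended when placed in a 64-bit register. TEST64rr on the
// zero-extended value computes SF from bit 63 (always 0) and ZF from the
// same bits, so the TEST is redundant exactly when the AND already
// guarantees SF == 0.
static bool sfKnownZeroAfterAnd32(int64_t Imm, bool IsImm8Form) {
  // The imm8 forms sign-extend the encoded byte to 32 bits, so bit 7 of
  // the byte decides bit 31 of the effective mask.
  const int64_t SignBitMask = IsImm8Form ? 0x80 : 0x80000000;
  return (Imm & SignBitMask) == 0;
}

int main() {
  assert(sfKnownZeroAfterAnd32(3, /*IsImm8Form=*/true));   // and $3: SF = 0
  assert(!sfKnownZeroAfterAnd32(-1, /*IsImm8Form=*/true)); // 0xff sign-extends
  assert(!sfKnownZeroAfterAnd32(INT64_C(0x80000000),
                                /*IsImm8Form=*/false));    // bit 31 set
  return 0;
}
```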