Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -459,6 +459,13 @@
                                      unsigned &SrcOpIdx1,
                                      unsigned &SrcOpIdx2) const;
 
+  /// Returns true if the target has a preference for the operand order of
+  /// \p MI. If so, \p Commute is set to true when \p MI must be commuted to
+  /// reach that order. This default reports no preference.
+  virtual bool hasCommutePreference(MachineInstr &MI, bool &Commute) const {
+    return false;
+  }
+
   /// A pair composed of a register and a sub-register index.
   /// Used to give some type checking when modeling Reg:SubReg.
   struct RegSubRegPair {
Index: llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
===================================================================
--- llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -527,6 +527,11 @@
   if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
     return false;
 
+  // Look for other target specific commute preference.
+  bool Commute;
+  if (TII->hasCommutePreference(*MI, Commute))
+    return Commute;
+
   // Since there are no intervening uses for both registers, then commute
   // if the def of RegC is closer. Its live interval is shorter.
   return LastDefB && LastDefC && LastDefC > LastDefB;
Index: llvm/lib/Target/X86/X86FixupLEAs.cpp
===================================================================
--- llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -79,6 +79,27 @@
                      MachineBasicBlock &MBB, bool OptIncDec,
                      bool UseLEAForSP) const;
 
+  /// Look for and transform the sequence
+  ///     lea (reg1, reg2), reg3
+  ///     sub reg3, reg4
+  /// to
+  ///     sub reg1, reg4
+  ///     sub reg2, reg4
+  /// It can also optimize the sequence lea/add similarly.
+  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
+
+  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
+  /// the dest register of LEA instruction I.
+  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
+                                            MachineBasicBlock &MBB) const;
+
+  /// Check instructions between LeaI and AluI (exclusively).
+  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
+  /// Set AluDestRef to true if the dest register of AluI is used or defined.
+  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
+                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
+                     bool &AluDestRef) const;
+
   /// Determine if an instruction references a machine register
   /// and, if so, whether it reads or writes the register.
   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
@@ -338,6 +359,18 @@
   }
 }
 
+/// Return the register-register SUB opcode that corresponds to \p LEAOpcode.
+/// Only LEA32r, LEA64_32r and LEA64r are expected; anything else is a bug.
+static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
+  // Both LEA32r and LEA64_32r are paired with the 32-bit subtract.
+  if (LEAOpcode == X86::LEA32r || LEAOpcode == X86::LEA64_32r)
+    return X86::SUB32rr;
+  if (LEAOpcode == X86::LEA64r)
+    return X86::SUB64rr;
+
+  llvm_unreachable("Unexpected LEA instruction");
+}
+
 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                        const MachineOperand &Offset) {
   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
@@ -364,6 +397,136 @@
   }
 }
 
+/// Scan a few instructions after LEA \p I for an ADDrr/SUBrr that kills the
+/// LEA result and whose other source is its own destination. Returns a null
+/// iterator when the LEA result is used in any other way or no use is found.
+MachineBasicBlock::iterator
+FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
+                            MachineBasicBlock &MBB) const {
+  const MachineBasicBlock::iterator NotFound;
+  const int InstrDistanceThreshold = 5;
+  int InstrDistance = 1;
+  unsigned AddOpcode = getADDrrFromLEA(I->getOpcode());
+  unsigned SubOpcode = getSUBrrFromLEA(I->getOpcode());
+  Register DestReg = I->getOperand(0).getReg();
+
+  for (MachineBasicBlock::iterator CurInst = std::next(I);
+       CurInst != MBB.end(); ++CurInst, ++InstrDistance) {
+    if (CurInst->isCall() || CurInst->isInlineAsm() ||
+        InstrDistance > InstrDistanceThreshold)
+      break;
+
+    for (unsigned Idx = 0, E = CurInst->getNumOperands(); Idx != E; ++Idx) {
+      MachineOperand &Opnd = CurInst->getOperand(Idx);
+      if (!Opnd.isReg())
+        continue;
+      if (Opnd.getReg() != DestReg) {
+        // An access to a sub/super-register also touches the LEA result, so
+        // the LEA cannot be removed.
+        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
+          return NotFound;
+        continue;
+      }
+
+      // The LEA result must die as an explicit source of a matching add/sub.
+      unsigned AluOpcode = CurInst->getOpcode();
+      if (Opnd.isDef() || !Opnd.isKill() || (Idx != 1 && Idx != 2) ||
+          (AluOpcode != AddOpcode && AluOpcode != SubOpcode))
+        return NotFound;
+
+      // The other source operand must be the destination register itself.
+      if (CurInst->getOperand(3 - Idx).getReg() !=
+          CurInst->getOperand(0).getReg())
+        return NotFound;
+
+      // X - (Y + Z) may generate different flags than (X - Y) - Z when there
+      // is overflow, so the flags written by the alu instruction must be dead.
+      return CurInst->registerDefIsDead(X86::EFLAGS, TRI) ? CurInst : NotFound;
+    }
+  }
+  return NotFound;
+}
+
+/// Walk the register operands of all instructions strictly between \p LeaI
+/// and \p AluI. Reports whether one of them writes the LEA base or index
+/// register (BaseIndexDef) or overlaps the destination of AluI (AluDestRef).
+void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
+                                 MachineBasicBlock::iterator &AluI,
+                                 bool &BaseIndexDef, bool &AluDestRef) const {
+  const Register AluDst = AluI->getOperand(0).getReg();
+  const Register Base = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
+  const Register Index = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
+
+  BaseIndexDef = AluDestRef = false;
+  for (MachineBasicBlock::iterator It = std::next(LeaI); It != AluI; ++It) {
+    for (const MachineOperand &MO : It->operands()) {
+      if (!MO.isReg())
+        continue;
+      const Register Reg = MO.getReg();
+      if (TRI->regsOverlap(Reg, AluDst))
+        AluDestRef = true;
+      if (!MO.isDef())
+        continue;
+      if (TRI->regsOverlap(Reg, Base) || TRI->regsOverlap(Reg, Index))
+        BaseIndexDef = true;
+    }
+  }
+}
+
+/// Try to fold the LEA at \p I into a following add/sub of its result (see
+/// searchALUInst). On success \p I is updated to the first new instruction.
+bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
+                             MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
+  if (AluI == MachineBasicBlock::iterator())
+    return false;
+
+  // Redefined base/index: insert at the lea, if alu dest is untouched between.
+  bool BaseIndexDef, AluDestRef;
+  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef);
+  if (BaseIndexDef && AluDestRef)
+    return false;
+  MachineBasicBlock::iterator InsertPos = BaseIndexDef ? I : AluI;
+
+  // If the alu dest is also a lea source, that source must be applied first.
+  Register AluDestReg = AluI->getOperand(0).getReg();
+  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
+  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
+  if (I->getOpcode() == X86::LEA64_32r) {
+    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
+  }
+  if (AluDestReg == IndexReg) {
+    if (BaseReg == IndexReg)
+      return false;
+    std::swap(BaseReg, IndexReg);
+  }
+
+  // Reading base/index at the alu makes earlier kill flags on them stale.
+  for (auto MI = std::next(I); !BaseIndexDef && MI != AluI; ++MI) {
+    MI->clearRegisterKills(BaseReg, TRI);
+    MI->clearRegisterKills(IndexReg, TRI);
+  }
+
+  unsigned NewOpcode = AluI->getOpcode();
+  MachineInstr *NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(),
+                                 TII->get(NewOpcode), AluDestReg)
+                             .addReg(AluDestReg)
+                             .addReg(BaseReg);
+  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
+  MachineInstr *NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(),
+                                 TII->get(NewOpcode), AluDestReg)
+                             .addReg(AluDestReg)
+                             .addReg(IndexReg);
+  NewMI2->addRegisterDead(X86::EFLAGS, TRI);
+  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
+  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
+  MBB.erase(I);
+  MBB.erase(AluI);
+  I = NewMI1;
+  return true;
+}
+
 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                  MachineBasicBlock &MBB, bool OptIncDec,
                                  bool UseLEAForSP) const {
@@ -398,6 +561,7 @@
 
   MachineInstr *NewMI = nullptr;
 
+  // Case 1.
   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
   // which can be turned into add %reg2, %reg1
   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
@@ -417,6 +581,7 @@
         .addReg(BaseReg).addReg(IndexReg);
     }
   } else if (DestReg == BaseReg && IndexReg == 0) {
+    // Case 2.
     // This is an LEA with only a base register and a displacement,
     // We can use ADDri or INC/DEC.
 
@@ -447,6 +612,12 @@
           .addReg(BaseReg).addImm(Disp.getImm());
       }
     }
+  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
+    // Case 3.
+    // Look for and transform the sequence
+    //     lea (reg1, reg2), reg3
+    //     sub reg3, reg4
+    return optLEAALU(I, MBB);
   } else
     return false;
 
Index: llvm/lib/Target/X86/X86InstrInfo.h
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.h
+++ llvm/lib/Target/X86/X86InstrInfo.h
@@ -284,6 +284,10 @@
   bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
 
+  /// Returns true if we have a preference on the operand order of MI; the
+  /// commute decision is then returned in Commute.
+  bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override;
+
   /// Returns an adjusted FMA opcode that must be used in FMA instruction that
   /// performs the same computations as the given \p MI but which has the
   /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2670,6 +2670,60 @@
   return false;
 }
 
+/// Return true if \p MI is an LEA32r, LEA64r or LEA64_32r whose address uses
+/// no segment register, a zero immediate displacement and a scale <= 1.
+static bool isConvertibleLEA(MachineInstr *MI) {
+  const unsigned Opc = MI->getOpcode();
+  const bool IsLEA =
+      Opc == X86::LEA32r || Opc == X86::LEA64r || Opc == X86::LEA64_32r;
+  if (!IsLEA)
+    return false;
+
+  // No segment override, no displacement and no scaling of the index.
+  const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
+  const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
+  const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
+  return Segment.getReg() == 0 && Disp.isImm() && Disp.getImm() == 0 &&
+         Scale.getImm() <= 1;
+}
+
+bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
+  // Only one pattern is of interest at the moment:
+  //   r3 = lea r1, r2
+  //   r5 = add r3, r4
+  // where both r3 and r4 are killed by the add. We would like the add to be
+  //   r5 = add r4, r3
+  // so that X86FixupLEAs can later rewrite the lea as an add.
+  const unsigned Opc = MI.getOpcode();
+  if (!(Opc == X86::ADD32rr || Opc == X86::ADD64rr))
+    return false;
+
+  const MachineBasicBlock *MBB = MI.getParent();
+  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const Register Src1 = MI.getOperand(1).getReg();
+  const Register Src2 = MI.getOperand(2).getReg();
+
+  // True if getOneDef() finds a def of Reg and that def is a convertible LEA
+  // located in the same block as MI.
+  auto IsLocalLEAResult = [&](Register Reg) {
+    MachineOperand *DefOp = MRI.getOneDef(Reg);
+    if (!DefOp)
+      return false;
+    MachineInstr *DefMI = DefOp->getParent();
+    return isConvertibleLEA(DefMI) && DefMI->getParent() == MBB;
+  };
+
+  // An LEA result in the first source asks for a commute; in the second
+  // source the operands are already in the preferred order.
+  if (IsLocalLEAResult(Src1))
+    Commute = true;
+  else if (IsLocalLEAResult(Src2))
+    Commute = false;
+  else
+    return false;
+  return true;
+}
+
 X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default: return X86::COND_INVALID;
Index: llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
===================================================================
--- llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -29,9 +29,9 @@
 ; CHECK-NEXT:    addq %r15, %rbx
 ; CHECK-NEXT:    addq %rdx, %rbx
 ; CHECK-NEXT:    addq %rsi, %rbx
-; CHECK-NEXT:    leaq (%r9,%r10), %rsi
-; CHECK-NEXT:    leaq (%rsi,%r8), %rdx
-; CHECK-NEXT:    addq %rsi, %rdx
+; CHECK-NEXT:    leaq (%r9,%r10), %rdx
+; CHECK-NEXT:    addq %rdx, %rdx
+; CHECK-NEXT:    addq %r8, %rdx
 ; CHECK-NEXT:    movq X(%rip), %rdi
 ; CHECK-NEXT:    addq %rbx, %r12
 ; CHECK-NEXT:    addq %r8, %rdx
@@ -41,9 +41,9 @@
 ; CHECK-NEXT:    addq %r12, %rsi
 ; CHECK-NEXT:    addq %r11, %rdi
 ; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    leaq (%r10,%r8), %rbx
-; CHECK-NEXT:    leaq (%rdx,%rbx), %rsi
-; CHECK-NEXT:    addq %rbx, %rsi
+; CHECK-NEXT:    leaq (%r10,%r8), %rsi
+; CHECK-NEXT:    addq %rsi, %rsi
+; CHECK-NEXT:    addq %rdx, %rsi
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %r12, %rdi
 ; CHECK-NEXT:    addq %rdi, %r9
@@ -54,9 +54,9 @@
 ; CHECK-NEXT:    addq %r9, %rdi
 ; CHECK-NEXT:    addq %r14, %rbx
 ; CHECK-NEXT:    addq %rdi, %rbx
-; CHECK-NEXT:    leaq (%rdx,%r8), %rax
-; CHECK-NEXT:    leaq (%rsi,%rax), %rdi
-; CHECK-NEXT:    addq %rax, %rdi
+; CHECK-NEXT:    leaq (%rdx,%r8), %rdi
+; CHECK-NEXT:    addq %rdi, %rdi
+; CHECK-NEXT:    addq %rsi, %rdi
 ; CHECK-NEXT:    movq X(%rip), %rcx
 ; CHECK-NEXT:    addq %r9, %rbx
 ; CHECK-NEXT:    addq %rbx, %r10
@@ -67,9 +67,9 @@
 ; CHECK-NEXT:    addq %r10, %rax
 ; CHECK-NEXT:    addq %r15, %rcx
 ; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    leaq (%rsi,%rdx), %rbx
-; CHECK-NEXT:    leaq (%rdi,%rbx), %r11
-; CHECK-NEXT:    addq %rbx, %r11
+; CHECK-NEXT:    leaq (%rsi,%rdx), %r11
+; CHECK-NEXT:    addq %r11, %r11
+; CHECK-NEXT:    addq %rdi, %r11
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %r10, %rcx
 ; CHECK-NEXT:    addq %rcx, %r8
@@ -80,9 +80,9 @@
 ; CHECK-NEXT:    addq %r8, %rcx
 ; CHECK-NEXT:    addq %r12, %rbx
 ; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    leaq (%rdi,%rsi), %rax
-; CHECK-NEXT:    leaq (%r11,%rax), %r14
-; CHECK-NEXT:    addq %rax, %r14
+; CHECK-NEXT:    leaq (%rdi,%rsi), %r14
+; CHECK-NEXT:    addq %r14, %r14
+; CHECK-NEXT:    addq %r11, %r14
 ; CHECK-NEXT:    movq X(%rip), %rax
 ; CHECK-NEXT:    addq %r8, %rbx
 ; CHECK-NEXT:    addq %rbx, %rdx
@@ -93,9 +93,9 @@
 ; CHECK-NEXT:    addq %rdx, %rbx
 ; CHECK-NEXT:    addq %r9, %rax
 ; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r11,%rdi), %rbx
-; CHECK-NEXT:    leaq (%r14,%rbx), %r9
-; CHECK-NEXT:    addq %rbx, %r9
+; CHECK-NEXT:    leaq (%r11,%rdi), %r9
+; CHECK-NEXT:    addq %r9, %r9
+; CHECK-NEXT:    addq %r14, %r9
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %rdx, %rax
 ; CHECK-NEXT:    addq %rax, %rsi
@@ -106,9 +106,9 @@
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    addq %r10, %rbx
 ; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r14,%r11), %rax
-; CHECK-NEXT:    leaq (%r9,%rax), %r10
-; CHECK-NEXT:    addq %rax, %r10
+; CHECK-NEXT:    leaq (%r14,%r11), %r10
+; CHECK-NEXT:    addq %r10, %r10
+; CHECK-NEXT:    addq %r9, %r10
 ; CHECK-NEXT:    movq X(%rip), %rax
 ; CHECK-NEXT:    addq %rsi, %rbx
 ; CHECK-NEXT:    addq %rbx, %rdi
@@ -119,9 +119,9 @@
 ; CHECK-NEXT:    addq %rdi, %rbx
 ; CHECK-NEXT:    addq %r8, %rax
 ; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r9,%r14), %rbx
-; CHECK-NEXT:    leaq (%r10,%rbx), %r8
-; CHECK-NEXT:    addq %rbx, %r8
+; CHECK-NEXT:    leaq (%r9,%r14), %r8
+; CHECK-NEXT:    addq %r8, %r8
+; CHECK-NEXT:    addq %r10, %r8
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    addq %rax, %r11
@@ -132,9 +132,9 @@
 ; CHECK-NEXT:    addq %r11, %rax
 ; CHECK-NEXT:    addq %rdx, %rbx
 ; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r10,%r9), %rax
-; CHECK-NEXT:    leaq (%r8,%rax), %r15
-; CHECK-NEXT:    addq %rax, %r15
+; CHECK-NEXT:    leaq (%r10,%r9), %r15
+; CHECK-NEXT:    addq %r15, %r15
+; CHECK-NEXT:    addq %r8, %r15
 ; CHECK-NEXT:    movq X(%rip), %rax
 ; CHECK-NEXT:    addq %r11, %rbx
 ; CHECK-NEXT:    addq %rbx, %r14
@@ -145,9 +145,9 @@
 ; CHECK-NEXT:    addq %r14, %rbx
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r8,%r10), %rbx
-; CHECK-NEXT:    leaq (%r15,%rbx), %rsi
-; CHECK-NEXT:    addq %rbx, %rsi
+; CHECK-NEXT:    leaq (%r8,%r10), %rsi
+; CHECK-NEXT:    addq %rsi, %rsi
+; CHECK-NEXT:    addq %r15, %rsi
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %r14, %rax
 ; CHECK-NEXT:    addq %rax, %r9
@@ -158,9 +158,9 @@
 ; CHECK-NEXT:    addq %r9, %rax
 ; CHECK-NEXT:    addq %rdi, %rbx
 ; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r15,%r8), %rax
-; CHECK-NEXT:    leaq (%rsi,%rax), %r12
-; CHECK-NEXT:    addq %rax, %r12
+; CHECK-NEXT:    leaq (%r15,%r8), %r12
+; CHECK-NEXT:    addq %r12, %r12
+; CHECK-NEXT:    addq %rsi, %r12
 ; CHECK-NEXT:    movq X(%rip), %rcx
 ; CHECK-NEXT:    addq %r9, %rbx
 ; CHECK-NEXT:    addq %rbx, %r10
@@ -171,9 +171,9 @@
 ; CHECK-NEXT:    addq %r10, %rax
 ; CHECK-NEXT:    addq %r11, %rcx
 ; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    leaq (%rsi,%r15), %rbx
-; CHECK-NEXT:    leaq (%r12,%rbx), %rax
-; CHECK-NEXT:    addq %rbx, %rax
+; CHECK-NEXT:    leaq (%rsi,%r15), %rax
+; CHECK-NEXT:    addq %rax, %rax
+; CHECK-NEXT:    addq %r12, %rax
 ; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %r10, %rcx
 ; CHECK-NEXT:    addq %rcx, %r8
@@ -184,9 +184,9 @@
 ; CHECK-NEXT:    addq %r8, %rcx
 ; CHECK-NEXT:    addq %r14, %rbx
 ; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    leaq (%r12,%rsi), %rdx
-; CHECK-NEXT:    leaq (%rax,%rdx), %rcx
-; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    leaq (%r12,%rsi), %rcx
+; CHECK-NEXT:    addq %rcx, %rcx
+; CHECK-NEXT:    addq %rax, %rcx
 ; CHECK-NEXT:    movq X(%rip), %rdx
 ; CHECK-NEXT:    addq %r8, %rbx
 ; CHECK-NEXT:    addq %rbx, %r15
@@ -197,9 +197,9 @@
 ; CHECK-NEXT:    addq %r15, %rbx
 ; CHECK-NEXT:    addq %r9, %rdx
 ; CHECK-NEXT:    addq %rbx, %rdx
-; CHECK-NEXT:    leaq (%rax,%r12), %r9
-; CHECK-NEXT:    leaq (%rcx,%r9), %rbx
-; CHECK-NEXT:    addq %r9, %rbx
+; CHECK-NEXT:    leaq (%rax,%r12), %rbx
+; CHECK-NEXT:    addq %rbx, %rbx
+; CHECK-NEXT:    addq %rcx, %rbx
 ; CHECK-NEXT:    addq %r15, %rdx
 ; CHECK-NEXT:    addq %rdx, %rsi
 ; CHECK-NEXT:    addq %rcx, %rbx
@@ -211,12 +211,12 @@
 ; CHECK-NEXT:    addq %rsi, %rdi
 ; CHECK-NEXT:    addq %rdi, %rdx
 ; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    leaq (%rbx,%rcx), %rdi
-; CHECK-NEXT:    addq %rcx, %rdi
-; CHECK-NEXT:    addq %rbx, %rdi
+; CHECK-NEXT:    addq %rcx, %rcx
+; CHECK-NEXT:    addq %rbx, %rcx
+; CHECK-NEXT:    addq %rbx, %rcx
 ; CHECK-NEXT:    addq %rsi, %rdx
 ; CHECK-NEXT:    addq %rdx, %r12
-; CHECK-NEXT:    addq %rdx, %rdi
+; CHECK-NEXT:    addq %rdx, %rcx
 ; CHECK-NEXT:    addq %r15, %rsi
 ; CHECK-NEXT:    movq X(%rip), %rax
 ; CHECK-NEXT:    bswapq %rax
@@ -225,7 +225,7 @@
 ; CHECK-NEXT:    addq %r12, %rsi
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    addq %r12, %rax
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
 ; CHECK-NEXT:    popq %r14
Index: llvm/test/CodeGen/X86/lea-opt2.ll
===================================================================
--- llvm/test/CodeGen/X86/lea-opt2.ll
+++ llvm/test/CodeGen/X86/lea-opt2.ll
@@ -11,15 +11,14 @@
 ;        subl    %edx, %ecx
 ;        subl    %eax, %ecx
 
-; TODO: replace lea with sub.
 ; C - (A + B)   -->    C - A - B
 define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    leal (%rdx,%rax), %esi
-; CHECK-NEXT:    subl %esi, %ecx
+; CHECK-NEXT:    subl %edx, %ecx
+; CHECK-NEXT:    subl %eax, %ecx
 ; CHECK-NEXT:    movl %ecx, (%rdi)
 ; CHECK-NEXT:    subl %edx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
@@ -32,16 +31,15 @@
   ret i32 %sub1
 }
 
-; TODO: replace lea with add.
 ; (A + B) + C   -->    C + A + B
 define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    leal (%rax,%rdx), %esi
-; CHECK-NEXT:    addl %ecx, %esi
-; CHECK-NEXT:    movl %esi, (%rdi)
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    addl %edx, %ecx
+; CHECK-NEXT:    movl %ecx, (%rdi)
 ; CHECK-NEXT:    subl %edx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
@@ -53,16 +51,15 @@
   ret i32 %sub1
 }
 
-; TODO: replace lea with add.
 ; C + (A + B)   -->    C + A + B
 define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    leal (%rax,%rdx), %esi
-; CHECK-NEXT:    addl %ecx, %esi
-; CHECK-NEXT:    movl %esi, (%rdi)
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    addl %edx, %ecx
+; CHECK-NEXT:    movl %ecx, (%rdi)
 ; CHECK-NEXT:    subl %edx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
@@ -95,13 +92,12 @@
   ret i32 %sub1
 }
 
-; TODO: replace lea with sub.
 define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
-; CHECK-NEXT:    subq %rsi, %rcx
+; CHECK-NEXT:    subq %rdx, %rcx
+; CHECK-NEXT:    subq %rax, %rcx
 ; CHECK-NEXT:    movq %rcx, (%rdi)
 ; CHECK-NEXT:    subq %rdx, %rax
 ; CHECK-NEXT:    retq
@@ -114,14 +110,13 @@
   ret i64 %sub1
 }
 
-; TODO: replace lea with add.
 define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
-; CHECK-NEXT:    addq %rcx, %rsi
-; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    addq %rax, %rcx
+; CHECK-NEXT:    movq %rcx, (%rdi)
 ; CHECK-NEXT:    subq %rdx, %rax
 ; CHECK-NEXT:    retq
 entry:
@@ -133,14 +128,13 @@
   ret i64 %sub1
 }
 
-; TODO: replace lea with add.
 define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
-; CHECK-NEXT:    addq %rcx, %rsi
-; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    addq %rax, %rcx
+; CHECK-NEXT:    movq %rcx, (%rdi)
 ; CHECK-NEXT:    subq %rdx, %rax
 ; CHECK-NEXT:    retq
 entry:
@@ -152,3 +146,39 @@
   ret i64 %sub1
 }
 
+; The sub instruction generated flags is used by following branch,
+; so it should not be transformed.
+define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
+; CHECK-NEXT:    subq %rsi, %rcx
+; CHECK-NEXT:    ja .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %then
+; CHECK-NEXT:    movq %rcx, (%rdi)
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB7_2: # %else
+; CHECK-NEXT:    movq $0, (%rdi)
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    retq
+entry:
+  %ld = load i64, i64* %p, align 8
+  %0 = add i64 %b, %ld
+  %sub = sub i64 %c, %0
+  %cond = icmp ule i64 %c, %0
+  br i1 %cond, label %then, label %else
+
+then:
+  store i64 %sub, i64* %p, align 8
+  br label %endif
+
+else:
+  store i64 0, i64* %p, align 8
+  br label %endif
+
+endif:
+  %sub1 = sub i64 %ld, %b
+  ret i64 %sub1
+}
Index: llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
===================================================================
--- llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -53,9 +53,9 @@
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    kmovw %k1, %ecx
 ; X86-NEXT:    addl %edi, %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    movw %cx, (%esi)
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    movw %ax, (%esi)
 ; X86-NEXT:    leal -8(%ebp), %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
@@ -107,10 +107,10 @@
 ; X64-NEXT:    kmovw %k1, %ebx
 ; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    addl %ecx, %edx
-; X64-NEXT:    leal (%rbx,%rsi), %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    addl %edx, %ecx
-; X64-NEXT:    movw %cx, (%r14)
+; X64-NEXT:    addl %ebx, %eax
+; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    addl %edx, %eax
+; X64-NEXT:    movw %ax, (%r14)
 ; X64-NEXT:    leaq -16(%rbp), %rsp
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    popq %r14