Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -871,24 +871,26 @@
 // Otherwise, return null.
 //
 static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
-  if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
+  if (!V->getType()->isIntOrIntVectorTy())
     return nullptr;
 
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return nullptr;
-
-  if (I->getOpcode() == Instruction::Mul)
-    if ((CST = dyn_cast<Constant>(I->getOperand(1))))
-      return I->getOperand(0);
-  if (I->getOpcode() == Instruction::Shl)
-    if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
-      // The multiplier is really 1 << CST.
-      CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
-      return I->getOperand(0);
+  if (V->hasOneUse()) {
+    if (Instruction *I = dyn_cast<Instruction>(V)) {
+      if (I->getOpcode() == Instruction::Mul)
+        if ((CST = dyn_cast<Constant>(I->getOperand(1))))
+          return I->getOperand(0);
+      if (I->getOpcode() == Instruction::Shl)
+        if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
+          // The multiplier is really 1 << CST.
+          CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
+          return I->getOperand(0);
+        }
     }
-  return nullptr;
-}
+  }
 
+  CST = ConstantInt::get(V->getType(), 1);
+  return V;
+}
 
 /// WillNotOverflowSignedAdd - Return true if we can prove that:
 ///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
@@ -897,6 +899,19 @@
 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
   // There are different heuristics we can use for this.  Here are some simple
   // ones.
+  if (ConstantInt *CI1 = dyn_cast<ConstantInt>(LHS))
+    if (ConstantInt *CI2 = dyn_cast<ConstantInt>(RHS)) {
+      APInt ACI1 = CI1->getValue();
+      APInt ACI2 = CI2->getValue();
+      bool IsLHSNegative = ACI1.isNegative();
+      bool IsRHSNegative = ACI2.isNegative();
+      bool IsResultNegative = (ACI1 + ACI2).isNegative();
+
+      if ((IsLHSNegative != IsRHSNegative) ||
+          (IsLHSNegative == IsResultNegative))
+        return true;
+      return false;
+    }
 
   // Add has the property that adding any two 2's complement numbers can only
   // have one carry bit which can change a sign.  As such, if LHS and RHS each
@@ -905,7 +920,6 @@
   if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
     return true;
 
-
   // If one of the operands only has one non-zero bit, and if the other operand
   // has a known-zero bit in a more significant place than it (not including the
   // sign bit) the ripple may go up to and fill the zero, but won't change the
@@ -1025,22 +1039,22 @@
     if (Value *V = dyn_castNegVal(RHS))
       return BinaryOperator::CreateSub(LHS, V);
 
-
   {
-    Constant *C2;
-    if (Value *X = dyn_castFoldableMul(LHS, C2)) {
-      if (X == RHS) // X*C + X --> X * (C+1)
-        return BinaryOperator::CreateMul(RHS, AddOne(C2));
-
-      // X*C1 + X*C2 --> X * (C1+C2)
-      Constant *C1;
-      if (X == dyn_castFoldableMul(RHS, C1))
-        return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
-    }
-
-    // X + X*C --> X * (C+1)
-    if (dyn_castFoldableMul(RHS, C2) == LHS)
-      return BinaryOperator::CreateMul(LHS, AddOne(C2));
+    Constant *C1, *C2;
+    // X * C1 + X * C2 --> X * (C1 + C2)
+    if (Value *X = dyn_castFoldableMul(LHS, C1))
+      if (Value *Y = dyn_castFoldableMul(RHS, C2)) {
+        if ((X == Y) && WillNotOverflowSignedAdd(C1, C2)) {
+          if (BinaryOperator *NewInst =
+                  BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2))) {
+
+            NewInst->setHasNoSignedWrap(I.hasNoSignedWrap());
+
+            // TODO: Check for unsigned wrap
+            return NewInst;
+          }
+        }
+      }
   }
 
   // A+B --> A|B iff A and B have no bits set in common.
@@ -1517,15 +1531,12 @@
     }
   }
 
-  Constant *C1;
-  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
-    if (X == Op1)  // X*C - X --> X * (C-1)
-      return BinaryOperator::CreateMul(Op1, SubOne(C1));
-
-    Constant *C2;   // X*C1 - X*C2 -> X * (C1-C2)
-    if (X == dyn_castFoldableMul(Op1, C2))
-      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
-  }
+  Constant *C1, *C2;
+  // X * C1 - X * C2 --> X * (C1 - C2)
+  if (Value *X = dyn_castFoldableMul(Op0, C1))
+    if (Value *Y = dyn_castFoldableMul(Op1, C2))
+      if (X == Y)
+        return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
 
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
Index: test/Transforms/InstCombine/add2.ll
===================================================================
--- test/Transforms/InstCombine/add2.ll
+++ test/Transforms/InstCombine/add2.ll
@@ -76,3 +76,57 @@
 ; CHECK-NEXT: %add = sub <2 x i64> <i64 1, i64 2>, %A
 ; CHECK-NEXT: ret <2 x i64> %add
 }
+
+define i32 @add_nsw_mul_nsw(i32 %x) {
+  %add1 = add nsw i32 %x, %x
+  %add2 = add nsw i32 %add1, %x
+  ret i32 %add2
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT: %add2 = mul nsw i32 %x, 3
+; CHECK-NEXT: ret i32 %add2
+}
+
+define i32 @add_no_nsw_mul_no_nsw(i32 %x) {
+  %add1 = add i32 %x, %x
+  %add2 = add i32 %add1, %x
+  ret i32 %add2
+; CHECK-LABEL: @add_no_nsw_mul_no_nsw(
+; CHECK-NEXT: %add2 = mul i32 %x, 3
+; CHECK-NEXT: ret i32 %add2
+}
+
+define i32 @mul_add_to_mul_1(i32 %x) {
+  %mul1 = mul nsw i32 %x, 8
+  %add2 = add nsw i32 %x, %mul1
+  ret i32 %add2
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT: %add2 = mul nsw i32 %x, 9
+; CHECK-NEXT: ret i32 %add2
+}
+
+define i32 @mul_add_to_mul_2(i32 %x) {
+  %mul1 = mul nsw i32 %x, 8
+  %add2 = add nsw i32 %mul1, %x
+  ret i32 %add2
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT: %add2 = mul nsw i32 %x, 9
+; CHECK-NEXT: ret i32 %add2
+}
+
+define i32 @mul_sub_to_mul_1(i32 %x) {
+  %mul1 = mul nsw i32 %x, -8
+  %sub2 = sub nsw i32 %x, %mul1
+  ret i32 %sub2
+; CHECK-LABEL: @mul_sub_to_mul_1(
+; CHECK-NEXT: %sub2 = mul i32 %x, 9
+; CHECK-NEXT: ret i32 %sub2
+}
+
+define i32 @mul_sub_to_mul_2(i32 %x) {
+  %mul1 = mul nsw i32 %x, 8
+  %add2 = sub nsw i32 %mul1, %x
+  ret i32 %add2
+; CHECK-LABEL: @mul_sub_to_mul_2(
+; CHECK-NEXT: %add2 = mul i32 %x, 7
+; CHECK-NEXT: ret i32 %add2
+}
\ No newline at end of file