Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -871,22 +871,25 @@
 // Otherwise, return null.
 //
 static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
-  if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
+  if (!V->getType()->isIntOrIntVectorTy())
     return nullptr;
 
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return nullptr;
-
-  if (I->getOpcode() == Instruction::Mul)
-    if ((CST = dyn_cast<Constant>(I->getOperand(1))))
-      return I->getOperand(0);
-  if (I->getOpcode() == Instruction::Shl)
-    if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
-      // The multiplier is really 1 << CST.
-      CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
-      return I->getOperand(0);
+  if (V->hasOneUse()) {
+    if (Instruction *I = dyn_cast<Instruction>(V)) {
+      if (I->getOpcode() == Instruction::Mul)
+        if ((CST = dyn_cast<Constant>(I->getOperand(1))))
+          return I->getOperand(0);
+      if (I->getOpcode() == Instruction::Shl)
+        if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
+          // The multiplier is really 1 << CST.
+          CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
+          return I->getOperand(0);
+        }
     }
-  return nullptr;
+  }
+  // Anything else, including a multi-use mul/shl, is treated as V * 1.
+  CST = ConstantInt::get(V->getType(), 1);
+  return V;
 }
 
 // If one of the operands only has one non-zero bit, and if the other
@@ -1074,22 +1077,28 @@
     if (Value *V = dyn_castNegVal(RHS))
       return BinaryOperator::CreateSub(LHS, V);
 
-
   {
-    Constant *C2;
-    if (Value *X = dyn_castFoldableMul(LHS, C2)) {
-      if (X == RHS) // X*C + X --> X * (C+1)
-        return BinaryOperator::CreateMul(RHS, AddOne(C2));
-
-      // X*C1 + X*C2 --> X * (C1+C2)
-      Constant *C1;
-      if (X == dyn_castFoldableMul(RHS, C1))
-        return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
-    }
-
-    // X + X*C --> X * (C+1)
-    if (dyn_castFoldableMul(RHS, C2) == LHS)
-      return BinaryOperator::CreateMul(LHS, AddOne(C2));
+    // X * C1 + X * C2 --> X * (C1 + C2); a bare X is treated as X * 1.
+    Constant *C1, *C2;
+    Value *X = dyn_castFoldableMul(LHS, C1);
+    if (X && X == dyn_castFoldableMul(RHS, C2)) {
+      BinaryOperator *NewInst =
+          BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+      // Only add/sub/mul/shl carry wrap flags; asking e.g. an 'and' asserts.
+      bool hasNSW = I.hasNoSignedWrap();
+      if (auto *LHSO = dyn_cast<OverflowingBinaryOperator>(LHS))
+        hasNSW &= LHSO->hasNoSignedWrap();
+      if (auto *RHSO = dyn_cast<OverflowingBinaryOperator>(RHS))
+        hasNSW &= RHSO->hasNoSignedWrap();
+      // nsw is only sound if C1 + C2 itself does not wrap (signed).
+      auto *CI1 = dyn_cast<ConstantInt>(C1), *CI2 = dyn_cast<ConstantInt>(C2);
+      bool Wraps = true; // Conservative for non-ConstantInt (e.g. vectors).
+      if (CI1 && CI2)
+        (void)CI1->getValue().sadd_ov(CI2->getValue(), Wraps);
+      NewInst->setHasNoSignedWrap(hasNSW && !Wraps);
+      // TODO: Check for unsigned wrap
+      return NewInst;
+    }
   }
 
   // A+B --> A|B iff A and B have no bits set in common.
@@ -1571,15 +1580,12 @@
     }
   }
 
-  Constant *C1;
-  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
-    if (X == Op1)  // X*C - X --> X * (C-1)
-      return BinaryOperator::CreateMul(Op1, SubOne(C1));
-
-    Constant *C2;   // X*C1 - X*C2 -> X * (C1-C2)
-    if (X == dyn_castFoldableMul(Op1, C2))
-      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
-  }
+  // X * C1 - X * C2 --> X * (C1 - C2).  X is non-null inside the outer 'if',
+  // so comparing it against a null result from the second call simply fails.
+  Constant *C1, *C2;
+  if (Value *X = dyn_castFoldableMul(Op0, C1))
+    if (X == dyn_castFoldableMul(Op1, C2))
+      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
 
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
Index: test/Transforms/InstCombine/add2.ll
===================================================================
--- test/Transforms/InstCombine/add2.ll
+++ test/Transforms/InstCombine/add2.ll
@@ -86,3 +86,61 @@
 ; CHECK-NEXT:  %d = mul i16 %a, -32767
 ; CHECK-NEXT:  ret i16 %d
 }
+
+define i16 @add_nsw_mul_nsw(i16 %x) {
+  %add1 = add nsw i16 %x, %x
+  %add2 = add nsw i16 %add1, %x
+  ret i16 %add2
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 3
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_1(i16 %x) {
+  %mul1 = mul nsw i16 %x, 8
+  %add2 = add nsw i16 %x, %mul1
+  ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_2(i16 %x) {
+  %mul1 = mul nsw i16 %x, 8
+  %add2 = add nsw i16 %mul1, %x
+  ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_3(i16 %a) {
+  %mul1 = mul i16 %a, 2
+  %mul2 = mul i16 %a, 3
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_3(
+; CHECK-NEXT: %add = mul i16 %a, 5
+; CHECK-NEXT: ret i16 %add
+}
+
+define i16 @mul_add_to_mul_4(i16 %a) {
+  %mul1 = mul nsw i16 %a, 2
+  %mul2 = mul nsw i16 %a, 7
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_4(
+; CHECK-NEXT: %add = mul nsw i16 %a, 9
+; CHECK-NEXT: ret i16 %add
+}
+
+; TODO: the 'add nsw' in this test should be transformed into 'mul nsw'.
+define i16 @mul_add_to_mul_5(i16 %a) {
+  %mul1 = mul nsw i16 %a, 3
+  %mul2 = mul nsw i16 %a, 7
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_5(
+; CHECK-NEXT: %add = mul i16 %a, 10
+; CHECK-NEXT: ret i16 %add
+}
\ No newline at end of file