diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -322,15 +322,19 @@
   return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
 }
 
-/// If we have a shift-by-constant of a bitwise logic op that itself has a
-/// shift-by-constant operand with identical opcode, we may be able to convert
-/// that into 2 independent shifts followed by the logic op. This eliminates a
-/// a use of an intermediate value (reduces dependency chain).
+/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
+/// shl) that itself has a shift-by-constant operand with identical opcode, we
+/// may be able to convert that into 2 independent shifts followed by the bin
+/// op. This removes a use of an intermediate value (reduces dependency chain).
 static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
                                             InstCombiner::BuilderTy &Builder) {
   assert(I.isShift() && "Expected a shift as input");
-  auto *LogicInst = dyn_cast<BinaryOperator>(I.getOperand(0));
-  if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse())
+  auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
+  if (!BinInst ||
+      (!BinInst->isBitwiseLogicOp() &&
+       BinInst->getOpcode() != Instruction::Add &&
+       BinInst->getOpcode() != Instruction::Sub) ||
+      !BinInst->hasOneUse())
     return nullptr;
 
   Constant *C0, *C1;
@@ -338,6 +342,12 @@
     return nullptr;
 
   Instruction::BinaryOps ShiftOpcode = I.getOpcode();
+  // Transform for add/sub only works with shl.
+  if ((BinInst->getOpcode() == Instruction::Add ||
+       BinInst->getOpcode() == Instruction::Sub) &&
+      ShiftOpcode != Instruction::Shl)
+    return nullptr;
+
   Type *Ty = I.getType();
 
   // Find a matching one-use shift by constant. The fold is not valid if the sum
@@ -352,19 +362,24 @@
                  m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
   };
 
-  // Logic ops are commutative, so check each operand for a match.
-  if (matchFirstShift(LogicInst->getOperand(0)))
-    Y = LogicInst->getOperand(1);
-  else if (matchFirstShift(LogicInst->getOperand(1)))
-    Y = LogicInst->getOperand(0);
-  else
+  // Logic ops and Add are commutative, so check each operand for a match. Sub
+  // is not, so we must restore the operand order if we matched operand(1).
+  bool Reorder = false;
+  if (matchFirstShift(BinInst->getOperand(0)))
+    Y = BinInst->getOperand(1);
+  else if (matchFirstShift(BinInst->getOperand(1))) {
+    Y = BinInst->getOperand(0);
+    Reorder = BinInst->getOpcode() == Instruction::Sub;
+  } else
     return nullptr;
 
-  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  // shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
   Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
   Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
   Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, I.getOperand(1));
-  return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2);
+  Value *Op1 = Reorder ? NewShift2 : NewShift1;
+  Value *Op2 = Reorder ? NewShift1 : NewShift2;
+  return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
 }
 
 Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll
--- a/llvm/test/Transforms/InstCombine/shift-logic.ll
+++ b/llvm/test/Transforms/InstCombine/shift-logic.ll
@@ -335,9 +335,9 @@
 
 define i8 @shl_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add(
-; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[SH0]], [[Y:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[SH1:%.*]] = add i8 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i8 [[SH1]]
 ;
   %sh0 = shl i8 %x, 3
@@ -348,9 +348,9 @@
 
 define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_add_nonuniform(
-; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[SH0]], [[Y:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 0>
+; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i8> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i8> [[SH1]]
 ;
   %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
@@ -363,9 +363,9 @@
 define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_add_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
-; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i64> [[Y]], [[SH0]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef>
+; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i64> [[SH1]]
 ;
   %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
@@ -419,9 +419,9 @@
 
 define i8 @shl_sub(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_sub(
-; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[SH0]], [[Y:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[SH1:%.*]] = sub i8 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i8 [[SH1]]
 ;
   %sh0 = shl i8 %x, 3
@@ -433,9 +433,9 @@
 ; Make sure we don't commute operands for sub
 define i8 @shl_sub_no_commute(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_sub_no_commute(
-; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[Y:%.*]], 3
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[X:%.*]], [[SH0]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[Y:%.*]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[SH1:%.*]] = sub i8 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[SH1]]
 ;
   %sh0 = shl i8 %y, 3
@@ -446,9 +446,9 @@
 
 define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_sub_nonuniform(
-; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[SH0]], [[Y:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 0>
+; CHECK-NEXT:    [[SH1:%.*]] = sub <2 x i8> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i8> [[SH1]]
 ;
   %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
@@ -461,9 +461,9 @@
 define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_sub_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
-; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i64> [[Y]], [[SH0]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef>
+; CHECK-NEXT:    [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i64> [[SH1]]
 ;
   %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization