diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -450,10 +450,11 @@
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
                                         Value *RHS);

-  // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
-  //    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
-  // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
-  //    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+  // Fold consecutive sequences of binops that share a common logic shift as
+  // their base. If all the binops are the same, this works with non-constant
+  // operands. Otherwise it only applies if the shift amount and all binop
+  // operands are constant.
+  // Applicable binops are: 'and', 'xor', 'or', and 'add'.
   Instruction *foldBinOpShiftWithShift(BinaryOperator &I);

   /// This tries to simplify binary operations by factorizing out common terms
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -730,21 +730,24 @@
   return RetVal;
 }

-// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+// (OuterBinOp (BOA_N... (logic_shift X, C), C1), (BOB_N... (logic_shift Y, C)))
 // IFF
 //    1) the logic_shifts match
-//    2) either both binops are bitwise and one is `and` or
-//       BinOp1 is `and`
+//    2) there can be zero binops between OuterBinOp and one of the shifts
+//    IFF there are only two binops total (including OuterBinOp):
+//    3) either both binops are bitwise (or `add` + `shl`) and one is `and`,
+//       or OuterBinOp is `and`
 //       (logic_shift (inv_logic_shift C1, C), C) == C1 or
 //
-//    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+//    -> (logic_shift (OuterBinOp (BOA_N... X, inv_logic_shift(C1, C)), Y), C)
 //
-// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+// (OuterBinOp (BO_N... (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
 // IFF
 //    1) the logic_shifts match
-//    2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
+//    2) OuterBinOp == BinOp_N (all of them). If BinOp == `add`, then `shl` is
+//       also required.
 //
-//    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+//    -> (BinOp_N... (logic_shift (BinOp X, Y)), Mask)
 Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
   auto IsValidBinOpc = [](unsigned Opc) {
     switch (Opc) {
@@ -760,110 +763,195 @@
     }
   };

+  auto IsValidOperand = [IsValidBinOpc](const Value *V) {
+    if (const Instruction *Ins = dyn_cast<Instruction>(V))
+      return IsValidBinOpc(Ins->getOpcode());
+    return false;
+  };
+
   // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
   // constraints.
-  auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
-                                      unsigned ShOpc) {
-    return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
-           ShOpc == Instruction::Shl;
+  auto IsCompletelyDistributable = [](bool HasAdd, unsigned ShOpc) {
+    return !HasAdd || ShOpc == Instruction::Shl;
   };

   auto GetInvShift = [](unsigned ShOpc) {
     return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
   };

-  auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
-                                 unsigned ShOpc, Constant *CMask,
-                                 Constant *CShift) {
-    // If the BinOp1 is `and` we don't need to check the mask.
-    if (BinOpc1 == Instruction::And)
-      return true;
-
-    // For all other possible transfers we need complete distributable
-    // binop/shift (anything but `add` + `lshr`).
-    if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
-      return false;
+  auto CanDistributeBinops =
+      [&](unsigned OuterBinOpc, bool HasAdd, unsigned ShOpc, Constant *CShAmt,
+          const ArrayRef<SmallVector<Instruction *>> BinOps) {
+        size_t NumInnerOps = BinOps[0].size() + BinOps[1].size();
+        // If the OuterBinOp is `and` and there are only two binops in total,
+        // we don't need to check the mask. TODO: This condition is overly
+        // conservative when there are multiple inner binops.
+        if (OuterBinOpc == Instruction::And && NumInnerOps == 1)
+          return true;
+
+        // For all other possible transfers we need complete distributable
+        // binop/shift (anything but `add` + `lshr`).
+        if (!IsCompletelyDistributable(HasAdd, ShOpc))
+          return false;

-    // If BinOp2 is `and`, any mask works (this only really helps for non-splat
-    // vecs, otherwise the mask will be simplified and the following check will
-    // handle it).
-    if (BinOpc2 == Instruction::And)
-      return true;
+        for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+          bool HasAnd = false;
+          for (auto *Ins : BinOps[OpIdx]) {
+            // Applies if we have exactly one inner binop: if we have an inner
+            // `and`, any mask works (this only really helps for non-splat
+            // vecs, otherwise the mask will be simplified and the following
+            // check will handle it).
+            HasAnd |= Ins->getOpcode() == Instruction::And;
+            Constant *C;
+            // The transform only makes sense if we can constant-evaluate the
+            // shifted mask.
+            if (!match(Ins->getOperand(1), m_ImmConstant(C)))
+              return false;

-    // Otherwise, need mask that meets the below requirement.
-    // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
-    return ConstantExpr::get(
-               ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
-               CShift) == CMask;
-  };
+            if (!HasAnd || NumInnerOps != 1)
+              // Otherwise, need mask that meets the below requirement.
+              // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
+              if (ConstantExpr::get(
+                      ShOpc, ConstantExpr::get(GetInvShift(ShOpc), C, CShAmt),
+                      CShAmt) != C)
+                return false;
+          }
+        }
+        return true;
+      };
+
+  Constant *CShAmt;
+  SmallVector<Instruction *> BinOps[2];
+  Instruction *Shifts[2] = {nullptr, nullptr};
+  unsigned ShOpc, OuterBinOpc = I.getOpcode();
+  Value *ShAmt;
+  bool HasAdd = OuterBinOpc == Instruction::Add;
+  bool AllSame = true;
+  for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+    Instruction *Last = nullptr;
+    Instruction *Cur = dyn_cast<Instruction>(I.getOperand(OpIdx));
+
+    while (true) {
+      // If Cur is not an instruction we can never transform (if it's a
+      // ConstantExpr, we will retry later after it has been evaluated).
+      if (!Cur)
+        return nullptr;

-  auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
-    Constant *CMask, *CShift;
-    Value *X, *Y, *ShiftedX, *Mask, *Shift;
-    if (!match(I.getOperand(ShOpnum),
-               m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift)))))
-      return nullptr;
-    if (!match(I.getOperand(1 - ShOpnum),
-               m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
-      return nullptr;
+      // If Cur has multiple uses this transform will *probably* increase
+      // instruction count.
+      if (!Cur->hasOneUse())
+        return nullptr;

-    if (!match(ShiftedX,
-               m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift)))))
-      return nullptr;
+      // Ends when we find a value that isn't 'and', 'xor', 'or', or 'add'.
+      if (!IsValidOperand(Cur))
+        break;

-    // Make sure we are matching instruction shifts and not ConstantExpr
-    auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
-    auto *IX = dyn_cast<Instruction>(ShiftedX);
-    if (!IY || !IX)
-      return nullptr;
+      BinOps[OpIdx].push_back(Cur);
+      // Track if we find an 'add'. 'add' has additional constraints below.
+      HasAdd |= Cur->getOpcode() == Instruction::Add;
+      AllSame &= Cur->getOpcode() == OuterBinOpc;
+      Last = Cur;
+      // TODO: We could properly gather all non-constant operands to check for
+      // a shift. This is probably overkill, however, and would require extra
+      // logic to prevent exponential explosion. This transform is primarily
+      // targeted at constants, hence we only meaningfully follow operand 0.
+      Cur = dyn_cast<Instruction>(Cur->getOperand(0));
+    }
+    // See if we have a valid shift at the end.
+    for (unsigned LastOpIdx = 0, E = (Last ? 2 : 1); LastOpIdx < E;
+         ++LastOpIdx) {
+      Value *A, *B;
+      Cur = Last ? dyn_cast<Instruction>(Last->getOperand(LastOpIdx)) : Cur;
+      if (!Cur || !match(Cur, m_LogicalShift(m_Value(A), m_Value(B))))
+        continue;

-    // LHS and RHS need same shift opcode
-    unsigned ShOpc = IY->getOpcode();
-    if (ShOpc != IX->getOpcode())
+      if (OpIdx == 0) {
+        ShOpc = cast<Instruction>(Cur)->getOpcode();
+        ShAmt = B;
+      } else {
+        // If shift opcodes don't match we can't transform.
+        if (ShOpc != cast<Instruction>(Cur)->getOpcode())
+          continue;
+        // If the shift amounts don't match we can't transform.
+        if (B != ShAmt)
+          continue;
+      }
+      Shifts[OpIdx] = Cur;
+      break;
+    }
+    // We didn't find a shift.
+    if (Shifts[OpIdx] == nullptr)
       return nullptr;
+  }

-    // Make sure binop is real instruction and not ConstantExpr
-    auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
-    if (!BO2)
-      return nullptr;
+  // If all the binops are the same and it's bitwise, or shl with add, then
+  // just distribute to drop the shift regardless of the constants.
+  if (AllSame && IsCompletelyDistributable(HasAdd, ShOpc)) {
+    Instruction::BinaryOps BOpc =
+        static_cast<Instruction::BinaryOps>(OuterBinOpc);
+    Value *BaseBinOp = Builder.CreateBinOp(BOpc, Shifts[0]->getOperand(0),
+                                           Shifts[1]->getOperand(0));
+    Value *AggregatedBinOp = nullptr;
+    auto NextBinOp = [&](Value *Prev, Value *V) {
+      if (Prev == nullptr)
+        return V;
+      return Builder.CreateBinOp(BOpc, Prev, V);
+    };
+    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+      if (!BinOps[OpIdx].empty()) {
+        // We could combine all the constants here and reschedule the binop for
+        // better ILP, but that really belongs in another fold/pass.
+        for (unsigned BinOpIdx = 0; BinOpIdx + 1 < BinOps[OpIdx].size();
+             ++BinOpIdx)
+          AggregatedBinOp = NextBinOp(AggregatedBinOp,
+                                      BinOps[OpIdx][BinOpIdx]->getOperand(1));
+        Value *Last = BinOps[OpIdx].back()->getOperand(0);
+        if (Last == Shifts[OpIdx])
+          Last = BinOps[OpIdx].back()->getOperand(1);
+        AggregatedBinOp = NextBinOp(AggregatedBinOp, Last);
+      }
+    }

-    unsigned BinOpc = BO2->getOpcode();
-    // Make sure we have valid binops.
-    if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
-      return nullptr;
+    Value *FinalLhs = BaseBinOp, *FinalRhs = ShAmt;
+    Instruction::BinaryOps FinalBOpc =
+        static_cast<Instruction::BinaryOps>(ShOpc);

-    // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
-    // distribute to drop the shift irrelevant of constants.
-    if (BinOpc == I.getOpcode() &&
-        IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
-      Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
-      Value *NewBinOp1 = Builder.CreateBinOp(
-          static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
-      return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
+    if (AggregatedBinOp) {
+      FinalLhs = Builder.CreateBinOp(FinalBOpc, FinalLhs, FinalRhs);
+      FinalRhs = AggregatedBinOp;
+      FinalBOpc = BOpc;
     }

-    // Otherwise we can only distribute by constant shifting the mask, so
-    // ensure we have constants.
-    if (!match(Shift, m_ImmConstant(CShift)))
-      return nullptr;
-    if (!match(Mask, m_ImmConstant(CMask)))
-      return nullptr;
+    return BinaryOperator::Create(FinalBOpc, FinalLhs, FinalRhs);
+  }

-    // Check if we can distribute the binops.
-    if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
-      return nullptr;
+  // Otherwise we can only distribute by constant shifting the mask, so
+  // ensure we have constants.
+  if (!match(ShAmt, m_ImmConstant(CShAmt)))
+    return nullptr;
+  // Check if we can distribute the binops.
+  if (!CanDistributeBinops(OuterBinOpc, HasAdd, ShOpc, CShAmt, BinOps))
+    return nullptr;

-    Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
-    Value *NewBinOp2 = Builder.CreateBinOp(
-        static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
-    Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
-    return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
-                                  NewBinOp1, CShift);
-  };
+  // Create new binop chain.
+  Value *NewBinOps[2];
+  for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+    NewBinOps[OpIdx] = Shifts[OpIdx]->getOperand(0);
+    // Reverse order; we can't reorder operations across distinct opcodes.
+    for (auto *Ins : reverse(BinOps[OpIdx])) {
+      Constant *NewCMask = ConstantExpr::get(
+          GetInvShift(ShOpc), cast<Constant>(Ins->getOperand(1)), CShAmt);
+      NewBinOps[OpIdx] = Builder.CreateBinOp(
+          static_cast<Instruction::BinaryOps>(Ins->getOpcode()),
+          NewBinOps[OpIdx], NewCMask);
+    }
+  }

-  if (Instruction *R = MatchBinOp(0))
-    return R;
-  return MatchBinOp(1);
+  Value *NewOuterBinOp =
+      Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(OuterBinOpc),
+                          NewBinOps[0], NewBinOps[1]);
+  return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
+                                NewOuterBinOp, CShAmt);
 }

 Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -356,7 +356,7 @@
 define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@and_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y]], [[X]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
 ; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
@@ -387,10 +386,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@xor_shl
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]]
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[Z]], [[SX]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
@@ -405,10 +404,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@and_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]]
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
@@ -422,7 +420,7 @@
 define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: define {{[^@]+}}@or_lshr
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y]], [[X]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
 ; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP2]], [[Z]]
 ; CHECK-NEXT: ret i8 [[R]]
@@ -561,10 +559,9 @@
 ; CHECK-LABEL: define {{[^@]+}}@xor_lshr_multiuse
 ; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) {
 ; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]]
 ; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]]
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SHAMT]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
 ; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[A]], [[R]]
 ; CHECK-NEXT: ret i8 [[R2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
--- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
@@ -3,7 +3,7 @@

 define i8 @shl_and_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and_and(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 4
 ; CHECK-NEXT: [[BW1:%.*]] = and i8 [[TMP2]], 80
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -32,7 +32,7 @@

 define i8 @shl_add_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add_add(
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 2
 ; CHECK-NEXT: [[BW1:%.*]] = add i8 [[TMP2]], 48
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -162,7 +162,7 @@

 define i8 @lshr_or_or_fail(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_or_or_fail(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 5
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], -58
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -205,7 +205,7 @@

 define i8 @lshr_or_or_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @lshr_or_or_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -234,7 +234,7 @@

 define i8 @shl_xor_xor_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_xor_xor_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -263,7 +263,7 @@

 define <2 x i8> @shl_and_and_no_const(<2 x i8> %x, <2 x i8> %y, <2 x i8> %sh, <2 x i8> %mask) {
 ; CHECK-LABEL: @shl_and_and_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -277,7 +277,7 @@

 define i8 @shl_add_add_no_const(i8 %x, i8 %y, i8 %sh, i8 %mask) {
 ; CHECK-LABEL: @shl_add_add_no_const(
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], [[SH:%.*]]
 ; CHECK-NEXT: [[BW1:%.*]] = add i8 [[TMP2]], [[MASK:%.*]]
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -335,7 +335,7 @@

 define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
 ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -349,7 +349,7 @@

 define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_fail_bad_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]],
 ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
@@ -363,7 +363,7 @@

 define i8 @lshr_xor_or_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_xor_or_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 4
 ; CHECK-NEXT: [[BW1:%.*]] = or i8 [[TMP2]], 48
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -421,7 +421,7 @@

 define i8 @shl_xor_xor_good_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_good_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 1
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], 88
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -435,7 +435,7 @@

 define i8 @shl_xor_xor_bad_mask_distribute(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_xor_xor_bad_mask_distribute(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 1
 ; CHECK-NEXT: [[BW1:%.*]] = xor i8 [[TMP2]], -68
 ; CHECK-NEXT: ret i8 [[BW1]]
@@ -553,19 +553,18 @@

 define i8 @lshr_ors_x9_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @lshr_ors_x9_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = or i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = or i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = or i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = or i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = or i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = or i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = or i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = or i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[X8:%.*]] = or i8 [[X7]], [[Z8:%.*]]
-; CHECK-NEXT: [[X9:%.*]] = or i8 [[X8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[X9]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[Z9:%.*]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP3]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP5]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP6]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i8 [[TMP8]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[TMP9]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = lshr i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %amt
@@ -586,19 +585,18 @@

 define i8 @shl_xors_x8_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_xors_x8_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = xor i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = xor i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = xor i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = xor i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = xor i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = xor i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = xor i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = xor i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[X8:%.*]] = xor i8 [[X7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = xor i8 [[SY]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[Y9]], [[X8]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[Z9:%.*]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP5]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i8 [[TMP6]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP7]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -619,19 +617,18 @@

 define i8 @lshr_ands_x7_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @lshr_ands_x7_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = and i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = and i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = and i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = and i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = and i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = and i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = and i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[X7:%.*]] = and i8 [[X6]], [[Z7:%.*]]
-; CHECK-NEXT: [[Y8:%.*]] = and i8 [[SY]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = and i8 [[Y8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[X7]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[Z7:%.*]], [[Z6:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP5]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[TMP7]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[TMP8]], [[Z9:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i8 [[TMP9]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = lshr i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %amt
@@ -652,19 +649,18 @@

 define i8 @shl_adds_x6_noconsts(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_adds_x6_noconsts(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = add i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = add i8 [[X0]], [[Z1:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = add i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X3:%.*]] = add i8 [[X2]], [[Z3:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = add i8 [[X3]], [[Z4:%.*]]
-; CHECK-NEXT: [[X5:%.*]] = add i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[X6:%.*]] = add i8 [[X5]], [[Z6:%.*]]
-; CHECK-NEXT: [[Y7:%.*]] = add i8 [[SY]], [[Z7:%.*]]
-; CHECK-NEXT: [[Y8:%.*]] = add i8 [[Y7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = add i8 [[Y8]], [[Z9:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add i8 [[X6]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Z6:%.*]], [[Z5:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], [[Z4:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], [[Z3:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], [[Z1:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP7]], [[Z9:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP9]], [[Z7:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -685,17 +681,16 @@

 define i8 @shl_adds_x5(i8 %amt, i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %z2, i8 %z3, i8 %z4, i8 %z5, i8 %z6, i8 %z7, i8 %z8, i8 %z9) {
 ; CHECK-LABEL: @shl_adds_x5(
-; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[AMT:%.*]]
-; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[AMT]]
-; CHECK-NEXT: [[X0:%.*]] = add i8 [[SX]], [[Z0:%.*]]
-; CHECK-NEXT: [[X1:%.*]] = add i8 [[X0]], 88
-; CHECK-NEXT: [[X2:%.*]] = add i8 [[X1]], [[Z2:%.*]]
-; CHECK-NEXT: [[X4:%.*]] = add i8 [[X2]], -46
-; CHECK-NEXT: [[X5:%.*]] = add i8 [[X4]], [[Z5:%.*]]
-; CHECK-NEXT: [[Y7:%.*]] = add i8 [[SY]], -23
-; CHECK-NEXT: [[Y8:%.*]] = add i8 [[Y7]], [[Z8:%.*]]
-; CHECK-NEXT: [[Y9:%.*]] = add i8 [[Y8]], 22
-; CHECK-NEXT: [[R:%.*]] = add i8 [[X5]], [[Y9]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Z5:%.*]], -46
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], [[Z2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP3]], 88
+; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP4]], [[Z0:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP5]], 22
+; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[Z8:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP7]], -23
+; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[TMP1]], [[AMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %sx = shl i8 %x, %amt
@@ -803,11 +798,10 @@

 define <2 x i8> @shl_xor_add_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_add_and(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = add <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[SHIFT2]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]],
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i8> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[BW1:%.*]] = shl <2 x i8> [[TMP3]],
 ; CHECK-NEXT: ret <2 x i8> [[BW1]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -821,12 +815,11 @@

 define <2 x i8> @shl_xor_or_add_xor(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_or_add_xor(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = add <2 x i8> [[SHIFT2]],
-; CHECK-NEXT: [[BW0:%.*]] = xor <2 x i8> [[BW1]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP2]],
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[BW0:%.*]] = shl <2 x i8> [[TMP4]],
 ; CHECK-NEXT: ret <2 x i8> [[BW0]]
 ;
   %shift1 = shl <2 x i8> %x,
@@ -860,12 +853,11 @@

 define <2 x i8> @shl_xor_or_add_and(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_xor_or_add_and(
-; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]],
-; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]],
-; CHECK-NEXT: [[BW3:%.*]] = xor <2 x i8> [[SHIFT1]],
-; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[BW3]],
-; CHECK-NEXT: [[BW1:%.*]] = add <2 x i8> [[SHIFT2]],
-; CHECK-NEXT: [[BW0:%.*]] = and <2 x i8> [[BW1]], [[BW2]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]],
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]],
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP2]],
+; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i8> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[BW0:%.*]] = shl <2 x i8> [[TMP4]],
 ; CHECK-NEXT: ret <2 x i8> [[BW0]]
 ;
   %shift1 = shl <2 x i8> %x,
diff --git a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
--- a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
+++ b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
@@ -126,15 +126,12 @@
 ; CHECK-NEXT: [[I1:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT: br i1 [[I1]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK: if:
-; CHECK-NEXT: [[I:%.*]] = lshr i32 [[X]], 22
-; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 24
-; CHECK-NEXT: [[I3:%.*]] = lshr i32 [[X]], 22
-; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 480
-; CHECK-NEXT: [[I5:%.*]] = or i32 [[I4]], [[I2]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 22
+; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP1]], 504
 ; CHECK-NEXT: br label [[END:%.*]]
 ; CHECK: else:
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 17
-; CHECK-NEXT: [[I9:%.*]] = and i32 [[TMP1]], 16128
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X]], 17
+; CHECK-NEXT: [[I9:%.*]] = and i32 [[TMP2]], 16128
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
 ; CHECK-NEXT: [[I10:%.*]] = phi i32 [ [[I5]], [[IF]] ], [ [[I9]], [[ELSE]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -63,31 +63,31 @@
 ; CHECK-NEXT: [[IDX_NEG_1_1:%.*]] = xor i64 [[INDVARS_IV_1]], -1
 ; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_1]]
 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]]
 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]]
 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT: [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]]
 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[TMP14]]
 ; CHECK-NEXT: [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]]
 ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], [[TMP16]]
 ; CHECK-NEXT: [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]]
 ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP18]]
 ; CHECK-NEXT: [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
 ; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]]
 ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], [[TMP20]]
 ; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
 ; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
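
For reference, a minimal before/after IR sketch of the generalized fold, in the all-same-binop (non-constant operand) case. It mirrors the lshr_ors_x9_noconsts test above; the function and value names here are illustrative only, not taken from the test suite.

define i8 @chained_or_example(i8 %x, i8 %y, i8 %z0, i8 %z1, i8 %amt) {
  ; Before: two lshr's by the same amount feed a chain of 'or's.
  %sx = lshr i8 %x, %amt
  %sy = lshr i8 %y, %amt
  %a = or i8 %sx, %z0
  %b = or i8 %a, %z1
  %r = or i8 %b, %sy
  ret i8 %r
}

; After the fold only one shift remains: because all the binops are the same
; ('or') and lshr distributes over it, %x and %y are combined before the
; shift and the unshifted operands are re-aggregated separately:
;   %xy = or i8 %y, %x
;   %zz = or i8 %z1, %z0
;   %sh = lshr i8 %xy, %amt
;   %r  = or i8 %sh, %zz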