diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1767,98 +1767,143 @@ // shift as Val * (1 << Amt). static Instruction *simplifyIRemMulShl(BinaryOperator &I, InstCombinerImpl &IC) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr; - APInt Y, Z; - bool ShiftByX = false; - - // If V is not nullptr, it will be matched using m_Specific. - auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C) -> bool { - const APInt *Tmp = nullptr; - if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) || - (V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp))))) - C = *Tmp; - else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) || - (V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp))))) - C = APInt(Tmp->getBitWidth(), 1) << *Tmp; - if (Tmp != nullptr) - return true; - - // Reset `V` so we don't start with specific value on next match attempt. - V = nullptr; - return false; - }; - - auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool { - const APInt *Tmp = nullptr; - if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) || - (V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) { - C = *Tmp; - return true; - } - - // Reset `V` so we don't start with specific value on next match attempt. 
- V = nullptr; - return false; - }; - if (MatchShiftOrMulXC(Op0, X, Y) && MatchShiftOrMulXC(Op1, X, Z)) { - // pass - } else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X, *Y, *Z; + bool ShiftByX = false, ShiftByY = false, ShiftByZ = false; + if ((match(Op0, m_Mul(m_Value(X), m_Value(Y))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) || + (match(Op0, m_Mul(m_Value(Y), m_Value(X))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z))))) { + // Pass + } else if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) { + ShiftByY = true; + } else if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) && + match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) { + ShiftByZ = true; + } else if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) { + ShiftByZ = true; + ShiftByY = true; + } else if (match(Op0, m_Shl(m_Value(Y), m_Value(X))) && + match(Op1, m_Shl(m_Value(Z), m_Specific(X)))) { ShiftByX = true; } else { return nullptr; } + OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0); + OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1); + + // If X is constant 1, then we avoid both in the mul and shl case. + auto CX = dyn_cast<Constant>(X); + if (CX && CX->isOneValue()) + return nullptr; + + auto GetOperandAsConstantInt = [](Value *Op) -> ConstantInt * { + if (Op->getType()->isVectorTy()) + if (auto *COp = dyn_cast<Constant>(Op)) { + auto *CSplat = COp->getSplatValue(); + return CSplat ? dyn_cast<ConstantInt>(CSplat) : nullptr; + } + return dyn_cast<ConstantInt>(Op); + }; + + ConstantInt *ConstY = GetOperandAsConstantInt(Y); + ConstantInt *ConstZ = GetOperandAsConstantInt(Z); + bool IsSRem = I.getOpcode() == Instruction::SRem; - OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0); // TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on Y >= // Z or Z >= Y. bool BO0HasNSW = BO0->hasNoSignedWrap(); bool BO0HasNUW = BO0->hasNoUnsignedWrap(); - bool BO0NoWrap = IsSRem ? 
BO0HasNSW : BO0HasNUW; - - APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z); - // (rem (mul nuw/nsw X, Y), (mul X, Z)) - // if (rem Y, Z) == 0 - // -> 0 - if (RemYZ.isZero() && BO0NoWrap) - return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType())); + bool BO1HasNSW = BO1->hasNoSignedWrap(); + bool BO1HasNUW = BO1->hasNoUnsignedWrap(); - // Helper function to emit either (RemSimplificationC << X) or - // (RemSimplificationC * X) depending on whether we matched Op0/Op1 as + // Helper function to emit either (RemSimplification << X) or + // (RemSimplification * X) depending on whether we matched Op0/Op1 as // (shl V, X) or (mul V, X) respectively. - auto CreateMulOrShift = - [&](const APInt &RemSimplificationC) -> BinaryOperator * { - Value *RemSimplification = - ConstantInt::get(I.getType(), RemSimplificationC); + auto CreateMulOrShift = [&](Value *RemSimplification) -> BinaryOperator * { return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X) : BinaryOperator::CreateMul(X, RemSimplification); }; - OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1); - bool BO1HasNSW = BO1->hasNoSignedWrap(); - bool BO1HasNUW = BO1->hasNoUnsignedWrap(); - bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW; - // (rem (mul X, Y), (mul nuw/nsw X, Z)) - // if (rem Y, Z) == Y - // -> (mul nuw/nsw X, Y) - if (RemYZ == Y && BO1NoWrap) { - BinaryOperator *BO = CreateMulOrShift(Y); - // Copy any overflow flags from Op0. - BO->setHasNoSignedWrap(IsSRem || BO0HasNSW); - BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW); - return BO; - } - - // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z)) - // if Y >= Z - // -> (mul {nuw} nsw X, (rem Y, Z)) - if (Y.uge(Z) && (IsSRem ? 
(BO0HasNSW && BO1HasNSW) : BO0HasNUW)) { - BinaryOperator *BO = CreateMulOrShift(RemYZ); - BO->setHasNoSignedWrap(); - BO->setHasNoUnsignedWrap(BO0HasNUW); - return BO; + auto CreateCMulOrCShift = + [&](const APInt &RemSimplificationC) -> BinaryOperator * { + return CreateMulOrShift(ConstantInt::get(I.getType(), RemSimplificationC)); + }; + + if (ConstY && ConstZ) { + APInt AdjustedY = ConstY->getValue(); + APInt AdjustedZ = ConstZ->getValue(); + + // Just treat the shifts as mul, we may end up returning a mul by power + // of 2 but that will be cleaned up later. + if (ShiftByY) + AdjustedY = APInt(AdjustedY.getBitWidth(), 1) << AdjustedY; + if (ShiftByZ) + AdjustedZ = APInt(AdjustedZ.getBitWidth(), 1) << AdjustedZ; + + bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW; + + APInt RemYZ = + IsSRem ? AdjustedY.srem(AdjustedZ) : AdjustedY.urem(AdjustedZ); + // (rem (mul nuw/nsw X, Y), (mul X, Z)) + // if (rem Y, Z) == 0 + // -> 0 + if (RemYZ.isZero() && BO0NoWrap) + return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType())); + + bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW; + // (rem (mul X, Y), (mul nuw/nsw X, Z)) + // if (rem Y, Z) == Y + // -> (mul nuw/nsw X, Y) + if (RemYZ == AdjustedY && BO1NoWrap) { + BinaryOperator *BO = CreateCMulOrCShift(AdjustedY); + // Copy any overflow flags from Op0. + BO->setHasNoSignedWrap(IsSRem || BO0HasNSW); + BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW); + return BO; + } + + // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z)) + // if Y >= Z + // -> (mul {nuw} nsw X, (rem Y, Z)) + if (AdjustedY.uge(AdjustedZ) && + (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) { + BinaryOperator *BO = CreateCMulOrCShift(RemYZ); + BO->setHasNoSignedWrap(); + BO->setHasNoUnsignedWrap(BO0HasNUW); + return BO; + } + } + // Check if desirable to do generic replacement. 
+ // NB: It may be beneficial to do this if we have X << Z even if there are + // multiple uses of Op0/Op1 as it will eliminate the urem (urem of a power + // of 2 is converted to add/and) and urem is pretty expensive (maybe more + // sense in DAGCombiner). + if ((ConstY && ConstZ) || + (Op0->hasOneUse() && Op1->hasOneUse() && + (IsSRem ? (!ShiftByY && !ShiftByZ) : (!ShiftByY || ShiftByZ)))) { + + // (rem (mul nuw/nsw X, Y), (mul nuw {nsw} X, Z) + // -> (mul nuw/nsw X, (rem Y, Z)) + if (IsSRem ? (BO0HasNSW && BO1HasNSW && BO1HasNUW) + : (BO0HasNUW && BO1HasNUW)) { + if (ShiftByY) + Y = IC.Builder.CreateShl(ConstantInt::get(I.getType(), 1), Y); + if (ShiftByZ) + Z = IC.Builder.CreateShl(ConstantInt::get(I.getType(), 1), Z); + + BinaryOperator *BO = CreateMulOrShift( + IsSRem ? IC.Builder.CreateSRem(Y, Z) : IC.Builder.CreateURem(Y, Z)); + + if (BO0HasNSW || BO1HasNSW) + BO->setHasNoSignedWrap(); + if (!IsSRem || (BO0HasNUW && BO1HasNUW)) + BO->setHasNoUnsignedWrap(); + return BO; + } } return nullptr; diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll --- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll +++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll @@ -155,9 +155,8 @@ define i8 @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw i8 %X, %Y @@ -168,9 +167,10 @@ define i8 @urem_XY_XZ_with_CX_Y_Z_is_mul_X_RemYZ(i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_CX_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw i8 [[Y:%.*]], 10 -; CHECK-NEXT: [[BO1:%.*]] = shl nuw i8 10, [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 
[[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[TMP2]], 10 ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw i8 10, %Y @@ -181,9 +181,10 @@ define i8 @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out1(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out1( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %Y @@ -194,9 +195,8 @@ define <2 x i8> @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out2(<2 x i8> %X, <2 x i8> %Y, <2 x i8> %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out2( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw <2 x i8> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem <2 x i8> [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nuw <2 x i8> %Y, %X @@ -398,9 +398,8 @@ define i8 @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = srem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = srem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nsw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nsw i8 %Y, %X @@ 
-411,9 +410,8 @@ define i8 @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nuw_out(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nuw_out( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw nsw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = srem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = srem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nsw nuw i8 %Y, %X @@ -646,9 +644,8 @@ define i8 @urem_shl_XX_shl_ZX(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_shl_XX_shl_ZX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[TMP1]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %X, %X @@ -659,9 +656,8 @@ define i8 @urem_shl_YX_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_shl_YX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[TMP1]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %Y, %X @@ -672,9 +668,11 @@ define i8 @urem_shl_XX_shl_XZ(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_shl_XX_shl_XZ( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i8 1, [[X:%.*]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP3]], [[X]] ; CHECK-NEXT: ret i8 
[[R]] ; %BO0 = shl nuw nsw i8 %X, %X @@ -685,9 +683,11 @@ define i8 @urem_shl_XY_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_shl_XY_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i8 1, [[Y:%.*]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP3]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %X, %Y @@ -711,9 +711,10 @@ define i8 @urem_mul_YX_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_mul_YX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %Y, %X @@ -724,9 +725,10 @@ define i8 @urem_mul_XX_shl_XZ(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_mul_XX_shl_XZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %X @@ -737,9 +739,10 @@ define i8 @urem_mul_XY_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_mul_XY_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] 
-; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %Y @@ -802,9 +805,10 @@ define i8 @urem_mul_XX_shl_XX(i8 %X) { ; CHECK-LABEL: @urem_mul_XX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %X