diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1767,98 +1767,143 @@ // shift as Val * (1 << Amt). static Instruction *simplifyIRemMulShl(BinaryOperator &I, InstCombinerImpl &IC) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr; - APInt Y, Z; - bool ShiftByX = false; - - // If V is not nullptr, it will be matched using m_Specific. - auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C) -> bool { - const APInt *Tmp = nullptr; - if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) || - (V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp))))) - C = *Tmp; - else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) || - (V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp))))) - C = APInt(Tmp->getBitWidth(), 1) << *Tmp; - if (Tmp != nullptr) - return true; - - // Reset `V` so we don't start with specific value on next match attempt. - V = nullptr; - return false; - }; - - auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool { - const APInt *Tmp = nullptr; - if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) || - (V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) { - C = *Tmp; - return true; - } - - // Reset `V` so we don't start with specific value on next match attempt. 
- V = nullptr; - return false; - }; - if (MatchShiftOrMulXC(Op0, X, Y) && MatchShiftOrMulXC(Op1, X, Z)) { - // pass - } else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X, *Y, *Z; + bool ShiftByX = false, ShiftByY = false, ShiftByZ = false; + if ((match(Op0, m_Mul(m_Value(X), m_Value(Y))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) || + (match(Op0, m_Mul(m_Value(Y), m_Value(X))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z))))) { + // Pass + } else if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) { + ShiftByY = true; + } else if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) && + match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) { + ShiftByZ = true; + } else if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) && + match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) { + ShiftByZ = true; + ShiftByY = true; + } else if (match(Op0, m_Shl(m_Value(Y), m_Value(X))) && + match(Op1, m_Shl(m_Value(Z), m_Specific(X)))) { ShiftByX = true; } else { return nullptr; } + OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0); + OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1); + + // If X is constant 1, then we avoid both in the mul and shl case. + auto CX = dyn_cast<Constant>(X); + if (CX && CX->isOneValue()) + return nullptr; + + auto GetOperandAsConstantInt = [](Value *Op) -> ConstantInt * { + if (Op->getType()->isVectorTy()) + if (auto *COp = dyn_cast<Constant>(Op)) { + auto *CSplat = COp->getSplatValue(); + return CSplat ? dyn_cast<ConstantInt>(CSplat) : nullptr; + } + return dyn_cast<ConstantInt>(Op); + }; + + ConstantInt *ConstY = GetOperandAsConstantInt(Y); + ConstantInt *ConstZ = GetOperandAsConstantInt(Z); + bool IsSRem = I.getOpcode() == Instruction::SRem; - OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0); // TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on Y >= // Z or Z >= Y. bool BO0HasNSW = BO0->hasNoSignedWrap(); bool BO0HasNUW = BO0->hasNoUnsignedWrap(); - bool BO0NoWrap = IsSRem ? 
BO0HasNSW : BO0HasNUW; - - APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z); - // (rem (mul nuw/nsw X, Y), (mul X, Z)) - // if (rem Y, Z) == 0 - // -> 0 - if (RemYZ.isZero() && BO0NoWrap) - return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType())); + bool BO1HasNSW = BO1->hasNoSignedWrap(); + bool BO1HasNUW = BO1->hasNoUnsignedWrap(); - // Helper function to emit either (RemSimplificationC << X) or - // (RemSimplificationC * X) depending on whether we matched Op0/Op1 as + // Helper function to emit either (RemSimplification << X) or + // (RemSimplification * X) depending on whether we matched Op0/Op1 as // (shl V, X) or (mul V, X) respectively. - auto CreateMulOrShift = - [&](const APInt &RemSimplificationC) -> BinaryOperator * { - Value *RemSimplification = - ConstantInt::get(I.getType(), RemSimplificationC); + auto CreateMulOrShift = [&](Value *RemSimplification) -> BinaryOperator * { return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X) : BinaryOperator::CreateMul(X, RemSimplification); }; - OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1); - bool BO1HasNSW = BO1->hasNoSignedWrap(); - bool BO1HasNUW = BO1->hasNoUnsignedWrap(); - bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW; - // (rem (mul X, Y), (mul nuw/nsw X, Z)) - // if (rem Y, Z) == Y - // -> (mul nuw/nsw X, Y) - if (RemYZ == Y && BO1NoWrap) { - BinaryOperator *BO = CreateMulOrShift(Y); - // Copy any overflow flags from Op0. - BO->setHasNoSignedWrap(IsSRem || BO0HasNSW); - BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW); - return BO; - } - - // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z)) - // if Y >= Z - // -> (mul {nuw} nsw X, (rem Y, Z)) - if (Y.uge(Z) && (IsSRem ? 
(BO0HasNSW && BO1HasNSW) : BO0HasNUW)) { - BinaryOperator *BO = CreateMulOrShift(RemYZ); - BO->setHasNoSignedWrap(); - BO->setHasNoUnsignedWrap(BO0HasNUW); - return BO; + auto CreateCMulOrCShift = + [&](const APInt &RemSimplificationC) -> BinaryOperator * { + return CreateMulOrShift(ConstantInt::get(I.getType(), RemSimplificationC)); + }; + + if (ConstY && ConstZ) { + APInt AdjustedY = ConstY->getValue(); + APInt AdjustedZ = ConstZ->getValue(); + + // Just treat the shifts as mul, we may end up returning a mul by power + // of 2 but that will be cleaned up later. + if (ShiftByY) + AdjustedY = APInt(AdjustedY.getBitWidth(), 1) << AdjustedY; + if (ShiftByZ) + AdjustedZ = APInt(AdjustedZ.getBitWidth(), 1) << AdjustedZ; + + bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW; + + APInt RemYZ = + IsSRem ? AdjustedY.srem(AdjustedZ) : AdjustedY.urem(AdjustedZ); + // (rem (mul nuw/nsw X, Y), (mul X, Z)) + // if (rem Y, Z) == 0 + // -> 0 + if (RemYZ.isZero() && BO0NoWrap) + return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType())); + + bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW; + // (rem (mul X, Y), (mul nuw/nsw X, Z)) + // if (rem Y, Z) == Y + // -> (mul nuw/nsw X, Y) + if (RemYZ == AdjustedY && BO1NoWrap) { + BinaryOperator *BO = CreateCMulOrCShift(AdjustedY); + // Copy any overflow flags from Op0. + BO->setHasNoSignedWrap(IsSRem || BO0HasNSW); + BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW); + return BO; + } + + // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z)) + // if Y >= Z + // -> (mul {nuw} nsw X, (rem Y, Z)) + if (AdjustedY.uge(AdjustedZ) && + (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) { + BinaryOperator *BO = CreateCMulOrCShift(RemYZ); + BO->setHasNoSignedWrap(); + BO->setHasNoUnsignedWrap(BO0HasNUW); + return BO; + } + } + // Check if desirable to do generic replacement. 
+ // NB: It may be beneficial to do this if we have X << Z even if there are + // multiple uses of Op0/Op1 as it will eliminate the urem (urem of a power + // of 2 is converted to add/and) and urem is pretty expensive (maybe more + // sense in DAGCombiner). + if ((ConstY && ConstZ) || + (Op0->hasOneUse() && Op1->hasOneUse() && + (IsSRem ? (!ShiftByY && !ShiftByZ) : (!ShiftByY || ShiftByZ)))) { + + // (rem (mul nuw/nsw X, Y), (mul nuw {nsw} X, Z) + // -> (mul nuw/nsw X, (rem Y, Z)) + if (IsSRem ? (BO0HasNSW && BO1HasNSW && BO1HasNUW) + : (BO0HasNUW && BO1HasNUW)) { + if (ShiftByY) + Y = IC.Builder.CreateShl(ConstantInt::get(I.getType(), 1), Y); + if (ShiftByZ) + Z = IC.Builder.CreateShl(ConstantInt::get(I.getType(), 1), Z); + + BinaryOperator *BO = CreateMulOrShift( + IsSRem ? IC.Builder.CreateSRem(Y, Z) : IC.Builder.CreateURem(Y, Z)); + + if (BO0HasNSW || BO1HasNSW) + BO->setHasNoSignedWrap(); + if (!IsSRem || (BO0HasNUW && BO1HasNUW)) + BO->setHasNoUnsignedWrap(); + return BO; + } } return nullptr; diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll --- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll +++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll @@ -155,9 +155,8 @@ define i8 @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw i8 %X, %Y @@ -168,9 +167,10 @@ define i8 @urem_XY_XZ_with_CX_Y_Z_is_mul_X_RemYZ(i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_CX_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw i8 [[Y:%.*]], 10 -; CHECK-NEXT: [[BO1:%.*]] = shl nuw i8 10, [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 
[[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[TMP2]], 10 ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw i8 10, %Y @@ -181,9 +181,10 @@ define i8 @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out1(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out1( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %Y @@ -194,9 +195,8 @@ define <2 x i8> @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out2(<2 x i8> %X, <2 x i8> %Y, <2 x i8> %Z) { ; CHECK-LABEL: @urem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nsw_out2( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw <2 x i8> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem <2 x i8> [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nuw <2 x i8> %Y, %X @@ -398,9 +398,8 @@ define i8 @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = srem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = srem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nsw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nsw i8 %Y, %X @@ 
-411,9 +410,8 @@ define i8 @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nuw_out(i8 %X, i8 %Y, i8 %Z) { ; CHECK-LABEL: @srem_XY_XZ_with_Y_Z_is_mul_X_RemYZ_with_nuw_out( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = mul nuw nsw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = srem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = srem i8 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nsw nuw i8 %Y, %X @@ -646,9 +644,8 @@ define i8 @urem_shl_XX_shl_ZX(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_shl_XX_shl_ZX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[TMP1]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %X, %X @@ -659,9 +656,8 @@ define i8 @urem_shl_YX_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_shl_YX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[TMP1]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %Y, %X @@ -672,9 +668,11 @@ define i8 @urem_shl_XX_shl_XZ(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_shl_XX_shl_XZ( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i8 1, [[X:%.*]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP3]], [[X]] ; CHECK-NEXT: ret i8 
[[R]] ; %BO0 = shl nuw nsw i8 %X, %X @@ -685,9 +683,11 @@ define i8 @urem_shl_XY_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_shl_XY_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i8 1, [[Y:%.*]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP3]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = shl nuw nsw i8 %X, %Y @@ -711,9 +711,10 @@ define i8 @urem_mul_YX_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_mul_YX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %Y, %X @@ -724,9 +725,10 @@ define i8 @urem_mul_XX_shl_XZ(i8 %X, i8 %Z) { ; CHECK-LABEL: @urem_mul_XX_shl_XZ( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %X @@ -737,9 +739,10 @@ define i8 @urem_mul_XY_shl_XX(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem_mul_XY_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] 
-; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %Y @@ -802,9 +805,10 @@ define i8 @urem_mul_XX_shl_XX(i8 %X) { ; CHECK-LABEL: @urem_mul_XX_shl_XX( -; CHECK-NEXT: [[BO0:%.*]] = mul nuw nsw i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[BO1:%.*]] = shl nuw nsw i8 [[X]], [[X]] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[BO0]], [[BO1]] +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[NOTMASK]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]] +; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i8 [[TMP2]], [[X]] ; CHECK-NEXT: ret i8 [[R]] ; %BO0 = mul nuw nsw i8 %X, %X