diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -566,6 +566,13 @@ return false; return true; } + case Instruction::Mul: { + const APInt *MulConst; + // Fold (shr (mul X, -(1 << C)), C) -> (and (neg X), lowmask(BitWidth - C)) + return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) && + MulConst->isNegatedPowerOf2() && + MulConst->countTrailingZeros() == NumBits; + } } } @@ -680,6 +687,16 @@ isLeftShift, IC, DL)); return PN; } + case Instruction::Mul: { + auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0)); + IC.InsertNewInstWith(Neg, *I); + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits); + auto *And = BinaryOperator::CreateAnd(Neg, + ConstantInt::get(I->getType(), Mask)); + And->takeName(I); + return IC.InsertNewInstWith(And, *I); + } } } diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -199,9 +199,9 @@ define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) { ; CHECK-LABEL: @lshr_shl_splat_vec( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[SH1:%.*]] = lshr exact <2 x i7> [[MUL]], -; CHECK-NEXT: ret <2 x i7> [[SH1]] +; CHECK-NEXT: [[DOTNEG:%.*]] = mul <2 x i7> [[X:%.*]], +; CHECK-NEXT: [[SH2:%.*]] = and <2 x i7> [[DOTNEG]], +; CHECK-NEXT: ret <2 x i7> [[SH2]] ; %mul = mul <2 x i7> %X, %sh1 = lshr exact <2 x i7> %mul, diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1832,9 +1832,9 @@ define i64 @lshr_mul_negpow2(i64 %x) { ; CHECK-LABEL: 
@lshr_mul_negpow2( -; CHECK-NEXT: [[A:%.*]] = mul i64 [[X:%.*]], -4294967296 -; CHECK-NEXT: [[B:%.*]] = lshr exact i64 [[A]], 32 -; CHECK-NEXT: ret i64 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = and i64 [[TMP1]], 4294967295 +; CHECK-NEXT: ret i64 [[A]] ; %a = mul i64 %x, -4294967296 %b = lshr i64 %a, 32 @@ -1843,9 +1843,9 @@ define i64 @lshr_mul_negpow2_2(i64 %x) { ; CHECK-LABEL: @lshr_mul_negpow2_2( -; CHECK-NEXT: [[A:%.*]] = mul i64 [[X:%.*]], -65536 -; CHECK-NEXT: [[B:%.*]] = lshr exact i64 [[A]], 16 -; CHECK-NEXT: ret i64 [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = and i64 [[TMP1]], 281474976710655 +; CHECK-NEXT: ret i64 [[A]] ; %a = mul i64 %x, -65536 %b = lshr i64 %a, 16 @@ -1854,9 +1854,9 @@ define <2 x i32> @lshr_mul_negpow2_3(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_3( -; CHECK-NEXT: [[A:%.*]] = mul <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[A]], -; CHECK-NEXT: ret <2 x i32> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[A]] ; %a = mul <2 x i32> %x, %b = lshr <2 x i32> %a, @@ -1865,10 +1865,10 @@ define i32 @lshr_mul_negpow2_4(i32 %x) { ; CHECK-LABEL: @lshr_mul_negpow2_4( -; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], -65536 -; CHECK-NEXT: [[B:%.*]] = lshr exact i32 [[A]], 16 -; CHECK-NEXT: [[C:%.*]] = xor i32 [[B]], 1 -; CHECK-NEXT: ret i32 [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = and i32 [[TMP1]], 65535 +; CHECK-NEXT: [[B:%.*]] = xor i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[B]] ; %a = mul i32 %x, -65536 %b = xor i32 %a, 65536 @@ -1878,10 +1878,10 @@ define <2 x i32> @lshr_mul_negpow2_5(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_5( -; CHECK-NEXT: [[A:%.*]] = mul <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[A]], -; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[B]], -; 
CHECK-NEXT: ret <2 x i32> [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A]], +; CHECK-NEXT: ret <2 x i32> [[B]] ; %a = mul <2 x i32> %x, %b = or <2 x i32> %a,