diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -158,7 +158,8 @@
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  unsigned BitWidth = I.getType()->getScalarSizeInBits();
+  Type *Ty = I.getType();
+  unsigned BitWidth = Ty->getScalarSizeInBits();
 
   // X * -1 == 0 - X
   if (match(Op1, m_AllOnes())) {
@@ -212,6 +213,25 @@
     if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this))
       return BinaryOperator::CreateMul(
           NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName());
+
+    // Try to convert multiply of extended operand to narrow negate and shift
+    // for better analysis.
+    // This is valid if the shift amount (trailing zeros in the multiplier
+    // constant) clears more high bits than the bitwidth difference between
+    // source and destination types:
+    // ({z/s}ext X) * (-1<<C) --> (zext (-X)) << C
+    const APInt *NegPow2C;
+    Value *X;
+    if (match(Op0, m_ZExtOrSExt(m_Value(X))) &&
+        match(Op1, m_APIntAllowUndef(NegPow2C))) {
+      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+      unsigned ShiftAmt = NegPow2C->countTrailingZeros();
+      if (ShiftAmt >= BitWidth - SrcWidth) {
+        Value *N = Builder.CreateNeg(X, X->getName() + ".neg");
+        Value *Z = Builder.CreateZExt(N, Ty, N->getName() + ".z");
+        return BinaryOperator::CreateShl(Z, ConstantInt::get(Ty, ShiftAmt));
+      }
+    }
   }
 
   if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
@@ -320,7 +340,6 @@
   // 2) X * Y --> X & Y, iff X, Y can be only {0,1}.
   // Note: We could use known bits to generalize this and related patterns with
   // shifts/truncs
-  Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1) ||
       (match(Op0, m_And(m_Value(), m_One())) &&
        match(Op1, m_And(m_Value(), m_One()))))
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -2254,18 +2254,16 @@
 ; CHECK-LABEL: @PR57576(
 ; CHECK-NEXT:    [[ZX:%.*]] = zext i64 [[X:%.*]] to i128
 ; CHECK-NEXT:    [[ZY:%.*]] = zext i64 [[Y:%.*]] to i128
-; CHECK-NEXT:    [[ZW:%.*]] = zext i64 [[W:%.*]] to i128
 ; CHECK-NEXT:    [[ZZ:%.*]] = zext i64 [[Z:%.*]] to i128
 ; CHECK-NEXT:    [[SHY:%.*]] = shl nuw i128 [[ZY]], 64
 ; CHECK-NEXT:    [[XY:%.*]] = or i128 [[SHY]], [[ZX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i128 [[ZW]], 64
-; CHECK-NEXT:    [[TMP2:%.*]] = or i128 [[TMP1]], [[ZZ]]
-; CHECK-NEXT:    [[ADD:%.*]] = sub i128 [[XY]], [[TMP2]]
-; CHECK-NEXT:    [[T:%.*]] = trunc i128 [[ADD]] to i64
-; CHECK-NEXT:    [[H:%.*]] = lshr i128 [[ADD]], 64
-; CHECK-NEXT:    [[T2:%.*]] = trunc i128 [[H]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i128 [[XY]], [[ZZ]]
+; CHECK-NEXT:    [[T:%.*]] = trunc i128 [[SUB]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[SUB]], 64
+; CHECK-NEXT:    [[DOTTR:%.*]] = trunc i128 [[TMP1]] to i64
+; CHECK-NEXT:    [[DOTNARROW:%.*]] = sub i64 [[DOTTR]], [[W:%.*]]
 ; CHECK-NEXT:    [[R1:%.*]] = insertvalue { i64, i64 } poison, i64 [[T]], 0
-; CHECK-NEXT:    [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[T2]], 1
+; CHECK-NEXT:    [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[DOTNARROW]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[R2]]
 ;
   %zx = zext i64 %x to i128
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -1484,8 +1484,9 @@
 
 define i32 @zext_negpow2(i8 %x) {
 ; CHECK-LABEL: @zext_negpow2(
-; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = mul i32 [[ZX]], -16777216
+; CHECK-NEXT:    [[X_NEG:%.*]] = sub i8 0, [[X:%.*]]
+; CHECK-NEXT:    [[X_NEG_Z:%.*]] = zext i8 [[X_NEG]] to i32
+; CHECK-NEXT:    [[R:%.*]] = shl nuw i32 [[X_NEG_Z]], 24
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %zx = zext i8 %x to i32
@@ -1493,10 +1494,13 @@
   ret i32 %r
 }
 
+; splat constant
+
 define <2 x i14> @zext_negpow2_vec(<2 x i5> %x) {
 ; CHECK-LABEL: @zext_negpow2_vec(
-; CHECK-NEXT:    [[ZX:%.*]] = zext <2 x i5> [[X:%.*]] to <2 x i14>
-; CHECK-NEXT:    [[R:%.*]] = mul <2 x i14> [[ZX]],
+; CHECK-NEXT:    [[X_NEG:%.*]] = sub <2 x i5> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[X_NEG_Z:%.*]] = zext <2 x i5> [[X_NEG]] to <2 x i14>
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i14> [[X_NEG_Z]],
 ; CHECK-NEXT:    ret <2 x i14> [[R]]
 ;
   %zx = zext <2 x i5> %x to <2 x i14>
@@ -1504,6 +1508,8 @@
   ret <2 x i14> %r
 }
 
+; negative test - mul must be big enough to cover bitwidth diff
+
 define i32 @zext_negpow2_too_small(i8 %x) {
 ; CHECK-LABEL: @zext_negpow2_too_small(
 ; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X:%.*]] to i32
@@ -1517,8 +1523,9 @@
 
 define i16 @sext_negpow2(i9 %x) {
 ; CHECK-LABEL: @sext_negpow2(
-; CHECK-NEXT:    [[SX:%.*]] = sext i9 [[X:%.*]] to i16
-; CHECK-NEXT:    [[R:%.*]] = mul i16 [[SX]], -1024
+; CHECK-NEXT:    [[X_NEG:%.*]] = sub i9 0, [[X:%.*]]
+; CHECK-NEXT:    [[X_NEG_Z:%.*]] = zext i9 [[X_NEG]] to i16
+; CHECK-NEXT:    [[R:%.*]] = shl i16 [[X_NEG_Z]], 10
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %sx = sext i9 %x to i16
@@ -1526,10 +1533,13 @@
   ret i16 %r
 }
 
+; splat constant with poison element(s)
+
 define <2 x i16> @sext_negpow2_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @sext_negpow2_vec(
-; CHECK-NEXT:    [[SX:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[R:%.*]] = mul <2 x i16> [[SX]],
+; CHECK-NEXT:    [[X_NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[X_NEG_Z:%.*]] = zext <2 x i8> [[X_NEG]] to <2 x i16>
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]],
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %sx = sext <2 x i8> %x to <2 x i16>
@@ -1537,6 +1547,8 @@
   ret <2 x i16> %r
 }
 
+; negative test - mul must be big enough to cover bitwidth diff
+
 define <2 x i16> @sext_negpow2_too_small_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @sext_negpow2_too_small_vec(
 ; CHECK-NEXT:    [[SX:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
@@ -1548,6 +1560,8 @@
   ret <2 x i16> %r
 }
 
+; negative test - too many uses
+
 define i32 @zext_negpow2_use(i8 %x) {
 ; CHECK-LABEL: @zext_negpow2_use(
 ; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X:%.*]] to i32
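
Reviewer note (not part of the patch): a minimal standalone C++ sketch to sanity-check the `ShiftAmt >= BitWidth - SrcWidth` condition, using the boundary case from @zext_negpow2 as an assumption (i8 zext'ed to i32, multiplier -16777216 = -1 << 24, so ShiftAmt == BitWidth - SrcWidth). The bits where -(zext X) and zext(-X) disagree are exactly the ones the shift discards, so the two forms agree for every i8 input.

  // Exhaustively checks (zext X) * (-1<<C) == (zext (-X)) << C for all i8 X,
  // with C = 24 and a 32-bit destination (the @zext_negpow2 case).
  #include <cassert>
  #include <cstdint>

  int main() {
    const unsigned ShiftAmt = 24;                        // trailing zeros of -16777216
    const uint32_t NegPow2C = (uint32_t)-1 << ShiftAmt;  // 0xFF000000 == -16777216
    for (unsigned V = 0; V < 256; ++V) {
      uint8_t X = (uint8_t)V;
      uint32_t Mul = (uint32_t)X * NegPow2C;                  // (zext X) * (-1<<C)
      uint32_t Shl = (uint32_t)(uint8_t)-X << ShiftAmt;       // (zext (-X)) << C
      assert(Mul == Shl);
    }
    return 0;
  }

Shrinking ShiftAmt below 24 (or widening the gap between source and destination types) makes the assertion fire, which is what the *_too_small negative tests above guard against.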