diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -995,6 +995,41 @@
   return nullptr;
 }
 
+// Fold variations of a^2 + 2*a*b + b^2 -> (a + b)^2.
+// Returns the replacement multiply, or nullptr if I matches no variant.
+// A multiply of two different operands is matched in either operand order,
+// e.g. both ((a << 1) + b) * b and b * ((a << 1) + b) are recognized.
+Instruction *InstCombinerImpl::foldSquareSumInts(BinaryOperator &I) {
+  Value *A, *B;
+
+  // (a * a) + (((a << 1) + b) * b)
+  // m_Deferred(X) only matches the value already bound to X by m_Value(X).
+  auto TwoAPlusB = m_c_Add(m_Shl(m_Deferred(A), m_SpecificInt(1)), m_Value(B));
+  bool Matches =
+      match(&I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_Deferred(A))),
+                        m_OneUse(m_c_Mul(TwoAPlusB, m_Deferred(B)))));
+
+  // ((a * b) << 1)  or ((a << 1) * b)
+  // +
+  // (a * a + b * b) or (b * b + a * a)
+  if (!Matches) {
+    // 2*a*b, spelled with the shift applied after or before the multiply.
+    auto TwoAB = m_CombineOr(
+        m_OneUse(m_Shl(m_Mul(m_Value(A), m_Value(B)), m_SpecificInt(1))),
+        m_OneUse(m_c_Mul(m_Shl(m_Value(A), m_SpecificInt(1)), m_Value(B))));
+    // a^2 + b^2; m_Deferred ties both squares to the A and B bound by TwoAB.
+    auto SumOfSquares = m_OneUse(m_c_Add(m_Mul(m_Deferred(A), m_Deferred(A)),
+                                         m_Mul(m_Deferred(B), m_Deferred(B))));
+    Matches = match(&I, m_c_Add(TwoAB, SumOfSquares));
+  }
+  if (!Matches)
+    return nullptr;
+
+  // Rebuild as (a + b) * (a + b); the new add and mul carry no nsw/nuw flags.
+  Value *AB = Builder.CreateAdd(A, B);
+  return BinaryOperator::CreateMul(AB, AB);
+}
+
 // Matches multiplication expression Op * C where C is a constant. Returns the
 // constant value in C and the other operand in Op. Returns true if such a
 // match is found.
@@ -1615,6 +1650,9 @@ I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, {Builder.CreateOr(A, B)})); + if (Instruction *Res = foldSquareSumInts(I)) + return Res; + if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -540,6 +540,8 @@ Instruction *foldAddWithConstant(BinaryOperator &Add); + Instruction *foldSquareSumInts(BinaryOperator &I); + /// Try to rotate an operation below a PHI node, using PHI nodes for /// its operands. Instruction *foldPHIArgOpIntoPHI(PHINode &PN); diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -3098,11 +3098,8 @@ define i32 @add_reduce_sqr_sum_nsw(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_nsw( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul nsw i32 %a, %a @@ -3115,11 +3112,8 @@ define i32 @add_reduce_sqr_sum_u(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_u( -; CHECK-NEXT: [[A_SQ:%.*]] = mul i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = 
mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul i32 %a, %a @@ -3132,11 +3126,8 @@ define i32 @add_reduce_sqr_sum_nuw(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_nuw( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nuw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul nuw i32 %a, %a @@ -3149,11 +3140,8 @@ define i32 @add_reduce_sqr_sum_flipped(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_flipped( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A_SQ]], [[MUL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul nsw i32 %a, %a @@ -3166,11 +3154,8 @@ define i32 @add_reduce_sqr_sum_flipped2(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_flipped2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul nsw i32 %a, %a @@ -3183,11 +3168,8 @@ define i32 @add_reduce_sqr_sum_flipped3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_flipped3( -; CHECK-NEXT: [[A_SQ:%.*]] = 
mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %a_sq = mul nsw i32 %a, %a @@ -3200,11 +3182,8 @@ define i32 @add_reduce_sqr_sum_order2(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3218,11 +3197,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB_B2]], [[A_SQ]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3236,11 +3212,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped2(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]] -; CHECK-NEXT: 
[[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3254,11 +3227,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped3( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3272,12 +3242,8 @@ define i32 @add_reduce_sqr_sum_order3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order3( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3291,12 +3257,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: 
[[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3310,12 +3272,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped2(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3329,12 +3287,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped3( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3348,12 +3302,8 @@ define i32 @add_reduce_sqr_sum_order4(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order4( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[AB:%.*]] = mul i32 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB:%.*]] = shl i32 [[AB]], 1 -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = 
mul nsw i32 %a, %a @@ -3367,12 +3317,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[AB:%.*]] = mul i32 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB:%.*]] = shl i32 [[AB]], 1 -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3386,12 +3332,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped2(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[AB:%.*]] = mul i32 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB:%.*]] = shl i32 [[AB]], 1 -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3405,12 +3347,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped3( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[AB:%.*]] = mul i32 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TWOAB:%.*]] = shl i32 [[AB]], 1 -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3424,12 +3362,8 @@ define i32 
@add_reduce_sqr_sum_order4_flipped4(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped4( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[AB:%.*]] = mul i32 [[B:%.*]], [[A]] -; CHECK-NEXT: [[TWOAB:%.*]] = shl i32 [[AB]], 1 -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3443,12 +3377,8 @@ define i32 @add_reduce_sqr_sum_order5(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order5( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3462,12 +3392,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3481,12 +3407,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped2(i32 %a, i32 %b) { ; CHECK-LABEL: 
@add_reduce_sqr_sum_order5_flipped2( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3500,12 +3422,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped3(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped3( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a @@ -3519,12 +3437,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped4(i32 %a, i32 %b) { ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped4( -; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]] -; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1 -; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]] -; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]] -; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret i32 [[AB2]] ; %a_sq = mul nsw i32 %a, %a