diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1344,6 +1344,18 @@
   if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
     return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));
 
+  {
+    // Reassociate add sequences to reduce dependency chains:
+    // ((A + B) + C) + RHS --> (A + B) + (C + RHS)
+    Value *A, *B, *C;
+    if (match(LHS, m_OneUse(m_c_Add(m_OneUse(m_c_Add(m_Value(A), m_Value(B))),
+                                    m_Value(C))))) {
+      Value *AB = Builder.CreateAdd(A, B, "reass.add");
+      Value *CD = Builder.CreateAdd(C, RHS, "reass.add");
+      return BinaryOperator::CreateAdd(AB, CD);
+    }
+  }
+
   {
     // (A + C1) + (C2 - B) --> (A - B) + (C1 + C2)
     Constant *C1, *C2;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll
--- a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll
@@ -833,10 +833,10 @@
 ; CHECK-NEXT:    [[T9:%.*]] = add i32 [[T4]], [[T6]]
 ; CHECK-NEXT:    [[T10:%.*]] = add i32 [[T8]], [[T9]]
 ; CHECK-NEXT:    [[T11:%.*]] = sext i32 [[T10]] to i64
-; CHECK-NEXT:    [[T12:%.*]] = add i64 [[T1]], [[T3]]
 ; CHECK-NEXT:    [[T13:%.*]] = add i64 [[T5]], [[T7]]
-; CHECK-NEXT:    [[T14:%.*]] = add i64 [[T12]], [[T13]]
-; CHECK-NEXT:    [[T15:%.*]] = add i64 [[T14]], [[T11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[T1]], [[T3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[T13]], [[T11]]
+; CHECK-NEXT:    [[T15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[T15]]
 ;
   %v00 = insertelement <4 x float> poison, float %f, i32 0
@@ -914,10 +914,10 @@
 ; CHECK-NEXT:    [[T9:%.*]] = add i32 [[T4]], [[T6]]
 ; CHECK-NEXT:    [[T10:%.*]] = add i32 [[T8]], [[T9]]
 ; CHECK-NEXT:    [[T11:%.*]] = sext i32 [[T10]] to i64
-; CHECK-NEXT:    [[T12:%.*]] = add i64 [[T1]], [[T3]]
 ; CHECK-NEXT:    [[T13:%.*]] = add i64 [[T5]], [[T7]]
-; CHECK-NEXT:    [[T14:%.*]] = add i64 [[T12]], [[T13]]
-; CHECK-NEXT:    [[T15:%.*]] = add i64 [[T14]], [[T11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[T1]], [[T3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[T13]], [[T11]]
+; CHECK-NEXT:    [[T15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[T15]]
 ;
   %v00 = insertelement <4 x float> poison, float %f, i32 0
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
--- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
@@ -833,10 +833,10 @@
 ; CHECK-NEXT:    [[T9:%.*]] = add i32 [[T4]], [[T6]]
 ; CHECK-NEXT:    [[T10:%.*]] = add i32 [[T8]], [[T9]]
 ; CHECK-NEXT:    [[T11:%.*]] = sext i32 [[T10]] to i64
-; CHECK-NEXT:    [[T12:%.*]] = add i64 [[T1]], [[T3]]
 ; CHECK-NEXT:    [[T13:%.*]] = add i64 [[T5]], [[T7]]
-; CHECK-NEXT:    [[T14:%.*]] = add i64 [[T12]], [[T13]]
-; CHECK-NEXT:    [[T15:%.*]] = add i64 [[T14]], [[T11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[T1]], [[T3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[T13]], [[T11]]
+; CHECK-NEXT:    [[T15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[T15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
@@ -914,10 +914,10 @@
 ; CHECK-NEXT:    [[T9:%.*]] = add i32 [[T4]], [[T6]]
 ; CHECK-NEXT:    [[T10:%.*]] = add i32 [[T8]], [[T9]]
 ; CHECK-NEXT:    [[T11:%.*]] = sext i32 [[T10]] to i64
-; CHECK-NEXT:    [[T12:%.*]] = add i64 [[T1]], [[T3]]
 ; CHECK-NEXT:    [[T13:%.*]] = add i64 [[T5]], [[T7]]
-; CHECK-NEXT:    [[T14:%.*]] = add i64 [[T12]], [[T13]]
-; CHECK-NEXT:    [[T15:%.*]] = add i64 [[T14]], [[T11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[T1]], [[T3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[T13]], [[T11]]
+; CHECK-NEXT:    [[T15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[T15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts-inseltpoison.ll
--- a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts-inseltpoison.ll
@@ -50,10 +50,10 @@
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[TMP15]]
 ;
   %v00 = insertelement <4 x float> poison, float %f, i32 0
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
--- a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
@@ -48,10 +48,10 @@
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i64 [[TMP15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -298,9 +298,9 @@
 ;; TODO: shl A, 1?
 define i32 @test13(i32 %A, i32 %B, i32 %C) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[D_OK:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[E_OK:%.*]] = add i32 [[D_OK]], [[C:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = add i32 [[E_OK]], [[A]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[A]]
+; CHECK-NEXT:    [[F:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i32 [[F]]
 ;
   %D_OK = add i32 %A, %B
@@ -1693,9 +1693,9 @@
 
 define i32 @add_add_add(i32 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @add_add_add(
-; CHECK-NEXT:    [[E:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = add i32 [[E]], [[C:%.*]]
-; CHECK-NEXT:    [[G:%.*]] = add i32 [[F]], [[D:%.*]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i32 [[G]]
 ;
   %E = add i32 %A, %B
@@ -1706,9 +1706,9 @@
 
 define i32 @add_add_add_commute1(i32 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @add_add_add_commute1(
-; CHECK-NEXT:    [[E:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = add i32 [[E]], [[C:%.*]]
-; CHECK-NEXT:    [[G:%.*]] = add i32 [[F]], [[D:%.*]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i32 [[G]]
 ;
   %E = add i32 %B, %A
@@ -1719,9 +1719,9 @@
 
 define i32 @add_add_add_commute2(i32 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @add_add_add_commute2(
-; CHECK-NEXT:    [[E:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = add i32 [[E]], [[C:%.*]]
-; CHECK-NEXT:    [[G:%.*]] = add i32 [[F]], [[D:%.*]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i32 [[G]]
 ;
   %E = add i32 %B, %A
@@ -1732,9 +1732,9 @@
 
 define i32 @add_add_add_commute3(i32 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @add_add_add_commute3(
-; CHECK-NEXT:    [[E:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = add i32 [[E]], [[C:%.*]]
-; CHECK-NEXT:    [[G:%.*]] = add i32 [[F]], [[D:%.*]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    ret i32 [[G]]
 ;
   %E = add i32 %B, %A
diff --git a/llvm/test/Transforms/InstCombine/call-guard.ll b/llvm/test/Transforms/InstCombine/call-guard.ll
--- a/llvm/test/Transforms/InstCombine/call-guard.ll
+++ b/llvm/test/Transforms/InstCombine/call-guard.ll
@@ -116,9 +116,9 @@
 ; CHECK-LABEL: @negative_window(
 ; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 [[V1:%.*]], 0
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[A]], i32 123) [ "deopt"() ]
-; CHECK-NEXT:    [[V2:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[V3:%.*]] = add i32 [[V2]], [[C:%.*]]
-; CHECK-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[D:%.*]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[V4:%.*]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    [[B:%.*]] = icmp slt i32 [[V4]], 0
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[B]], i32 456) [ "deopt"() ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -58,21 +58,21 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -166,21 +166,21 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP9]] = mul i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -226,20 +226,20 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[TMP8]] = add i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -894,9 +894,9 @@
 ; CHECK-NEXT:    [[L2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[L3:%.*]] = load i32, i32* [[L2]], align 4
 ; CHECK-NEXT:    [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[L7:%.*]] = add i32 [[SUM_02]], [[L6]]
-; CHECK-NEXT:    [[L8:%.*]] = add i32 [[L7]], [[L3]]
-; CHECK-NEXT:    [[L10]] = add i32 [[L8]], [[SUM_02]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add i32 [[SUM_02]], [[L6]]
+; CHECK-NEXT:    [[REASS_ADD1:%.*]] = add i32 [[L3]], [[SUM_02]]
+; CHECK-NEXT:    [[L10]] = add i32 [[REASS_ADD]], [[REASS_ADD1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256
@@ -939,21 +939,21 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -97,7 +97,7 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -154,23 +154,23 @@
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP41:%.*]] = add <4 x i32> [[TMP40]], [[TMP38]]
-; CHECK-NEXT:    [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]]
-; CHECK-NEXT:    [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
+; CHECK-NEXT:    [[REASS_ADD7:%.*]] = add <4 x i32> [[TMP38]], [[TMP39]]
+; CHECK-NEXT:    [[TMP40:%.*]] = add <4 x i32> [[REASS_ADD]], [[REASS_ADD7]]
+; CHECK-NEXT:    [[TMP41]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
-; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
+; CHECK-NEXT:    br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]])
+; CHECK-NEXT:    [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       ._crit_edge:
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
@@ -18,16 +18,16 @@
 ; CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
 ; CHECK-NEXT:    [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
 ; CHECK-NEXT:    [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT:    [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]]
-; CHECK-NEXT:    [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]]
-; CHECK-NEXT:    [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]]
-; CHECK-NEXT:    [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]]
-; CHECK-NEXT:    [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]]
-; CHECK-NEXT:    [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]]
-; CHECK-NEXT:    [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]]
-; CHECK-NEXT:    [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[DOTSCALAR8]], i64 0
+; CHECK-NEXT:    [[REASS_ADD_SCALAR:%.*]] = add i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[REASS_ADD22_SCALAR:%.*]] = add i32 [[DIV17]], [[MUL5]]
+; CHECK-NEXT:    [[REASS_ADD24_SCALAR:%.*]] = add i32 [[DIV]], [[MUL13]]
+; CHECK-NEXT:    [[REASS_ADD26_SCALAR:%.*]] = add i32 [[MUL]], [[DIV9]]
+; CHECK-NEXT:    [[REASS_ADD27_SCALAR:%.*]] = add i32 [[MUL21]], 317425
+; CHECK-NEXT:    [[REASS_ADD25_SCALAR:%.*]] = add i32 [[REASS_ADD26_SCALAR]], [[REASS_ADD27_SCALAR]]
+; CHECK-NEXT:    [[REASS_ADD23_SCALAR:%.*]] = add i32 [[REASS_ADD24_SCALAR]], [[REASS_ADD25_SCALAR]]
+; CHECK-NEXT:    [[REASS_ADD21_SCALAR:%.*]] = add i32 [[REASS_ADD22_SCALAR]], [[REASS_ADD23_SCALAR]]
+; CHECK-NEXT:    [[DOTSCALAR:%.*]] = add i32 [[REASS_ADD_SCALAR]], [[REASS_ADD21_SCALAR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[DOTSCALAR]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[ADD29]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
@@ -18,16 +18,16 @@
 ; CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
 ; CHECK-NEXT:    [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
 ; CHECK-NEXT:    [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT:    [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]]
-; CHECK-NEXT:    [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]]
-; CHECK-NEXT:    [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]]
-; CHECK-NEXT:    [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]]
-; CHECK-NEXT:    [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]]
-; CHECK-NEXT:    [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]]
-; CHECK-NEXT:    [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]]
-; CHECK-NEXT:    [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> , i32 [[DOTSCALAR8]], i64 0
+; CHECK-NEXT:    [[REASS_ADD_SCALAR:%.*]] = add i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[REASS_ADD22_SCALAR:%.*]] = add i32 [[DIV17]], [[MUL5]]
+; CHECK-NEXT:    [[REASS_ADD24_SCALAR:%.*]] = add i32 [[DIV]], [[MUL13]]
+; CHECK-NEXT:    [[REASS_ADD26_SCALAR:%.*]] = add i32 [[MUL]], [[DIV9]]
+; CHECK-NEXT:    [[REASS_ADD27_SCALAR:%.*]] = add i32 [[MUL21]], 317425
+; CHECK-NEXT:    [[REASS_ADD25_SCALAR:%.*]] = add i32 [[REASS_ADD26_SCALAR]], [[REASS_ADD27_SCALAR]]
+; CHECK-NEXT:    [[REASS_ADD23_SCALAR:%.*]] = add i32 [[REASS_ADD24_SCALAR]], [[REASS_ADD25_SCALAR]]
+; CHECK-NEXT:    [[REASS_ADD21_SCALAR:%.*]] = add i32 [[REASS_ADD22_SCALAR]], [[REASS_ADD23_SCALAR]]
+; CHECK-NEXT:    [[DOTSCALAR:%.*]] = add i32 [[REASS_ADD_SCALAR]], [[REASS_ADD21_SCALAR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> , i32 [[DOTSCALAR]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[ADD29]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -41,7 +41,7 @@
 
 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> 
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> 
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
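
Note (illustration only, not part of the patch): the @add_add_add test above shows the transform most directly. A fully serial chain of adds such as

  %E = add i32 %A, %B
  %F = add i32 %E, %C
  %G = add i32 %F, %D

is rebalanced into a tree whose first two adds have no dependency on each other and can issue in parallel:

  %reass.add = add i32 %A, %B
  %reass.add1 = add i32 %C, %D
  %G = add i32 %reass.add, %reass.add1

This shortens the critical path from three dependent adds to two; the one-use checks in the C++ hunk keep the rewrite from duplicating adds that have other consumers.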