diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -246,12 +246,16 @@
 static unsigned getSelectFoldableOperands(BinaryOperator *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
+  case Instruction::FAdd:
   case Instruction::Mul:
+  case Instruction::FMul:
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
     return 3; // Can fold through either operand.
   case Instruction::Sub: // Can only fold on the amount subtracted.
+  case Instruction::FSub:
+  case Instruction::FDiv: // Can only fold on the divisor amount.
   case Instruction::Shl: // Can only fold on the shift amount.
   case Instruction::LShr:
   case Instruction::AShr:
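Note (reviewer context, not part of the patch): the value returned by getSelectFoldableOperands is consumed as a two-bit mask over the binop's operands, so "return 3" marks both operands as foldable, while the Sub/FSub/FDiv/shift group reports only the second operand. The fold also needs a constant for the untaken select arm that leaves the other operand unchanged, i.e. the operation's identity. A minimal sketch of that constant query, assuming the FP identities come from ConstantExpr::getBinOpIdentity (the helper name below is invented; the constants match the CHECK lines in the tests that follow):

#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Hypothetical helper: the identity constant that can replace the untaken
// select arm, so the untaken path computes `X op Id == X`.
// AllowRHSConstant=true also exposes the right-identities needed for the
// operand-2-only opcodes above:
//   X fadd -0.0 == X,  X fsub 0.0 == X,  X fmul 1.0 == X,  X fdiv 1.0 == X
static Constant *selectFoldIdentity(BinaryOperator *BO) {
  return ConstantExpr::getBinOpIdentity(BO->getOpcode(), BO->getType(),
                                        /*AllowRHSConstant=*/true);
}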
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
--- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -3,8 +3,8 @@
 define float @select_fadd(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd(
-; CHECK-NEXT: [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fadd float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -14,8 +14,8 @@
 define float @select_fadd_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_swapped(
-; CHECK-NEXT: [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fadd float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -25,8 +25,8 @@
 define float @select_fadd_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fadd fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fadd fast float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -36,8 +36,8 @@
 define float @select_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_swapped_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fadd fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fadd fast float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -47,8 +47,8 @@
 define float @select_fmul(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT: [[C:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fmul float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -58,8 +58,8 @@
 define float @select_fmul_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_swapped(
-; CHECK-NEXT: [[C:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fmul float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -69,8 +69,8 @@
 define float @select_fmul_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fmul fast float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -80,8 +80,8 @@
 define float @select_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_swapped_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fmul fast float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -91,8 +91,8 @@
 define float @select_fsub(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub(
-; CHECK-NEXT: [[C:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fsub float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -102,8 +102,8 @@
 define float @select_fsub_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_swapped(
-; CHECK-NEXT: [[C:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fsub float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -113,8 +113,8 @@
 define float @select_fsub_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fsub fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fsub fast float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -124,8 +124,8 @@
 define float @select_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_swapped_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fsub fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fsub fast float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -147,8 +147,8 @@
 define float @select_fdiv(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT: [[C:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fdiv float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -158,8 +158,8 @@
 define float @select_fdiv_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_swapped(
-; CHECK-NEXT: [[C:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fdiv float %A, %B
   %D = select i1 %cond, float %A, float %C
@@ -169,8 +169,8 @@
 define float @select_fdiv_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fdiv fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
   %D = select i1 %cond, float %C, float %A
@@ -180,8 +180,8 @@
 define float @select_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_swapped_fast_math(
-; CHECK-NEXT: [[C:%.*]] = fdiv fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT: ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
   %D = select i1 %cond, float %A, float %C
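Why the fadd tests above fold in -0.000000e+00 rather than 0.0: +0.0 is not a strict fadd identity, because (-0.0) + 0.0 rounds to +0.0 and would flip the sign of a negative zero on the untaken path, whereas x + (-0.0) == x for every x. A standalone C++ illustration of the IEEE-754 behavior (not LLVM code):

#include <cstdio>

int main() {
  float negzero = -0.0f;
  // Under round-to-nearest, -0.0 + 0.0 == +0.0: adding +0.0 is observable,
  // so +0.0 cannot stand in for the untaken select arm of an fadd.
  std::printf("-0.0 + 0.0  = %g\n", negzero + 0.0f);  // prints 0
  // x + -0.0 == x for every x (including -0.0), so -0.0 is the safe
  // identity even without any fast-math assumptions.
  std::printf("-0.0 + -0.0 = %g\n", negzero + -0.0f); // prints -0
}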
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -43,8 +43,8 @@
 ; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP8]])
 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <4 x float> [[TMP9]], [[TMP7]]
-; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[TMP10]], [[VEC_PHI]]
-; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP11]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP10]]
+; CHECK-NEXT: [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP11]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
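The vectorizer test above shows the same fold applied elementwise: the constant arm becomes a splat of the fadd identity. Assuming the identity is queried as sketched earlier, no extra code is needed for vectors, since ConstantExpr::getBinOpIdentity is keyed on the operation's type and LLVM's constant getters splat scalar values over vector types:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// For a <4 x float> fadd, the identity comes back as the splat constant
// seen in [[TMP11]] above:
//   <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>
Constant *vectorFAddIdentity(LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  return ConstantExpr::getBinOpIdentity(Instruction::FAdd, VecTy);
}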
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -959,7 +959,7 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1019,7 +1019,7 @@
 ; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer
 ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]])
 ; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
-; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
+; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
@@ -1366,9 +1366,9 @@
 ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
 ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP11]]
-; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP13]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[PREDPHI_V]]
+; CHECK-NEXT: [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -58,21 +58,21 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
 ; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
@@ -166,21 +166,21 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND1]])
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD3]])
 ; CHECK-NEXT: [[TMP9]] = mul i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: middle.block:
x i32> [[VEC_IND2]]) +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]]) ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -551,7 +551,7 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 @@ -559,7 +559,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP4]], <4 x float> [[WIDE_LOAD1]]) +; CHECK-NEXT: [[TMP5]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP4]], <4 x float> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -818,9 +818,9 @@ ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> , <4 x i1> [[TMP11]] -; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP13]], <4 x float> , <4 x float> [[PREDPHI_V]] +; CHECK-NEXT: [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -941,21 +941,21 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; 
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND2]])
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
 ; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK: middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -170,8 +170,8 @@
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, float* [[B]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
 ; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]]
+; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP4]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD15]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP7]], [[TMP10]]
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
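Taken together, the vectorizer test updates all show the same reassociation of a predicated reduction step: select(mask, acc, acc + x) becomes acc + select(mask, -0.0, x), trading a select-of-results for a select-of-addend followed by one unconditional fadd. A scalar C++ model of the two shapes (illustrative only; names are invented), showing they agree lane by lane:

#include <cstdio>

// Old shape: compute the sum, then pick between it and the accumulator.
static float stepBefore(bool skip, float acc, float x) {
  float sum = acc + x;
  return skip ? acc : sum;         // select(mask, acc, acc + x)
}

// New shape: pick the addend (or the fadd identity -0.0), then add once.
static float stepAfter(bool skip, float acc, float x) {
  float addend = skip ? -0.0f : x; // select(mask, -0.0, x)
  return acc + addend;
}

int main() {
  const float acc = 1.5f, x = 2.25f;
  const bool cases[] = {false, true};
  for (bool skip : cases)
    std::printf("skip=%d before=%g after=%g\n", skip,
                stepBefore(skip, acc, x), stepAfter(skip, acc, x));
}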