diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -17,10 +17,37 @@ ; } define void @inner_loop_reduction(double* noalias nocapture readonly %a.in, double* noalias nocapture readonly %b.in, double* noalias nocapture %c.out) { ; CHECK-LABEL: @inner_loop_reduction( -; CHECK: fadd <4 x double> -; CHECK: add nuw nsw <4 x i32> -; CHECK: icmp eq <4 x i32> -; CHECK: "llvm.loop.isvectorized", i32 1 + +; CHECK: vector.body: +; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ] +; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ , %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ] +; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, double* %a.in, <4 x i64> %[[VEC_INDEX]] +; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @[[LLVM_INTRINSIC:.*]](<4 x double*> %[[A_PTR]], i32 8, <4 x i1> , <4 x double> undef) +; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, double* %b.in, <4 x i64> %[[VEC_INDEX]] +; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @[[LLVM_INTRINSIC]](<4 x double*> %[[B_PTR]], i32 8, <4 x i1> , <4 x double> undef) +; CHECK-NEXT: br label %[[FOR2_HEADER:.*]] + +; CHECK: [[FOR2_HEADER]]: +; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ], [ zeroinitializer, %vector.body ] +; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ], [ %[[MASKED_GATHER1]], %vector.body ] +; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[MASKED_GATHER2]], %[[REDUCTION]] +; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], +; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0 +; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}} + +; CHECK: [[FOR1_LATCH]]: +; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] +; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]] +; CHECK-NEXT: call void @{{.*}}(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> ) +; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], +; CHECK-NEXT: %{{.*}} = extractelement <4 x i1> %[[VEC_PTR]], i32 0 +; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add i64 %[[FOR1_INDEX]], 4 +; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body + entry: br label %for1.header