Summary: make -sve-tail-folding=default select the same behaviour as "simple"
(add(TFSimple)) instead of clearing Bits, and extend sve-tail-folding-option.ll
with RUN lines and CHECK-SIMPLE expectations for the default/simple modes.

Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -61,9 +61,7 @@
         Bits = 0;
       else if (TailFoldType == "all")
         Bits = TFAll;
-      else if (TailFoldType == "default")
-        Bits = 0; // Currently defaults to never tail-folding.
-      else if (TailFoldType == "simple")
+      else if (TailFoldType == "default" || TailFoldType == "simple")
         add(TFSimple);
       else if (TailFoldType == "reductions")
         add(TFReductions);
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF
-; RUN: opt < %s -loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF
+; RUN: opt < %s -loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-SIMPLE
+; RUN: opt < %s -loop-vectorize -sve-tail-folding=simple -S | FileCheck %s -check-prefix=CHECK-SIMPLE
 ; RUN: opt < %s -loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF
 ; RUN: opt < %s -loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences -S | FileCheck %s -check-prefix=CHECK-TF
 ; RUN: opt < %s -loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED
@@ -17,6 +18,14 @@
 ; CHECK-NOTF-NOT: %{{.*}} = phi
 ; CHECK-NOTF: store %[[SPLAT]], *
 
+; CHECK-SIMPLE-LABEL: @simple_memset(
+; CHECK-SIMPLE: vector.ph:
+; CHECK-SIMPLE: %[[INSERT:.*]] = insertelement poison, i32 %val, i32 0
+; CHECK-SIMPLE: %[[SPLAT:.*]] = shufflevector %[[INSERT]], poison, zeroinitializer
+; CHECK-SIMPLE: vector.body:
+; CHECK-SIMPLE: %[[ACTIVE_LANE_MASK:.*]] = phi
+; CHECK-SIMPLE: call void @llvm.masked.store.nxv4i32.p0nxv4i32( %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
+
 ; CHECK-TF-NORED-LABEL: @simple_memset(
 ; CHECK-TF-NORED: vector.ph:
 ; CHECK-TF-NORED: %[[INSERT:.*]] = insertelement poison, i32 %val, i32 0
@@ -73,6 +82,14 @@
 ; CHECK-NOTF: middle.block:
 ; CHECK-NOTF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, %[[ADD]])
 
+; CHECK-SIMPLE-LABEL: @fadd_red_fast
+; CHECK-SIMPLE: vector.body:
+; CHECK-SIMPLE-NOT: %{{.*}} = phi
+; CHECK-SIMPLE: %[[LOAD:.*]] = load
+; CHECK-SIMPLE: %[[ADD:.*]] = fadd fast %[[LOAD]]
+; CHECK-SIMPLE: middle.block:
+; CHECK-SIMPLE-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, %[[ADD]])
+
 ; CHECK-TF-NORED-LABEL: @fadd_red_fast
 ; CHECK-TF-NORED: vector.body:
 ; CHECK-TF-NORED-NOT: %{{.*}} = phi
@@ -141,6 +158,19 @@
 ; CHECK-NOTF: %[[ADD:.*]] = add nsw %[[LOAD]], %[[SPLICE]]
 ; CHECK-NOTF: store %[[ADD]]
 
+; CHECK-SIMPLE-LABEL: @add_recur
+; CHECK-SIMPLE: entry:
+; CHECK-SIMPLE: %[[PRE:.*]] = load i32, i32* %src, align 4
+; CHECK-SIMPLE: vector.ph:
+; CHECK-SIMPLE: %[[RECUR_INIT:.*]] = insertelement poison, i32 %[[PRE]]
+; CHECK-SIMPLE: vector.body:
+; CHECK-SIMPLE-NOT: %{{.*}} = phi
+; CHECK-SIMPLE: %[[VECTOR_RECUR:.*]] = phi [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
+; CHECK-SIMPLE: %[[LOAD]] = load
+; CHECK-SIMPLE: %[[SPLICE:.*]] = call @llvm.experimental.vector.splice.nxv4i32( %[[VECTOR_RECUR]], %[[LOAD]], i32 -1)
+; CHECK-SIMPLE: %[[ADD:.*]] = add nsw %[[LOAD]], %[[SPLICE]]
+; CHECK-SIMPLE: store %[[ADD]]
+
 ; CHECK-TF-NORED-LABEL: @add_recur
 ; CHECK-TF-NORED: entry:
 ; CHECK-TF-NORED: %[[PRE:.*]] = load i32, i32* %src, align 4
@@ -220,6 +250,12 @@
 ; CHECK-NOTF: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32>
 ; CHECK-NOTF: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32>
 
+; CHECK-SIMPLE-LABEL: @interleave(
+; CHECK-SIMPLE: vector.body:
+; CHECK-SIMPLE: %[[LOAD:.*]] = load <8 x float>, <8 x float>
+; CHECK-SIMPLE: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32>
+; CHECK-SIMPLE: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32>
+
 ; CHECK-TF-LABEL: @interleave(
 ; CHECK-TF: vector.body:
 ; CHECK-TF: %[[LOAD:.*]] = load <8 x float>, <8 x float>