Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5577,9 +5577,11 @@ // consider interleaving beneficial (eg. MVE). if (TTI.getMaxInterleaveFactor(VF) <= 1) return false; - // FIXME: We should consider changing the threshold for scalable - // vectors to take VScaleForTuning into account. - if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF) + + unsigned Multiplier = 1; + if (VF.isScalable()) + Multiplier = getVScaleForTuning().value_or(1); + if ((Multiplier * VF.getKnownMinValue()) >= EpilogueVectorizationMinVF) return true; return false; } Index: llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll @@ -1,5 +1,7 @@ -; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphoneos -vectorizer-min-trip-count=8 -S %s | FileCheck --check-prefixes=CHECK,DEFAULT %s -; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphoneos -vectorizer-min-trip-count=8 -vectorize-memory-check-threshold=1 -S %s | FileCheck --check-prefixes=CHECK,THRESHOLD %s +; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphoneos -vectorizer-min-trip-count=8 \ +; RUN: -enable-epilogue-vectorization=false -S %s | FileCheck --check-prefixes=CHECK,DEFAULT %s +; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphoneos -vectorizer-min-trip-count=8 \ +; RUN: -enable-epilogue-vectorization=false -vectorize-memory-check-threshold=1 -S %s | FileCheck --check-prefixes=CHECK,THRESHOLD %s ; Tests for loops with large numbers of runtime checks. 
Check that loops are ; vectorized, if the loop trip counts are large and the impact of the runtime Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-vscale-tune.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-vscale-tune.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; REQUIRES: asserts +; RUN: opt -S -passes=loop-vectorize,instsimplify -force-vector-interleave=1 \ +; RUN: -mcpu=neoverse-v1 < %s | FileCheck %s --check-prefix=CHECK-NV1 +; RUN: opt -S -passes=loop-vectorize,instsimplify -force-vector-interleave=1 \ +; RUN: -mcpu=neoverse-v2 < %s | FileCheck %s --check-prefix=CHECK-NV2 +; RUN: opt -S -passes=loop-vectorize,instsimplify -force-vector-interleave=1 \ +; RUN: -mcpu=cortex-x2 < %s | FileCheck %s --check-prefix=CHECK-X2 + +target triple = "aarch64-unknown-linux-gnu" + +define void @foo(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i64 %len) #0 { +; CHECK-NV1-LABEL: define void @foo +; CHECK-NV1-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i64 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NV1-NEXT: iter.check: +; CHECK-NV1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NV1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[LEN]], [[TMP1]] +; CHECK-NV1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NV1: vector.main.loop.iter.check: +; CHECK-NV1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NV1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[LEN]], [[TMP3]] +; CHECK-NV1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NV1: vector.ph: +; CHECK-NV1-NEXT: [[TMP4:%.*]] = call i64 
@llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 +; CHECK-NV1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP5]] +; CHECK-NV1-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] +; CHECK-NV1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-NV1: vector.body: +; CHECK-NV1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NV1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-NV1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP6]], align 2 +; CHECK-NV1-NEXT: [[TMP7:%.*]] = add nuw nsw <vscale x 8 x i16> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NV1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NV1-NEXT: store <vscale x 8 x i16> [[TMP7]], ptr [[TMP8]], align 2 +; CHECK-NV1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NV1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; CHECK-NV1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NV1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NV1: middle.block: +; CHECK-NV1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[LEN]], [[N_VEC]] +; CHECK-NV1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK-NV1: vec.epilog.iter.check: +; CHECK-NV1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 +; CHECK-NV1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]] +; CHECK-NV1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NV1: vec.epilog.ph: +; CHECK-NV1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NV1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: 
[[TMP15:%.*]] = mul i64 [[TMP14]], 4 +; CHECK-NV1-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[LEN]], [[TMP15]] +; CHECK-NV1-NEXT: [[N_VEC3:%.*]] = sub i64 [[LEN]], [[N_MOD_VF2]] +; CHECK-NV1-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK-NV1: vec.epilog.vector.body: +; CHECK-NV1-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NV1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX5]] +; CHECK-NV1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i16>, ptr [[TMP16]], align 2 +; CHECK-NV1-NEXT: [[TMP17:%.*]] = add nuw nsw <vscale x 4 x i16> [[WIDE_LOAD6]], shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 2, i64 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NV1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX5]] +; CHECK-NV1-NEXT: store <vscale x 4 x i16> [[TMP17]], ptr [[TMP18]], align 2 +; CHECK-NV1-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV1-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 +; CHECK-NV1-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], [[TMP20]] +; CHECK-NV1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NV1-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NV1: vec.epilog.middle.block: +; CHECK-NV1-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[LEN]], [[N_VEC3]] +; CHECK-NV1-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NV1: vec.epilog.scalar.ph: +; CHECK-NV1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NV1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NV1: for.body: +; CHECK-NV1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NV1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] +; CHECK-NV1-NEXT: [[TMP22:%.*]] = 
load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NV1-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[TMP22]], 2 +; CHECK-NV1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] +; CHECK-NV1-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 +; CHECK-NV1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NV1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[LEN]] +; CHECK-NV1-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NV1: exit: +; CHECK-NV1-NEXT: ret void +; +; CHECK-NV2-LABEL: define void @foo +; CHECK-NV2-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i64 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NV2-NEXT: entry: +; CHECK-NV2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-NV2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[LEN]], [[TMP1]] +; CHECK-NV2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NV2: vector.ph: +; CHECK-NV2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NV2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NV2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]] +; CHECK-NV2-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] +; CHECK-NV2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-NV2: vector.body: +; CHECK-NV2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NV2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-NV2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP4]], align 2 +; CHECK-NV2-NEXT: [[TMP5:%.*]] = add nuw nsw <vscale x 8 x i16> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NV2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NV2-NEXT: store <vscale x 8 x i16> [[TMP5]], ptr [[TMP6]], align 2 +; CHECK-NV2-NEXT: [[TMP7:%.*]] = call i64 
@llvm.vscale.i64() +; CHECK-NV2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-NV2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NV2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NV2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NV2: middle.block: +; CHECK-NV2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[LEN]], [[N_VEC]] +; CHECK-NV2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NV2: scalar.ph: +; CHECK-NV2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NV2-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NV2: for.body: +; CHECK-NV2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NV2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] +; CHECK-NV2-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NV2-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[TMP10]], 2 +; CHECK-NV2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] +; CHECK-NV2-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 +; CHECK-NV2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NV2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[LEN]] +; CHECK-NV2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NV2: exit: +; CHECK-NV2-NEXT: ret void +; +; CHECK-X2-LABEL: define void @foo +; CHECK-X2-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i64 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-X2-NEXT: entry: +; CHECK-X2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-X2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-X2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[LEN]], [[TMP1]] +; CHECK-X2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 
+; CHECK-X2: vector.ph: +; CHECK-X2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-X2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-X2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]] +; CHECK-X2-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] +; CHECK-X2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-X2: vector.body: +; CHECK-X2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-X2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-X2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP4]], align 2 +; CHECK-X2-NEXT: [[TMP5:%.*]] = add nuw nsw <vscale x 8 x i16> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-X2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-X2-NEXT: store <vscale x 8 x i16> [[TMP5]], ptr [[TMP6]], align 2 +; CHECK-X2-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-X2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-X2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-X2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-X2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-X2: middle.block: +; CHECK-X2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[LEN]], [[N_VEC]] +; CHECK-X2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-X2: scalar.ph: +; CHECK-X2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-X2-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-X2: for.body: +; CHECK-X2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-X2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] +; CHECK-X2-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-X2-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[TMP10]], 2 +; CHECK-X2-NEXT: [[ARRAYIDX3:%.*]] = 
getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] +; CHECK-X2-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 +; CHECK-X2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-X2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[LEN]] +; CHECK-X2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-X2: exit: +; CHECK-X2-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx + %add = add nuw nsw i16 %0, 2 + %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv + store i16 %add, ptr %arrayidx3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %len + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %for.body + ret void +} + +attributes #0 = { "target-features"="+sve" vscale_range(1,16) } Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -1,13 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=loop-vectorize,dce -mtriple aarch64-linux-gnu -mattr=+sve \ -; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s -S | FileCheck %s +; RUN: opt -passes=loop-vectorize,dce -prefer-predicate-over-epilogue=scalar-epilogue \ +; RUN: -enable-epilogue-vectorization=false < %s -S | FileCheck %s -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" ; This should be vscale x 8 vectorized, maybe with some interleaving. 
-define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef readonly %s, i32 noundef %n) { +define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef readonly %s, i32 noundef %n) #0 { ; CHECK-LABEL: @fneg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S2:%.*]] = ptrtoint ptr [[S:%.*]] to i64 @@ -100,3 +99,5 @@ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } + +attributes #0 = { "target-features"="+sve" vscale_range(1,16) } Index: llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; REQUIRES: asserts ; RUN: opt -S -passes=loop-vectorize,instsimplify -force-vector-interleave=1 \ -; RUN: -debug-only=loop-vectorize 2>%t < %s | FileCheck %s +; RUN: -enable-epilogue-vectorization=false -debug-only=loop-vectorize 2>%t < %s | FileCheck %s ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST target triple = "aarch64-unknown-linux-gnu" @@ -17,7 +17,6 @@ ; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = zext i8 %0 to i32 ; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = zext i8 %0 to i32 ; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %conv = zext i8 %0 to i32 - ; CHECK-LABEL: define void @zext_i8_i16 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -101,7 +100,6 @@ ; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = sext i8 %0 to i32 ; CHECK-COST: LV: Found an estimated cost 
of 1 for VF vscale x 4 For instruction: %conv = sext i8 %0 to i32 ; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %conv = sext i8 %0 to i32 - ; CHECK-LABEL: define void @sext_i8_i16 ; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: