diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -96,6 +96,20 @@
     FK_Enabled = 1, ///< Forcing enabled.
   };
 
+  enum ScalableForceKind {
+    /// Not selected.
+    SK_Unspecified = -1,
+    /// Disables vectorization with scalable vectors.
+    SK_FixedWidthOnly = 0,
+    /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
+    /// scalable vectors when the cost-model is inconclusive. This is the
+    /// default when the scalable.enable hint is enabled through a pragma.
+    SK_PreferScalable = 1,
+    /// Vectorize loops using scalable vectors or fixed-width vectors, but
+    /// favor fixed-width vectors when the cost is inconclusive.
+    SK_PreferFixedWidth = 2,
+  };
+
   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
                      OptimizationRemarkEmitter &ORE);
@@ -109,7 +123,8 @@
   void emitRemarkWithHints() const;
 
   ElementCount getWidth() const {
-    return ElementCount::get(Width.Value, isScalable());
+    return ElementCount::get(Width.Value,
+                             isScalableVectorizationExplicitlyEnabled());
   }
 
   unsigned getInterleave() const {
     if (Interleave.Value)
@@ -129,7 +144,16 @@
     return (ForceKind)Force.Value;
   }
 
-  bool isScalable() const { return Scalable.Value; }
+  /// \return true if scalable vectorization has been explicitly enabled.
+  bool isScalableVectorizationExplicitlyEnabled() const {
+    return Scalable.Value == SK_PreferFixedWidth ||
+           Scalable.Value == SK_PreferScalable;
+  }
+
+  /// \return true if scalable vectorization has been explicitly disabled.
+  bool isScalableVectorizationDisabled() const {
+    return Scalable.Value == SK_FixedWidthOnly;
+  }
 
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -48,6 +48,23 @@
     cl::desc("The maximum number of SCEV checks allowed with a "
              "vectorize(enable) pragma"));
 
+// FIXME: When scalable vectorization is stable enough, change the default
+// to SK_PreferFixedWidth.
+static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
+    "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
+    cl::Hidden,
+    cl::desc("Control whether the compiler can use scalable vectors to "
+             "vectorize a loop"),
+    cl::values(
+        clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
+                   "Scalable vectorization is disabled."),
+        clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
+                   "Scalable vectorization is available, but favor fixed-width "
+                   "vectorization when the cost is inconclusive."),
+        clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
+                   "Scalable vectorization is available and favored when the "
+                   "cost is inconclusive.")));
+
 /// Maximum vectorization interleave count.
 static const unsigned MaxInterleaveFactor = 16;
@@ -77,8 +94,8 @@
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
-      Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
-      ORE(ORE) {
+      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
+      TheLoop(L), ORE(ORE) {
   // Populate values with existing loop metadata.
   getHintsFromMetadata();
@@ -86,6 +103,16 @@
   if (VectorizerParams::isInterleaveForced())
     Interleave.Value = VectorizerParams::VectorizationInterleave;
 
+  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
+    // If the width is set, but the metadata says nothing about the scalable
+    // property, then assume it concerns only a fixed-width UserVF.
+    // If width is not set, the flag takes precedence.
+    Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
+  else if (ScalableVectorization == SK_FixedWidthOnly)
+    // If the flag is set to disable any use of scalable vectors, override the
+    // loop hint.
+    Scalable.Value = SK_FixedWidthOnly;
+
   if (IsVectorized.Value != 1)
     // If the vectorization width and interleaving count are both 1 then
     // consider the loop to have been already vectorized because there's
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5657,6 +5657,12 @@
     return ElementCount::getScalable(0);
   }
 
+  if (Hints->isScalableVectorizationDisabled()) {
+    reportVectorizationInfo("Scalable vectorization is explicitly disabled",
+                            "ScalableVectorizationDisabled", ORE, TheLoop);
+    return ElementCount::getScalable(0);
+  }
+
   auto MaxScalableVF = ElementCount::getScalable(
       std::numeric_limits<ElementCount::ScalarTy>::max());
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
 
 ; We vectorize this first order recurrence, with a set of insertelements for
 ; each unrolled part. Make sure these insertelements are generated in-order,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug < %s 2>%t | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-interleave=1 -S -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
 ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
 
 define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
 ; CHECK-LABEL: @vec_load
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S 2>%t | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
 ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
 
 ; Reduction can be vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
 
 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_strict
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
@@ -1,6 +1,8 @@
 ; REQUIRES: asserts
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
 
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
@@ -8,6 +10,8 @@
 define void @test0(i32* %a, i8* %b, i32* %c) {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
 entry:
   br label %loop
@@ -35,6 +39,8 @@
 define void @test1(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
 entry:
   br label %loop
@@ -63,6 +69,8 @@
 define void @test2(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
 entry:
   br label %loop
@@ -91,6 +99,8 @@
 define void @test3(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
 entry:
   br label %loop
@@ -120,6 +130,8 @@
 define void @test4(i32* %a, i32* %b) {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -1,8 +1,8 @@
 ; REQUIRES: asserts
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve < %s -S | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on < %s -S | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
 
 define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
 ; CHECK-LABEL: @cond_inv_load_i32i32i16
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-extract-last-veclane.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
 
 define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
 ; CHECK-LABEL: @gather_nxv4i32_ind64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
 
 ; Test that we can add on the induction variable
 ;   for (long long i = 0; i < n; i++) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -scalable-vectorization=on -mattr=+sve -mtriple aarch64-linux-gnu < %s | FileCheck %s
 
 define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
 ; CHECK-LABEL: @invariant_load
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
 
 define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
 ; CHECK-LABEL: @stride7_i32(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
 
 define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @mloadstore_f32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine < %s -S | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -10,7 +10,7 @@
 ; The test checks if the mask is being correctly created, reverted and used
 
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -4,7 +4,7 @@
 ;   for (int i = N-1; i >= 0; --i)
 ;     a[i] = b[i] + 1.0;
 
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
 
 define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
 ; CHECK-LABEL: @vector_reverse_f64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s
 
 ; Ensure that we can vectorize loops such as:
 ;   int *ptr = c;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \
-; RUN:   -riscv-v-vector-bits-max=512 -S < %s 2>&1 \
+; RUN:   -riscv-v-vector-bits-max=512 -S -scalable-vectorization=on < %s 2>&1 \
 ; RUN:   | FileCheck %s
 
 ; void test(int *a, int *b, int N) {
diff --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
--- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=on | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S -scalable-vectorization=preferred | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -65,11 +66,37 @@
   ret void
 }
 
+; Test that when only the width property is specified, and not the scalable.enable flag,
+; the hint defaults to 'scalable.enable = false'.
+
+; CHECK-LABEL: @test4(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test4(i32* nocapture %a, i32 %n) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 42, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = !{!0, !1}
+!0 = !{!0, !1, !5}
 !1 = !{!"llvm.loop.vectorize.width", i32 8}
 !2 = !{!2, !1, !3}
 !3 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}
 !4 = !{!4, !1, !5}
 !5 = !{!"llvm.loop.vectorize.scalable.enable", i32 0}
+!6 = !{!6, !7}
+!7 = !{!"llvm.loop.vectorize.width", i32 8}
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
-; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF1
+; RUN: opt -loop-vectorize -scalable-vectorization=on -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-scalable-vectors=true -S < %s | FileCheck %s --check-prefix=CHECK-VF4UF2
 ; void recurrence_1(int *a, int *b, int n) {
 ;   for(int i = 0; i < n; i++)
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF1
-; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF2
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF1
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF2
 
 ; CHECKUF1: for.body.preheader:
 ; CHECKUF1-DAG: %wide.trip.count = zext i32 %N to i64
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -force-target-supports-scalable-vectors=true -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-target-supports-scalable-vectors=true -scalable-vectorization=on -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
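
Usage sketch (not part of the patch): the test below is a minimal illustration of how the new -scalable-vectorization flag interacts with the llvm.loop.vectorize.scalable.enable hint on an SVE target, following the behaviour introduced above. With -scalable-vectorization=off the flag overrides the loop hint and a fixed-width VF is expected; with =on or =preferred the scalable hint is honoured. The function name, check prefixes, and the exact vector types in the CHECK lines are illustrative assumptions, not taken from the patch.

; RUN: opt < %s -loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=off -S | FileCheck %s --check-prefix=CHECK-FIXED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on -S | FileCheck %s --check-prefix=CHECK-SCALABLE

; Width hint of 4 plus scalable.enable=1: expected to stay fixed-width when the
; flag is "off" and to become a scalable VF when the flag is "on".
; CHECK-FIXED: store <4 x i32>
; CHECK-SCALABLE: store <vscale x 4 x i32>
define void @scalable_hint_example(i32* nocapture %a, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 42, i32* %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop, !llvm.loop !0

exit:
  ret void
}

!0 = !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}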