diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -205,6 +205,12 @@ return Status == VectorizationSafetyStatus::Safe; } + /// Return true if the number of elements that are safe to operate on + /// simultaneously is not bounded. + bool isSafeForAnyVectorWidth() const { + return MaxSafeVectorWidthInBits == UINT_MAX; + } + /// The maximum number of bytes of a vector register we can vectorize /// the accesses safely with. uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -325,6 +325,10 @@ const LoopAccessInfo *getLAI() const { return LAI; } + bool isSafeForAnyVectorWidth() const { + return LAI->getDepChecker().isSafeForAnyVectorWidth(); + } + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } uint64_t getMaxSafeVectorWidthInBits() const { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -272,6 +272,12 @@ "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")); +static cl::opt ForceTargetSupportsScalableVectors( + "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, + cl::desc( + "Pretend that scalable vectors are supported, even if the target does " + "not support them. 
This flag should only be used for testing.")); + static cl::opt SmallLoopCost( "small-loop-cost", cl::init(20), cl::Hidden, cl::desc( @@ -5592,6 +5598,30 @@ ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount, ElementCount UserVF) { + bool IgnoreScalableUserVF = UserVF.isScalable() && + !TTI.supportsScalableVectors() && + !ForceTargetSupportsScalableVectors; + if (IgnoreScalableUserVF) { + LLVM_DEBUG( + dbgs() << "LV: Ignoring VF=" << UserVF + << " because target does not support scalable vectors.\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF", + TheLoop->getStartLoc(), + TheLoop->getHeader()) + << "Ignoring VF=" << ore::NV("UserVF", UserVF) + << " because target does not support scalable vectors."; + }); + } + + // Beyond this point two scenarios are handled. If UserVF isn't specified + // then a suitable VF is chosen. If UserVF is specified and there are + // dependencies, check if it's legal. However, if a UserVF is specified and + // there are no dependencies, then there's nothing to do. + if (UserVF.isNonZero() && !IgnoreScalableUserVF && + Legal->isSafeForAnyVectorWidth()) + return UserVF; + MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); @@ -5603,15 +5633,42 @@ // dependence distance). unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits(); - if (UserVF.isNonZero()) { - // For now, don't verify legality of scalable vectors. - // This will be addressed properly in https://reviews.llvm.org/D91718. - if (UserVF.isScalable()) - return UserVF; + // If the user vectorization factor is legally unsafe, clamp it to a safe + // value. Otherwise, return as is. 
+ if (UserVF.isNonZero() && !IgnoreScalableUserVF) { + unsigned MaxSafeElements = + PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType); + ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements); + + if (UserVF.isScalable()) { + Optional MaxVScale = TTI.getMaxVScale(); + + // Scale VF by vscale before checking if it's safe. + MaxSafeVF = ElementCount::getScalable( + MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0); + + if (MaxSafeVF.isZero()) { + // The dependence distance is too small to use scalable vectors, + // fallback on fixed. + LLVM_DEBUG( + dbgs() + << "LV: Max legal vector width too small, scalable vectorization " + "unfeasible. Using fixed-width vectorization instead.\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible", + TheLoop->getStartLoc(), + TheLoop->getHeader()) + << "Max legal vector width too small, scalable vectorization " + << "unfeasible. Using fixed-width vectorization instead."; + }); + return computeFeasibleMaxVF( + ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue())); + } + } - // If legally unsafe, clamp the user vectorization factor to a safe value. 
- unsigned MaxSafeVF = PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType); - if (UserVF.getFixedValue() <= MaxSafeVF) + LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n"); + + if (ElementCount::isKnownLE(UserVF, MaxSafeVF)) return UserVF; LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF @@ -5626,7 +5683,7 @@ << " is unsafe, clamping to maximum safe vectorization factor " << ore::NV("VectorizationFactor", MaxSafeVF); }); - return ElementCount::getFixed(MaxSafeVF); + return MaxSafeVF; } WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits); @@ -7426,17 +7483,24 @@ ElementCount MaxVF = MaybeMaxVF.getValue(); assert(MaxVF.isNonZero() && "MaxVF is zero."); - if (!UserVF.isZero() && ElementCount::isKnownLE(UserVF, MaxVF)) { - LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); - assert(isPowerOf2_32(UserVF.getKnownMinValue()) && + bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF); + if (!UserVF.isZero() && + (UserVFIsLegal || (UserVF.isScalable() && MaxVF.isScalable()))) { + // FIXME: MaxVF is temporarily used inplace of UserVF for illegal scalable + // VFs here, this should be reverted to only use legal UserVFs once the + // loop below supports scalable VFs. + ElementCount VF = UserVFIsLegal ? UserVF : MaxVF; + LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max") + << " VF " << VF << ".\n"); + assert(isPowerOf2_32(VF.getKnownMinValue()) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. 
- CM.selectUserVectorizationFactor(UserVF); + CM.selectUserVectorizationFactor(VF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(UserVF, UserVF); + buildVPlansWithVPRecipes(VF, VF); LLVM_DEBUG(printPlans(dbgs())); - return {{UserVF, 0}}; + return {{VF, 0}}; } assert(!MaxVF.isScalable() && diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -0,0 +1,368 @@ +; REQUIRES: asserts +; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s +; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s +; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s +; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +; These tests validate the behaviour of scalable vectorization factor hints, +; where the following applies: +; +; * If the backend does not support scalable vectors, ignore the hint and let +; the vectorizer pick a VF. +; * If there are no dependencies and assuming the VF is a power of 2 the VF +; should be accepted. This applies to both fixed and scalable VFs. +; * If the dependency is too small to use scalable vectors, change the VF to +; fixed, where existing behavior applies (clamping). +; * If scalable vectorization is feasible given the dependency and the VF is +; valid, accept it. 
Otherwise, clamp to the max scalable VF. + +; test1 +; +; Scalable vectorization unfeasible, clamp VF from (4, scalable) -> (4, fixed). +; +; The pragma applied to this loop implies a scalable vector +; be used for vectorization. For fixed vectors the MaxVF=8, otherwise there +; would be a dependence between vector lanes for vectors greater than 256 bits. +; +; void test1(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(4, scalable) +; for (int i=0; i:0:0: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead. +; CHECK-DBG: LV: The max safe VF is: 8. +; CHECK-DBG: LV: Selecting VF: 4. +; CHECK-LABEL: @test1 +; CHECK: <4 x i32> +define void @test1(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 8 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 + +exit: + ret void +} + +!0 = !{!0, !1, !2} +!1 = !{!"llvm.loop.vectorize.width", i32 4} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; test2 +; +; Scalable vectorization unfeasible, clamp VF from (8, scalable) -> (4, fixed). 
+; +; void test2(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(8, scalable) +; for (int i=0; i +define void @test2(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 4 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3 + +exit: + ret void +} + +!3 = !{!3, !4, !5} +!4 = !{!"llvm.loop.vectorize.width", i32 8} +!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; test3 +; +; Scalable vectorization feasible and the VF is valid. +; +; Specifies a vector of , i.e. maximum of 32 x i32 with 2 +; words per 128-bits (unpacked). 
+; +; void test3(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(2, scalable) +; for (int i=0; i +define void @test3(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 32 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6 + +exit: + ret void +} + +!6 = !{!6, !7, !8} +!7 = !{!"llvm.loop.vectorize.width", i32 2} +!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; test4 +; +; Scalable vectorization feasible, but the VF is unsafe. Should clamp. +; +; Specifies a vector of , i.e. maximum of 64 x i32 with 4 +; words per 128-bits (packed). 
+; +; void test4(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(4, scalable) +; for (int i=0; i:0:0: User-specified vectorization factor vscale x 4 is unsafe, clamping to maximum safe vectorization factor vscale x 2 +; CHECK-DBG: LV: Using max VF vscale x 2 +; CHECK-LABEL: @test4 +; CHECK: +define void @test4(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 32 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9 + +exit: + ret void +} + +!9 = !{!9, !10, !11} +!10 = !{!"llvm.loop.vectorize.width", i32 4} +!11 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; test5 +; +; Scalable vectorization feasible and the VF is valid. +; +; Specifies a vector of , i.e. maximum of 64 x i32 with 4 +; words per 128-bits (packed). 
+; +; void test5(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(4, scalable) +; for (int i=0; i +define void @test5(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 128 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12 + +exit: + ret void +} + +!12 = !{!12, !13, !14} +!13 = !{!"llvm.loop.vectorize.width", i32 4} +!14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; test6 +; +; Scalable vectorization feasible, but the VF is unsafe. Should clamp. +; +; Specifies a vector of , i.e. maximum of 256 x i32. 
+; +; void test6(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(16, scalable) +; for (int i=0; i:0:0: User-specified vectorization factor vscale x 16 is unsafe, clamping to maximum safe vectorization factor vscale x 8 +; CHECK-DBG: LV: Using max VF vscale x 8 +; CHECK-LABEL: @test6 +; CHECK: +define void @test6(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 128 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15 + +exit: + ret void +} + +!15 = !{!15, !16, !17} +!16 = !{!"llvm.loop.vectorize.width", i32 16} +!17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve" +; CHECK-NO-SVE: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors. +; CHECK-NO-SVE: remark: :0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors. +; CHECK-NO-SVE: LV: Selecting VF: 4. 
+; CHECK-NO-SVE: <4 x i32> +; CHECK-NO-SVE-NOT: <vscale x 4 x i32> +define void @test_no_sve(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18 + +exit: + ret void +} + +!18 = !{!18, !19, !20} +!19 = !{!"llvm.loop.vectorize.width", i32 4} +!20 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} + +; Test the LV falls back to fixed-width vectorization if scalable vectors are +; supported but max vscale is undefined. +; +; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale" +; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead. +; CHECK-NO-MAX-VSCALE: The max safe VF is: 4. +; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.
+; CHECK-NO-MAX-VSCALE: <4 x i32> +define void @test_no_max_vscale(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 4 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21 + +exit: + ret void +} + +!21 = !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll --- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-target-supports-scalable-vectors=true -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll @@ -99,27 +99,3 @@ for.end: ; preds = %for.end.loopexit, %entry ret void } - -; Currently we cannot handle scalable vectorization factors. 
-; CHECK: LV: Checking a loop in "f4" -; CHECK: LEV: Epilogue vectorization for scalable vectors not yet supported. - -define void @f4(i8* %A) { -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %A, i64 %iv - store i8 1, i8* %arrayidx, align 1 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp ne i64 %iv.next, 1024 - br i1 %exitcond, label %for.body, label %exit, !llvm.loop !0 - -exit: - ret void -} - -!0 = !{!0, !1, !2} -!1 = !{!"llvm.loop.vectorize.width", i32 4} -!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll @@ -0,0 +1,27 @@ +; REQUIRES: asserts +; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" + +; Currently we cannot handle scalable vectorization factors. +; CHECK: LV: Checking a loop in "f1" +; CHECK: LEV: Epilogue vectorization for scalable vectors not yet supported. 
+ +define void @f1(i8* %A) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %A, i64 %iv + store i8 1, i8* %arrayidx, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp ne i64 %iv.next, 1024 + br i1 %exitcond, label %for.body, label %exit, !llvm.loop !0 + +exit: + ret void +} + +!0 = !{!0, !1} +!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 < %s | FileCheck %s --check-prefix=CHECKUF1 -; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 < %s | FileCheck %s --check-prefix=CHECKUF2 +; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF1 +; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true < %s | FileCheck %s --check-prefix=CHECKUF2 ; CHECKUF1: for.body.preheader: ; CHECKUF1-DAG: %wide.trip.count = zext i32 %N to i64 @@ -96,6 +96,5 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1 } -!1 = distinct !{!1, !2, !3} -!2 = !{!"llvm.loop.vectorize.width", i32 4} -!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll @@ -0,0 +1,33 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors. +; CHECK: remark: <unknown>:0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors. +; CHECK: LV: The Widest register safe to use is: 32 bits. +define void @test1(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 4 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 + +exit: + ret void +} + +!0 = !{!0, !1, !2} +!1 = !{!"llvm.loop.vectorize.width", i32 4} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}