diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -501,6 +501,9 @@
     unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
     CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
                                                              VScale, VScale));
+  } else if (getContext().getTargetInfo().hasFeature("sve")) {
+    CurFn->addFnAttr(
+        llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
   }
 
   // If we generated an unreachable return block, delete it now.
diff --git a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
--- a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
+++ b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 
 // CHECK-LABEL: @func() #0
 // CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
 void func() {}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -903,9 +903,6 @@
   /// \return The width of the smallest vector register type.
   unsigned getMinVectorRegisterBitWidth() const;
 
-  /// \return The maximum value of vscale if the target specifies an
-  /// architectural maximum vector length, and None otherwise.
-  Optional<unsigned> getMaxVScale() const;
 
   /// \return True if the vectorization factor should be chosen to
   /// make the vector of the smallest element type match the size of a
@@ -1536,7 +1533,6 @@
   virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
   virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
-  virtual Optional<unsigned> getMaxVScale() const = 0;
   virtual bool shouldMaximizeVectorBandwidth() const = 0;
   virtual ElementCount getMinimumVF(unsigned ElemWidth,
                                     bool IsScalable) const = 0;
@@ -1985,9 +1981,6 @@
   unsigned getMinVectorRegisterBitWidth() override {
     return Impl.getMinVectorRegisterBitWidth();
   }
-  Optional<unsigned> getMaxVScale() const override {
-    return Impl.getMaxVScale();
-  }
   bool shouldMaximizeVectorBandwidth() const override {
     return Impl.shouldMaximizeVectorBandwidth();
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -393,8 +393,6 @@
   unsigned getMinVectorRegisterBitWidth() const { return 128; }
 
-  Optional<unsigned> getMaxVScale() const { return None; }
-
   bool shouldMaximizeVectorBandwidth() const { return false; }
 
   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -652,8 +652,6 @@
     return TypeSize::getFixed(32);
   }
 
-  Optional<unsigned> getMaxVScale() const { return None; }
-
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -594,10 +594,6 @@
   return TTIImpl->getMinVectorRegisterBitWidth();
 }
 
-Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
-  return TTIImpl->getMaxVScale();
-}
-
 bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
   return TTIImpl->shouldMaximizeVectorBandwidth();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,12 +125,6 @@
     return ST->getMinVectorRegisterBitWidth();
   }
 
-  Optional<unsigned> getMaxVScale() const {
-    if (ST->hasSVE())
-      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
-    return BaseT::getMaxVScale();
-  }
-
   unsigned getMaxInterleaveFactor(unsigned VF);
 
   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1421,7 +1421,12 @@
     return InstructionCost::getInvalid();
 
   ElementCount LegalVF = LT.second.getVectorElementCount();
-  Optional<unsigned> MaxNumVScale = getMaxVScale();
+  Optional<unsigned> MaxNumVScale;
+  if (I->getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+    Attribute VScaleRangeAttr =
+        I->getFunction()->getFnAttribute(Attribute::VScaleRange);
+    MaxNumVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+  }
   assert(MaxNumVScale && "Expected valid max vscale value");
 
   InstructionCost MemOpCost =
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,7 +55,6 @@
   bool shouldExpandReduction(const IntrinsicInst *II) const;
   bool supportsScalableVectors() const { return ST->hasStdExtV(); }
-  Optional<unsigned> getMaxVScale() const;
 
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
     switch (K) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -116,20 +116,6 @@
   }
 }
 
-Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
-  // There is no assumption of the maximum vector length in V specification.
-  // We use the value specified by users as the maximum vector length.
-  // This function will use the assumed maximum vector length to get the
-  // maximum vscale for LoopVectorizer.
-  // If users do not specify the maximum vector length, we have no way to
-  // know whether the LoopVectorizer is safe to do or not.
-  // We only consider to use single vector register (LMUL = 1) to vectorize.
-  unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
-  if (ST->hasStdExtV() && MaxVectorSizeInBits != 0)
-    return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
-  return BaseT::getMaxVScale();
-}
-
 InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5693,7 +5693,12 @@
     return MaxScalableVF;
 
   // Limit MaxScalableVF by the maximum safe dependence distance.
-  Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+  Optional<unsigned> MaxVScale;
+  if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+    Attribute VScaleRangeAttr =
+        TheFunction->getFnAttribute(Attribute::VScaleRange);
+    MaxVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+  }
   MaxScalableVF = ElementCount::getScalable(
       MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
   if (!MaxScalableVF)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_gathers'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_scatters'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
 ;   return a;
 ; }
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
 ; CHECK-VF4UF2-LABEL: @PR33613
 ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -48,7 +48,7 @@
   ret float %add
 }
 
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -112,7 +112,7 @@
   ret float %add
 }
 
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_interleave
 ; CHECK-ORDERED: entry
 ; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -205,7 +205,7 @@
   ret void
 }
 
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_of_sum
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -267,7 +267,7 @@
   ret float %res
 }
 
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_conditional
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -342,7 +342,7 @@
 }
 
 ; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_multiple
 ; CHECK-ORDERED-NOT: vector.body
@@ -389,6 +389,7 @@
   ret float %rdx
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !3, !6, !8}
 !1 = distinct !{!1, !3, !7, !8}
 !2 = distinct !{!2, !4, !6, !8}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
 ; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@
 
 ; Test the fallback mechanism when scalable vectors are not feasible due
 ; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@
 exit:
   ret void
 }
+
+attributes #0 = { vscale_range(0, 16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -44,7 +44,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -88,7 +88,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -135,7 +135,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 2.
 ; CHECK-LABEL: @test3
 ; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -186,7 +186,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test4
 ; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -233,7 +233,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 4
 ; CHECK-LABEL: @test5
 ; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -283,7 +283,7 @@
 ; CHECK-DBG: Selecting VF: 4.
 ; CHECK-LABEL: @test6
 ; CHECK: <4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -317,7 +317,7 @@
 ; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -350,7 +350,7 @@
 ; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
 ; CHECK-NO-SVE-LABEL: @test_no_max_vscale
 ; CHECK-NO-SVE: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -372,6 +372,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !21 = !{!21, !22, !23}
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
 
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_i32i32i16
 ; CHECK: vector.ph:
 ; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
@@ -39,7 +39,7 @@
   ret void
 }
 
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_f64f64f64
 ; CHECK: vector.ph:
 ; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
@@ -76,7 +76,7 @@
   ret void
 }
 
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @invariant_load_cond
 ; CHECK: vector.body
 ; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
@@ -117,6 +117,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
 
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
+define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64
 ; CHECK: vector.body:
 ; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
@@ -29,7 +29,7 @@
 ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
 ; additional 'sext' in the for.body loop exposes additional code paths
 ; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {
+define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_nxv4i32_ind32
 ; CHECK: vector.body:
 ; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>
@@ -57,7 +57,7 @@
   ret void
 }
 
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -89,7 +89,7 @@
   ret void
 }
 
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {
+define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @gather_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -122,6 +122,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -59,7 +59,7 @@
   ret void
 }
 
-attributes #0 = { "target-features"="+neon,+sve" }
+attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
 
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
 
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_i32(
 ; CHECK: vector.body
 ; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -27,7 +27,7 @@
   ret void
 }
 
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_f64(
 ; CHECK: vector.body
 ; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -55,7 +55,7 @@
   ret void
 }
 
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @cond_stride7_f64(
 ; CHECK: vector.body
 ; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
@@ -90,7 +90,7 @@
   ret void
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -12,7 +12,7 @@
 ; that we can use gather instructions with the correct offsets, taking
 ; vscale into account.
 
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {
+define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @widen_ptr_phi_unrolled(
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
@@ -122,7 +122,7 @@
 ; because it is stored to memory.
 ;
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {
+define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK-LABEL: @pointer_iv_mixed(
 ; CHECK: vector.body
 ; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -170,7 +170,7 @@
   ret i32 %tmp5
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}