diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -871,6 +871,11 @@
   /// across the current set of primary and secondary targets.
   virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
 
+  /// Returns the target-specific min and max values for vscale_range.
+  virtual Optional<std::pair<unsigned, unsigned>>
+  getVScaleRange(const LangOptions &LangOpts) const {
+    return None;
+  }
   /// The __builtin_clz* and __builtin_ctz* built-in
   /// functions are specified to have undefined results for zero inputs, but
   /// on targets that support these operations in a way that provides
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -96,6 +96,9 @@
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 
+  Optional<std::pair<unsigned, unsigned>>
+  getVScaleRange(const LangOptions &LangOpts) const override;
+
   bool hasFeature(StringRef Feature) const override;
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -424,6 +424,17 @@
                                              Builtin::FirstTSBuiltin);
 }
 
+Optional<std::pair<unsigned, unsigned>>
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+  if (LangOpts.ArmSveVectorBits) {
+    unsigned VScale = LangOpts.ArmSveVectorBits / 128;
+    return std::pair<unsigned, unsigned>(VScale, VScale);
+  }
+  if (hasFeature("sve"))
+    return std::pair<unsigned, unsigned>(0, 16);
+  return None;
+}
+
 bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" ||
          (Feature == "neon" && (FPU & NeonMode)) ||
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -484,11 +484,13 @@
   //    function.
   CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
 
-  // Add vscale attribute if appropriate.
-  if (getLangOpts().ArmSveVectorBits) {
-    unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
-    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
-                                                             VScale, VScale));
+  // Add vscale_range attribute if appropriate.
+  Optional<std::pair<unsigned, unsigned>> VScaleRange =
+      getContext().getTargetInfo().getVScaleRange(getLangOpts());
+  if (VScaleRange) {
+    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+        getLLVMContext(), VScaleRange.getValue().first,
+        VScaleRange.getValue().second));
   }
 
   // If we generated an unreachable return block, delete it now.
diff --git a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
--- a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
+++ b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 
 // CHECK-LABEL: @func() #0
 // CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
 void func() {}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,22 +125,25 @@
     return ST->getMinVectorRegisterBitWidth();
   }
 
-  Optional<unsigned> getMaxVScale() const {
-    if (ST->hasSVE())
-      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
-    return BaseT::getMaxVScale();
-  }
 
   /// Try to return an estimate cost factor that can be used as a multiplier
   /// when scalarizing an operation for a vector with ElementCount \p VF.
   /// For scalable vectors this currently takes the most pessimistic view based
   /// upon the maximum possible value for vscale.
-  unsigned getMaxNumElements(ElementCount VF) const {
+  unsigned getMaxNumElements(ElementCount VF,
+                             const Function *F = nullptr) const {
     if (!VF.isScalable())
       return VF.getFixedValue();
-    Optional<unsigned> MaxNumVScale = getMaxVScale();
-    assert(MaxNumVScale && "Expected valid max vscale value");
-    return *MaxNumVScale * VF.getKnownMinValue();
+
+    unsigned MaxNumVScale = 16;
+    if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
+      unsigned VScaleMax =
+          F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
+      if (VScaleMax > 0)
+        MaxNumVScale = VScaleMax;
+    }
+
+    return MaxNumVScale * VF.getKnownMinValue();
   }
 
   unsigned getMaxInterleaveFactor(unsigned VF);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1589,7 +1589,7 @@
   ElementCount LegalVF = LT.second.getVectorElementCount();
   InstructionCost MemOpCost =
       getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
-  return LT.first * MemOpCost * getMaxNumElements(LegalVF);
+  return LT.first * MemOpCost * getMaxNumElements(LegalVF, I->getFunction());
 }
 
 bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5636,6 +5636,13 @@
   // Limit MaxScalableVF by the maximum safe dependence distance.
   Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+  if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+    unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
+                             .getVScaleRangeArgs()
+                             .second;
+    if (VScaleMax > 0)
+      MaxVScale = VScaleMax;
+  }
   MaxScalableVF = ElementCount::getScalable(
       MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
   if (!MaxScalableVF)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_gathers'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
--- a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_scatters'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
 ;   return a;
 ; }
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
 ; CHECK-VF4UF2-LABEL: @PR33613
 ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -4,7 +4,7 @@
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
 
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -49,7 +49,7 @@
   ret float %add
 }
 
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -113,7 +113,7 @@
   ret float %add
 }
 
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_interleave
 ; CHECK-ORDERED: entry
 ; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -206,7 +206,7 @@
   ret void
 }
 
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_of_sum
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -268,7 +268,7 @@
   ret float %res
 }
 
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_conditional
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -343,7 +343,7 @@
 }
 
 ; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_multiple
 ; CHECK-ORDERED-NOT: vector.body
@@ -390,6 +390,7 @@
   ret float %rdx
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !3, !6, !8}
 !1 = distinct !{!1, !3, !7, !8}
 !2 = distinct !{!2, !4, !6, !8}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
 ; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@
 ; Test the fallback mechanism when scalable vectors are not feasible due
 ; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@
 exit:
   ret void
 }
+
+attributes #0 = { vscale_range(0, 16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -45,7 +45,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -90,7 +90,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -138,7 +138,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 2.
 ; CHECK-LABEL: @test3
 ; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -190,7 +190,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test4
 ; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -238,7 +238,7 @@
 ; CHECK-DBG: LV: Using user VF vscale x 4
 ; CHECK-LABEL: @test5
 ; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -289,7 +289,7 @@
 ; CHECK-DBG: Selecting VF: vscale x 4.
 ; CHECK-LABEL: @test6
 ; CHECK: <vscale x 4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -322,7 +322,7 @@
 ; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -356,7 +356,7 @@
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test_no_max_vscale
 ; CHECK: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -378,6 +378,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !21 = !{!21, !22, !23}
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
 
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_i32i32i16
 ; CHECK: vector.ph:
 ; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
@@ -39,7 +39,7 @@
   ret void
 }
 
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @cond_inv_load_f64f64f64
 ; CHECK: vector.ph:
 ; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
@@ -76,7 +76,7 @@
   ret void
 }
 
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @invariant_load_cond
 ; CHECK: vector.body
 ; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
@@ -117,6 +117,7 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
 
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
+define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64
 ; CHECK: vector.body:
 ; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
@@ -29,7 +29,7 @@
 ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
 ; additional 'sext' in the for.body loop exposes additional code paths
 ; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {
+define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_nxv4i32_ind32
 ; CHECK: vector.body:
 ; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>
@@ -57,7 +57,7 @@
   ret void
 }
 
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @scatter_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -89,7 +89,7 @@
   ret void
 }
 
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {
+define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
 ; CHECK-LABEL: @gather_inv_nxv4i32
 ; CHECK: vector.ph:
 ; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -124,7 +124,7 @@
 
 
-define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-LABEL: @gather_nxv4i32_ind64_stride2
 ; CHECK: vector.body:
 ; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -153,6 +153,8 @@
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
+
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -59,7 +59,7 @@
   ret void
 }
 
-attributes #0 = { "target-features"="+neon,+sve" }
+attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
 
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
 
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_i32(
 ; CHECK: vector.body
 ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -27,7 +27,7 @@
   ret void
 }
 
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_f64(
 ; CHECK: vector.body
 ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -55,7 +55,7 @@
 }
 
 
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
 ; CHECK-LABEL: @cond_stride7_f64(
 ; CHECK: vector.body
 ; CHECK: %[[MASK:.*]] = icmp ne
@@ -90,7 +90,7 @@
   ret void
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
@@ -49,7 +49,7 @@
   ret double %add
 }
 
-attributes #0 = { "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" vscale_range(0, 16) }
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -12,7 +12,7 @@
 ; that we can use gather instructions with the correct offsets, taking
 ; vscale into account.
 
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {
+define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
 ; CHECK-LABEL: @widen_ptr_phi_unrolled(
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
@@ -122,7 +122,7 @@
 ; because it is stored to memory.
 ;
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {
+define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK-LABEL: @pointer_iv_mixed(
 ; CHECK: vector.body
 ; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -170,7 +170,7 @@
   ret i32 %tmp5
 }
 
-
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
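Note (reviewer aid, not part of the patch): a rough sketch of the intended effect on generated IR, assuming a trivial translation unit containing only an empty function. The command line and attribute list below are illustrative; only the vscale_range piece is what this change controls.

; Hypothetical invocation:
;   clang --target=aarch64-none-linux-gnu -march=armv8-a+sve -msve-vector-bits=512 -S -emit-llvm func.c
define void @func() #0 {
entry:
  ret void
}

; 512 / 128 = 4, so the minimum and maximum vscale are both 4.
attributes #0 = { nounwind "target-features"="+neon,+sve" vscale_range(4,4) }

; With +sve but without -msve-vector-bits=<N>, the same function now gets the
; architectural bounds instead: vscale_range(0,16).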