Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -641,6 +641,14 @@
   }
 
   bool useSVEForFixedLengthVectors() const;
+
+  /// Returns the value of vscale we should use for tuning the cost model
+  /// using attributes found in Function \p F. The CPU named by "tune-cpu" (or
+  /// by "target-cpu" when "tune-cpu" is absent) is consulted first; if we have
+  /// no value for that CPU, the vscale_range maximum is used when it is
+  /// non-zero. If \p F is nullptr, or none of the above applies, return a
+  /// default of 16.
+  unsigned getVScaleForTuning(const Function *F) const;
 };
 } // End llvm namespace
 
Index: llvm/lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -353,3 +353,41 @@
 }
 
 bool AArch64Subtarget::useAA() const { return UseAA; }
+
+unsigned AArch64Subtarget::getVScaleForTuning(const Function *F) const {
+  // Default used when neither a tuning CPU nor vscale_range gives a value.
+  unsigned MaxVScale = 16;
+  // Bail out if we don't have a Function
+  if (!F)
+    return MaxVScale;
+
+  // "tune-cpu" names the CPU to tune for; when it is absent we tune for
+  // "target-cpu" instead. Compare the attribute strings in place rather than
+  // copying them into std::string temporaries.
+  Attribute TuneAttr = F->getFnAttribute("tune-cpu");
+  Attribute CPUAttr = F->getFnAttribute("target-cpu");
+  StringRef TuneCPU;
+  if (TuneAttr.isValid())
+    TuneCPU = TuneAttr.getValueAsString();
+  else if (CPUAttr.isValid())
+    TuneCPU = CPUAttr.getValueAsString();
+
+  if (TuneCPU == "generic")
+    return 4;
+  if (TuneCPU == "neoverse-v1")
+    return 2;
+  if (TuneCPU == "neoverse-n2")
+    return 1;
+
+  // We couldn't find a CPU to use for tuning so let's fall back on the
+  // vscale_range attribute instead. F is known to be non-null here, and only a
+  // non-zero upper bound replaces the default.
+  if (F->hasFnAttribute(Attribute::VScaleRange)) {
+    unsigned Arg =
+        F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
+    if (Arg > 0)
+      MaxVScale = Arg;
+  }
+
+  return MaxVScale;
+}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -135,15 +135,7 @@
     if (!VF.isScalable())
       return VF.getFixedValue();
 
-    unsigned MaxNumVScale = 16;
-    if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
-      unsigned VScaleMax =
-          F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
-      if (VScaleMax > 0)
-        MaxNumVScale = VScaleMax;
-    }
-
-    return MaxNumVScale * VF.getKnownMinValue();
+    return VF.getKnownMinValue() * ST->getVScaleForTuning(F);
   }
 
   unsigned getMaxInterleaveFactor(unsigned VF);
Index: llvm/test/Analysis/CostModel/AArch64/sve-gather.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-gather.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-gather.ll @@ -1,23 +1,61 @@ -; Check getIntrinsicInstrCost in BasicTTIImpl.h for masked gather +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -cost-model < %s | FileCheck %s +; RUN: opt -analyze -cost-model -mcpu=neoverse-v1 < %s | FileCheck %s --check-prefix=CHECK-CPU-NEOVERSE-V1 +; RUN: opt -analyze -cost-model -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefix=CHECK-CPU-NEOVERSE-N2 -; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s +target triple="aarch64--linux-gnu" -define void @masked_gathers( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) vscale_range(0, 16) { +define void @masked_gathers( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #0 { ; CHECK-LABEL: 'masked_gathers' -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128 -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_gathers' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 
'masked_gathers' +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %res.nxv4i32 = call @llvm.masked.gather.nxv4i32( undef, i32 0, %nxv4i1mask, zeroinitializer) %res.nxv8i32 = call @llvm.masked.gather.nxv8i32( undef, i32 0, %nxv8i1mask, zeroinitializer) - %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> undef, i32 0, <4 x i1> %v4i1mask, <4 x i32> zeroinitializer) - %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> undef, i32 0, <1 x i1> %v1i1mask, <1 x i128> zeroinitializer) - %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) + %res.nxv1i64 = call @llvm.masked.gather.nxv1i64( undef, i32 0, %nxv1i1mask, zeroinitializer) ret void } -define void @masked_gathers_no_vscale_range() { +define void @masked_gathers_tune_generic( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #1 { +; CHECK-LABEL: 'masked_gathers_tune_generic' +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call 
@llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_gathers_tune_generic' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 'masked_gathers_tune_generic' +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv4i32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 0, %nxv4i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv8i32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 0, %nxv8i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 0, %nxv1i1mask, zeroinitializer) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.nxv4i32 = call @llvm.masked.gather.nxv4i32( undef, i32 0, %nxv4i1mask, zeroinitializer) + %res.nxv8i32 = call @llvm.masked.gather.nxv8i32( undef, i32 0, %nxv8i1mask, zeroinitializer) + %res.nxv1i64 = call @llvm.masked.gather.nxv1i64( undef, i32 0, %nxv1i1mask, zeroinitializer) + ret void +} + +define void @masked_gathers_no_vscale_range() #2 { ; CHECK-LABEL: 
'masked_gathers_no_vscale_range' ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) @@ -27,6 +65,30 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_gathers_no_vscale_range' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res.nxv16i16 = call 
@llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 'masked_gathers_no_vscale_range' +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4f64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv2f64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8f32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4f32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv2f32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res.nxv16i16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res.nxv8i16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 1, undef, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res.nxv4i16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 1, undef, undef) 
+; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %res.nxv4f64 = call @llvm.masked.gather.nxv4f64( undef, i32 1, undef, undef) %res.nxv2f64 = call @llvm.masked.gather.nxv2f64( undef, i32 1, undef, undef) @@ -41,11 +103,13 @@ ret void } +attributes #0 = { "target-features"="+sve" vscale_range(0, 8) } +attributes #1 = { "target-features"="+sve" vscale_range(0, 16) "tune-cpu"="generic" } +attributes #2 = { "target-features"="+sve" } + declare @llvm.masked.gather.nxv4i32(, i32, , ) declare @llvm.masked.gather.nxv8i32(, i32, , ) -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>) -declare @llvm.masked.gather.nxv1i64.nxv1p0i64(, i32, , ) +declare @llvm.masked.gather.nxv1i64(, i32, , ) declare @llvm.masked.gather.nxv4f64(, i32, , ) declare @llvm.masked.gather.nxv2f64(, i32, , ) declare @llvm.masked.gather.nxv8f32(, i32, , ) Index: llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll @@ -1,23 +1,61 @@ -; Check getIntrinsicInstrCost in BasicTTIImpl.h with for masked scatter +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -cost-model < %s | FileCheck %s +; RUN: opt -analyze -cost-model -mcpu=neoverse-v1 < %s | FileCheck %s --check-prefix=CHECK-CPU-NEOVERSE-V1 +; RUN: opt -analyze -cost-model -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefix=CHECK-CPU-NEOVERSE-N2 -; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s +target triple="aarch64--linux-gnu" -define void @masked_scatters( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) vscale_range(0, 16) { +define void @masked_scatters( %nxv4i1mask, 
%nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #0 { ; CHECK-LABEL: 'masked_scatters' -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128 -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_scatters' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 'masked_scatters' +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; call void @llvm.masked.scatter.nxv4i32( undef, undef, i32 0, %nxv4i1mask) call void @llvm.masked.scatter.nxv8i32( undef, undef, i32 0, %nxv8i1mask) - call void @llvm.masked.scatter.v4i32(<4 x i32> undef, <4 x i32*> undef, i32 0, <4 x i1> %v4i1mask) - call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> undef, <1 x i128*> undef, i32 0, <1 x i1> %v1i1mask) - call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) + call void @llvm.masked.scatter.nxv1i64( undef, undef, i32 0, %nxv1i1mask) ret void } -define void @masked_scatters_no_vscale_range() { +define void @masked_scatters_tune_generic( %nxv4i1mask, %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, %nxv1i1mask) #1 { +; CHECK-LABEL: 'masked_scatters_tune_generic' +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_scatters_tune_generic' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 'masked_scatters_tune_generic' +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 0, %nxv4i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 0, %nxv8i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 0, %nxv1i1mask) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.nxv4i32( undef, undef, i32 0, %nxv4i1mask) + call void @llvm.masked.scatter.nxv8i32( undef, undef, i32 0, %nxv8i1mask) + call void @llvm.masked.scatter.nxv1i64( undef, undef, i32 0, %nxv1i1mask) + ret void +} + +define void @masked_scatters_no_vscale_range() #2 { ; CHECK-LABEL: 'masked_scatters_no_vscale_range' ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) @@ -27,6 +65,30 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, 
undef, i32 1, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-V1-LABEL: 'masked_scatters_no_vscale_range' +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-V1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-CPU-NEOVERSE-N2-LABEL: 'masked_scatters_no_vscale_range' +; 
CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 1, undef) +; CHECK-CPU-NEOVERSE-N2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; call void @llvm.masked.scatter.nxv4f64( undef, undef, i32 1, undef) call void @llvm.masked.scatter.nxv2f64( undef, undef, i32 1, undef) @@ -41,11 +103,13 @@ ret void } +attributes #0 = { "target-features"="+sve" vscale_range(0, 8) } +attributes #1 = { "target-features"="+sve" vscale_range(0, 16) "tune-cpu"="generic" } +attributes #2 = { "target-features"="+sve" } + declare void @llvm.masked.scatter.nxv4i32(, , i32, ) declare void @llvm.masked.scatter.nxv8i32(, , i32, ) -declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) -declare 
void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128>, <1 x i128*>, i32, <1 x i1>) -declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(, , i32, ) +declare void @llvm.masked.scatter.nxv1i64(, , i32, ) declare void @llvm.masked.scatter.nxv4f64(, , i32, ) declare void @llvm.masked.scatter.nxv2f64(, , i32, ) declare void @llvm.masked.scatter.nxv8f32(, , i32, )