diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1340,6 +1340,14 @@ auto LT = TLI->getTypeLegalizationCost(DL, Src); if (!LT.first.isValid()) return InstructionCost::getInvalid(); + + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return a high cost to avoid selecting it. + // This change will be removed when code-generation for these types + // is sufficiently reliable. + if (cast<VectorType>(Src)->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getMax(); + return LT.first * 2; } @@ -1355,6 +1363,14 @@ if (!LT.first.isValid()) return InstructionCost::getInvalid(); + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return a high cost to avoid selecting it. + // This change will be removed when code-generation for these types + // is sufficiently reliable. + if (cast<VectorType>(DataTy)->getElementCount() == + ElementCount::getScalable(1)) + return InstructionCost::getMax(); + ElementCount LegalVF = LT.second.getVectorElementCount(); Optional<unsigned> MaxNumVScale = getMaxVScale(); assert(MaxNumVScale && "Expected valid max vscale value"); @@ -1384,6 +1400,14 @@ if (!LT.first.isValid()) return InstructionCost::getInvalid(); + + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return a high cost to avoid selecting it. + // This change will be removed when code-generation for these types + // is sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getMax(); + // TODO: consider latency as well for TCK_SizeAndLatency. 
if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) return LT.first; diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll --- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll @@ -70,6 +70,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9223372036854775807 for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>* undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -96,6 +97,7 @@ %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) ; A couple of examples of illegal scalable types + %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64> *undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half> *undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) @@ -133,6 +135,7 @@ declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>) declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>*, i32, <vscale x 4 x i1>, <vscale x 4 x i64>) +declare <vscale x 1 x i64> 
@llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>*, i32, <vscale x 1 x i1>, <vscale x 1 x i64>) declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>) declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>) declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll --- a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll @@ -8,10 +8,12 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 9223372036854775807 for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64 %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer) %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer) %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> undef, i32 0, <4 x i1> %v4i1mask, <4 x i32> zeroinitializer) %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> undef, i32 0, <1 x i1> %v1i1mask, <1 x i128> zeroinitializer) + %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer) ret void } @@ -19,3 +21,4 @@ declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>) declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>) +declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll b/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll 
--- a/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll @@ -5,9 +5,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv8i8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv16i8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv32i8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 9223372036854775807 for instruction: %res.nxv1i64 %res.nxv8i8 = load <vscale x 8 x i8>, <vscale x 8 x i8>* undef %res.nxv16i8 = load <vscale x 16 x i8>, <vscale x 16 x i8>* undef %res.nxv32i8 = load <vscale x 32 x i8>, <vscale x 32 x i8>* undef + %res.nxv1i64 = load <vscale x 1 x i64>, <vscale x 1 x i64>* undef ret void } @@ -16,8 +18,10 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store +; CHECK-NEXT: Cost Model: Found an estimated cost of 9223372036854775807 for instruction: store store <vscale x 8 x i8> undef, <vscale x 8 x i8>* undef store <vscale x 16 x i8> undef, <vscale x 16 x i8>* undef store <vscale x 32 x i8> undef, <vscale x 32 x i8>* undef + store <vscale x 1 x i64> undef, <vscale x 1 x i64>* undef ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll --- a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll @@ -8,10 +8,12 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 9223372036854775807 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64 call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask) call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask) call void 
@llvm.masked.scatter.v4i32(<4 x i32> undef, <4 x i32*> undef, i32 0, <4 x i1> %v4i1mask) call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> undef, <1 x i128*> undef, i32 0, <1 x i1> %v1i1mask) + call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask) ret void } @@ -19,3 +21,4 @@ declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>) declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128>, <1 x i128*>, i32, <1 x i1>) +declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S | FileCheck %s +; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -dce -instcombine < %s -S | FileCheck %s ; Test that we can add on the induction variable ; for (long long i = 0; i < n; i++) {