// Review notes for this patch. These lines sit ahead of the first diff header and are not part of any hunk.
//
// Hunk 1 - llvm/include/llvm/CodeGen/BasicTTIImpl.h:
//   Inside the switch over improveShuffleKindFromMask(Kind, Mask), the SK_Broadcast case and the
//   SK_Select / SK_Splice / SK_Reverse / SK_Transpose / SK_PermuteSingleSrc / SK_PermuteTwoSrc group
//   stop casting Tp unconditionally. Each now does a dyn_cast, forwards the result to
//   getBroadcastShuffleOverhead or getPermuteShuffleOverhead when it succeeds, and otherwise returns
//   InstructionCost::getInvalid().
//   NOTE(review): SK_ExtractSubvector still applies an unconditional cast to SubTp - confirm whether
//   it needs the same guard.
//
// Hunk 2 - new file llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll:
//   Runs opt -cost-model -analyze with -mtriple=riscv64 -mattr=+m,+experimental-v through FileCheck.
//   Four functions are covered: vector_broadcast, vector_insert_extract, vector_reverse, vector_splice.
//   The CHECK lines expect an Invalid cost for every reverse and splice call and for all but one
//   shufflevector (%6, cost 0); the insert/extract intrinsics expect costs of 32 or 1.
//   The splice declarations include an nxv2i8 variant that no call in vector_splice uses.
//
// NOTE(review): this copy appears to have lost every angle-bracketed token (cast(Tp) has no template
// argument, shufflevector operands have no vector type) and its original line breaks - presumably
// extraction damage; recover the original patch before applying or editing the hunks.
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -875,14 +875,18 @@ switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: - return getBroadcastShuffleOverhead(cast(Tp)); + if (auto *FVT = dyn_cast(Tp)) + return getBroadcastShuffleOverhead(FVT); + return InstructionCost::getInvalid(); case TTI::SK_Select: case TTI::SK_Splice: case TTI::SK_Reverse: case TTI::SK_Transpose: case TTI::SK_PermuteSingleSrc: case TTI::SK_PermuteTwoSrc: - return getPermuteShuffleOverhead(cast(Tp)); + if (auto *FVT = dyn_cast(Tp)) + return getPermuteShuffleOverhead(FVT); + return InstructionCost::getInvalid(); case TTI::SK_ExtractSubvector: return getExtractSubvectorOverhead(Tp, Index, cast(SubTp)); diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -0,0 +1,156 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; Check getShuffleCost for scalable vector + +; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s + +define void @vector_broadcast() { +; CHECK-LABEL: 'vector_broadcast' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %zero = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %1 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = shufflevector undef, undef,
zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %11 = shufflevector undef, undef, zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %zero = shufflevector undef, undef, zeroinitializer + %1 = shufflevector undef, undef, zeroinitializer + %2 = shufflevector undef, undef, zeroinitializer + %3 = shufflevector undef, undef, zeroinitializer + %4 = shufflevector undef, undef, zeroinitializer + %5 = shufflevector undef, undef, zeroinitializer + %6 = shufflevector undef, undef, zeroinitializer + %7 = shufflevector undef, undef, zeroinitializer + %8 = shufflevector undef, undef, zeroinitializer + %9 = shufflevector undef, undef, zeroinitializer + %10 = shufflevector undef, undef, zeroinitializer + %11 = shufflevector undef, undef, zeroinitializer + ret void +} + +define void @vector_insert_extract( %v0, %v1, <16 x i32> %v2) { +; CHECK-LABEL: 'vector_insert_extract' +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %insert_fixed_into_scalable = call @llvm.experimental.vector.insert.nxv4i32.v16i32( %v0, <16 x i32> %v2, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call
@llvm.experimental.vector.extract.nxv4i32.nxv16i32( %v1, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( %v1, %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( %v0, i64 0) + %insert_fixed_into_scalable = call @llvm.experimental.vector.insert.nxv4i32.v16i32( %v0, <16 x i32> %v2, i64 0) + %extract_scalable_from_scalable = call @llvm.experimental.vector.extract.nxv4i32.nxv16i32( %v1, i64 0) + %insert_scalable_into_scalable = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( %v1, %v0, i64 0) + ret void +} +declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(, i64) +declare @llvm.experimental.vector.insert.nxv4i32.v16i32(, <16 x i32>, i64) +declare @llvm.experimental.vector.extract.nxv4i32.nxv16i32(, i64) +declare @llvm.experimental.vector.insert.nxv16i32.nxv4i32(, , i64) + +define void @vector_reverse() { +; CHECK-LABEL: 'vector_reverse' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i32 =
call @llvm.experimental.vector.reverse.nxv4i32( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) + %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) + %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) + %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) + %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) + %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) + %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) + %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) + %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) + %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) + %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) + %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) + %reverse_nxv4i1 = call
@llvm.experimental.vector.reverse.nxv4i1( undef) + %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) + ret void +} + +declare @llvm.experimental.vector.reverse.nxv16i8() +declare @llvm.experimental.vector.reverse.nxv32i8() +declare @llvm.experimental.vector.reverse.nxv2i16() +declare @llvm.experimental.vector.reverse.nxv4i16() +declare @llvm.experimental.vector.reverse.nxv8i16() +declare @llvm.experimental.vector.reverse.nxv16i16() +declare @llvm.experimental.vector.reverse.nxv4i32() +declare @llvm.experimental.vector.reverse.nxv8i32() +declare @llvm.experimental.vector.reverse.nxv2i64() +declare @llvm.experimental.vector.reverse.nxv4i64() +declare @llvm.experimental.vector.reverse.nxv16i1() +declare @llvm.experimental.vector.reverse.nxv8i1() +declare @llvm.experimental.vector.reverse.nxv4i1() +declare @llvm.experimental.vector.reverse.nxv2i1() + + +define void @vector_splice() { +; CHECK-LABEL: 'vector_splice' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i16 = call @llvm.experimental.vector.splice.nxv2i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i16 = call @llvm.experimental.vector.splice.nxv4i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i16 = call @llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i32
= call @llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i1 = call @llvm.experimental.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i1 = call @llvm.experimental.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i1 = call @llvm.experimental.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i1 = call @llvm.experimental.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %splice_nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv2i16 = call @llvm.experimental.vector.splice.nxv2i16( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv4i16 = call @llvm.experimental.vector.splice.nxv4i16( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv8i16 = call @llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv4i32 = call
@llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv16i1 = call @llvm.experimental.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv8i1 = call @llvm.experimental.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv4i1 = call @llvm.experimental.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) + %splice_nxv2i1 = call @llvm.experimental.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) + ret void +} + +declare @llvm.experimental.vector.splice.nxv2i1(, , i32) +declare @llvm.experimental.vector.splice.nxv4i1(, , i32) +declare @llvm.experimental.vector.splice.nxv8i1(, , i32) +declare @llvm.experimental.vector.splice.nxv16i1(, , i32) +declare @llvm.experimental.vector.splice.nxv2i8(, , i32) +declare @llvm.experimental.vector.splice.nxv16i8(, , i32) +declare @llvm.experimental.vector.splice.nxv32i8(, , i32) +declare @llvm.experimental.vector.splice.nxv2i16(, , i32) +declare @llvm.experimental.vector.splice.nxv4i16(, , i32) +declare @llvm.experimental.vector.splice.nxv8i16(, , i32) +declare @llvm.experimental.vector.splice.nxv16i16(, , i32) +declare @llvm.experimental.vector.splice.nxv4i32(, , i32) +declare @llvm.experimental.vector.splice.nxv8i32(, , i32) +declare @llvm.experimental.vector.splice.nxv2i64(, , i32) +declare @llvm.experimental.vector.splice.nxv4i64(, , i32)