diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1278,6 +1278,11 @@ TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index, cast<VectorType>(Args[1]->getType())); } + case Intrinsic::experimental_vector_reverse: { + return thisT()->getShuffleCost(TTI::SK_Reverse, + cast<VectorType>(Args[0]->getType()), 0, + cast<VectorType>(RetTy)); + } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1183,7 +1183,8 @@ int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp) { if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || - Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) { + Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || + Kind == TTI::SK_Reverse) { static const CostTblEntry ShuffleTbl[] = { // Broadcast shuffle kinds can be performed with 'dup'. 
{ TTI::SK_Broadcast, MVT::v8i8, 1 }, @@ -1233,6 +1234,15 @@ { TTI::SK_Broadcast, MVT::nxv8bf16, 1 }, { TTI::SK_Broadcast, MVT::nxv4f32, 1 }, { TTI::SK_Broadcast, MVT::nxv2f64, 1 }, + // Handle the cases for vector.reverse with scalable vectors + { TTI::SK_Reverse, MVT::nxv16i8, 1 }, + { TTI::SK_Reverse, MVT::nxv8i16, 1 }, + { TTI::SK_Reverse, MVT::nxv4i32, 1 }, + { TTI::SK_Reverse, MVT::nxv2i64, 1 }, + { TTI::SK_Reverse, MVT::nxv8f16, 1 }, + { TTI::SK_Reverse, MVT::nxv8bf16, 1 }, + { TTI::SK_Reverse, MVT::nxv4f32, 1 }, + { TTI::SK_Reverse, MVT::nxv2f64, 1 }, }; std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) diff --git a/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/getIntrinsicInstrCost-vector-reverse.ll @@ -0,0 +1,65 @@ +; Check getIntrinsicInstrCost in BasicTTIImpl.h for vector.reverse + +; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +define void @vector_reverse() #0{ +; CHECK-LABEL: 'vector_reverse': +; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %1 = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %2 = call <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %3 = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %4 = call <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %5 = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %6 = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %9 = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %10 = call <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %11 = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %12 = call <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%13 = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %15 = call <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %16 = call <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void + + call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> undef) + call <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8> undef) + call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> undef) + call <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16> undef) + call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> undef) + call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> undef) + call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> undef) + call <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64> undef) + call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> undef) + call <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half> undef) + call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> undef) + call <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float> undef) + call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> undef) + call <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double> undef) + call <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat> undef) + ret void +} + +attributes #0 = { "target-features"="+sve,+bf16" } +declare <16 x i8> 
@llvm.experimental.vector.reverse.v16i8(<16 x i8>) +declare <32 x i8> @llvm.experimental.vector.reverse.v32i8(<32 x i8>) +declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>) +declare <16 x i16> @llvm.experimental.vector.reverse.v16i16(<16 x i16>) +declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>) +declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>) +declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>) +declare <4 x i64> @llvm.experimental.vector.reverse.v4i64(<4 x i64>) +declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>) +declare <16 x half> @llvm.experimental.vector.reverse.v16f16(<16 x half>) +declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>) +declare <8 x float> @llvm.experimental.vector.reverse.v8f32(<8 x float>) +declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>) +declare <4 x double> @llvm.experimental.vector.reverse.v4f64(<4 x double>) +declare <8 x bfloat> @llvm.experimental.vector.reverse.v8bf16(<8 x bfloat>) +declare <16 x bfloat> @llvm.experimental.vector.reverse.v16bf16(<16 x bfloat>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll @@ -0,0 +1,66 @@ +; Check getIntrinsicInstrCost in BasicTTIImpl.h for vector.reverse + +; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +define void @vector_reverse() #0 { +; CHECK-LABEL: 'vector_reverse': +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %15 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void + + call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) + call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) + call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) + call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) + call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) + call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) + call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) + call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) + call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef) + call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef) + call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef) + call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef) + call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef) + call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef) + call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) + call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) + ret void +} + +attributes #0 = { "target-features"="+sve,+bf16" } + +declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>) +declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>) +declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>) +declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>) +declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>) +declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>) +declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>) +declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>) +declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>) +declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>) +declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>) +declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>) +declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>) 
+declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>) +declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>) +declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)