diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2909,8 +2909,7 @@ if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || - Kind == TTI::SK_Reverse) { - + Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) { static const CostTblEntry ShuffleTbl[] = { // Broadcast shuffle kinds can be performed with 'dup'. { TTI::SK_Broadcast, MVT::v8i8, 1 }, @@ -2971,6 +2970,21 @@ { TTI::SK_Reverse, MVT::v4f16, 1 }, // REV64 { TTI::SK_Reverse, MVT::v4i16, 1 }, // REV64 { TTI::SK_Reverse, MVT::v8i8, 1 }, // REV64 + // Splice can all be lowered as `ext`. + { TTI::SK_Splice, MVT::v2i32, 1 }, + { TTI::SK_Splice, MVT::v4i32, 1 }, + { TTI::SK_Splice, MVT::v2i64, 1 }, + { TTI::SK_Splice, MVT::v2f32, 1 }, + { TTI::SK_Splice, MVT::v4f32, 1 }, + { TTI::SK_Splice, MVT::v2f64, 1 }, + { TTI::SK_Splice, MVT::v8f16, 1 }, + { TTI::SK_Splice, MVT::v8bf16, 1 }, + { TTI::SK_Splice, MVT::v8i16, 1 }, + { TTI::SK_Splice, MVT::v16i8, 1 }, + { TTI::SK_Splice, MVT::v4bf16, 1 }, + { TTI::SK_Splice, MVT::v4f16, 1 }, + { TTI::SK_Splice, MVT::v4i16, 1 }, + { TTI::SK_Splice, MVT::v8i8, 1 }, // Broadcast shuffle kinds for scalable vectors { TTI::SK_Broadcast, MVT::nxv16i8, 1 }, { TTI::SK_Broadcast, MVT::nxv8i16, 1 }, diff --git a/llvm/test/Analysis/CostModel/AArch64/splice.ll b/llvm/test/Analysis/CostModel/AArch64/splice.ll --- a/llvm/test/Analysis/CostModel/AArch64/splice.ll +++ b/llvm/test/Analysis/CostModel/AArch64/splice.ll @@ -3,33 +3,35 @@ define void @vector_splice() #0 { ; CHECK-LABEL: 'vector_splice' -; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %splice.v16i8 = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %splice.v32i8 = call <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i16 = call <2 x i16> @llvm.experimental.vector.splice.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i16 = call <4 x i16> @llvm.experimental.vector.splice.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8i16 = call <8 x i16> @llvm.experimental.vector.splice.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16i16 = call <16 x i16> @llvm.experimental.vector.splice.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i32 = call <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %splice.v8i32 = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i64 = call <2 x i64> @llvm.experimental.vector.splice.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %splice.v4i64 = call <4 x i64> @llvm.experimental.vector.splice.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f16 = call <2 x half> @llvm.experimental.vector.splice.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4f16 = call <4 x half> @llvm.experimental.vector.splice.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8f16 = call <8 x half> @llvm.experimental.vector.splice.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16f16 = call <16 x half> @llvm.experimental.vector.splice.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f32 = call <2 x float> @llvm.experimental.vector.splice.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4f32 = call <4 x float> @llvm.experimental.vector.splice.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %splice.v8f32 = call <8 x float> @llvm.experimental.vector.splice.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f64 = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %splice.v4f64 = call <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2bf16 = call <2 x bfloat> @llvm.experimental.vector.splice.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4bf16 = call <4 x bfloat> @llvm.experimental.vector.splice.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8bf16 = call <8 x bfloat> @llvm.experimental.vector.splice.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16bf16 = call <16 x bfloat> @llvm.experimental.vector.splice.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %splice.v16i1 = call <16 x i1> @llvm.experimental.vector.splice.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8i1 = call <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i1 = call <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i1 = call <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v16i8 = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v32i8 = call <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i16 = call <2 x i16> @llvm.experimental.vector.splice.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i16 = call <4 x i16> @llvm.experimental.vector.splice.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8i16 = call <8 x i16> @llvm.experimental.vector.splice.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16i16 = call <16 x i16> @llvm.experimental.vector.splice.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i32 = call <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v8i32 = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i64 = call <2 x i64> @llvm.experimental.vector.splice.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v4i64 = call <4 x i64> @llvm.experimental.vector.splice.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f16 = call <2 x half> @llvm.experimental.vector.splice.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4f16 = call <4 x half> @llvm.experimental.vector.splice.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8f16 = call <8 x half> @llvm.experimental.vector.splice.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16f16 = call <16 x half> @llvm.experimental.vector.splice.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f32 = call <2 x float> @llvm.experimental.vector.splice.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4f32 = call <4 x float> @llvm.experimental.vector.splice.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v8f32 = call <8 x float> @llvm.experimental.vector.splice.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f64 = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v4f64 = call <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2bf16 = call <2 x bfloat> @llvm.experimental.vector.splice.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4bf16 = call <4 x bfloat> @llvm.experimental.vector.splice.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8bf16 = call <8 x bfloat> @llvm.experimental.vector.splice.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16bf16 = call <16 x bfloat> @llvm.experimental.vector.splice.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v16i1 = call <16 x i1> @llvm.experimental.vector.splice.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8i1 = call <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i1 = call <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i1 = call <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %splice.v2i128 = call <2 x i128> @llvm.experimental.vector.splice.v2i128(<2 x i128> zeroinitializer, <2 x i128> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %splice.v16i8 = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) %splice.v32i8 = call <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1) @@ -58,6 +60,7 @@ %splice.v8i1 = call <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1) %splice.v4i1 = call <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) %splice.v2i1 = call <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) + %splice.v2i128 = call <2 x i128> @llvm.experimental.vector.splice.v2i128(<2 x i128> zeroinitializer, <2 x i128> zeroinitializer, i32 1) ret void } @@ -90,5 +93,6 @@ declare <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float>, <16 x float>, i32) declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32) declare <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double>, <4 x double>, i32) +declare <2 x i128> @llvm.experimental.vector.splice.v2i128(<2 x i128>, <2 x i128>, i32) attributes #0 = { "target-features"="+bf16" }