Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2095,6 +2095,22 @@
                                                ArrayRef Mask, int Index,
                                                VectorType *SubTp) {
   Kind = improveShuffleKindFromMask(Kind, Mask);
+  auto LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+  // Subvector insertions: vector concats are cheap. They are a zip or a mov,
+  // see also test/CodeGen/AArch64/concat-vector.ll.
+  if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
+    int NumElts = LT.second.getVectorNumElements();
+    auto SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+    // Check that the legalized subvector type is still a vector *before*
+    // asking for its element count; a scalar type has no element count.
+    if (SubLT.second.isVector()) {
+      int NumSubElts = SubLT.second.getVectorNumElements();
+      if (Index + NumSubElts == NumElts)
+        return SubLT.first;
+    }
+  }
+
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
       Kind == TTI::SK_Reverse) {
Index: llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
+++ llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
@@ -36,19 +36,19 @@
 
 define void @concat() {
 ; CHECK-LABEL: 'concat'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void