diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2602,6 +2602,7 @@ VectorType *SubTp, ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_Reverse) { @@ -2696,11 +2697,26 @@ { TTI::SK_Reverse, MVT::nxv4i1, 1 }, { TTI::SK_Reverse, MVT::nxv2i1, 1 }, }; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; } + if (Kind == TTI::SK_Splice && isa(Tp)) return getSpliceCost(Tp, Index); + + // Inserting a subvector can often be done with either a D, S or H register + // move, so long as the inserted vector is "aligned". + if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() && + LT.second.getSizeInBits() <= 128 && SubTp) { + std::pair SubLT = + TLI->getTypeLegalizationCost(DL, SubTp); + if (SubLT.second.isVector()) { + int NumElts = LT.second.getVectorNumElements(); + int NumSubElts = SubLT.second.getVectorNumElements(); + if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) + return SubLT.first; + } + } + return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); } diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -36,21 +36,21 @@ define void @concat() { ; CHECK-LABEL: 'concat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> @@ -81,40 +81,40 @@ define void @insert_subvec() { ; CHECK-LABEL: 'insert_subvec' ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4i8_2_0 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i8_2_1 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2_1 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8i8_2_0 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i8_2_1 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i8_2_2 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i8_2_3 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2_1 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2_2 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2_3 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i8_2_05 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v16i8_4_0 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i8_4_1 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i8_4_2 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i8_4_3 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_4_1 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_4_2 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_4_3 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i8_4_05 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4i16_2_0 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i16_2_1 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2_1 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8i16_2_0 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i16_2_1 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i16_2_2 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i16_2_3 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_1 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_2 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_3 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i16_2_05 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16i16_4_0 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i16_4_1 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16i16_4_2 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i16_4_3 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_1 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_2 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_3 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i16_4_05 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_2_0 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i32_2_1 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2_1 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_2_0 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32_2_1 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_2_2 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32_2_3 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_2_1 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_2_2 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_2_3 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32_2_05 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_4_0 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16i32_4_1 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16i32_4_2 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16i32_4_3 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_4_1 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_4_2 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_4_3 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16i32_4_05 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ;