diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1069,9 +1069,9 @@ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb - {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b - {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b - {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} // vpermt2b }; if (ST->hasVBMI()) @@ -1083,22 +1083,18 @@ {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb - {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw - {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw + {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw + {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2 - {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16 - {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc - {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc + {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1 - {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc }; if (ST->hasBWI()) diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -859,27 +859,27 @@ ; AVX512BW-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' @@ -894,27 +894,27 @@ ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -1249,27 +1249,27 @@ ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -229,16 +229,16 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -285,18 +285,18 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -386,7 +386,7 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -202,19 +202,19 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -293,17 +293,17 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -268,21 +268,21 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -372,17 +372,17 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll @@ -13,9 +13,9 @@ ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -38,10 +38,10 @@ ;CHECK-LABEL: load_i16_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -64,10 +64,10 @@ ;CHECK-LABEL: load_i16_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -89,11 +89,11 @@ define void @load_i16_stride5() { ;CHECK-LABEL: load_i16_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 6 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 2 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 5 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 10 for VF 32 For instruction: %1 = load entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll --- a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll @@ -14,7 +14,7 @@ ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 64 For instruction: %1 = load entry: @@ -40,7 +40,7 @@ ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 13 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 16 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 25 for VF 64 For instruction: %1 = load @@ -67,7 +67,7 @@ ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 59 for VF 64 For instruction: %1 = load @@ -93,7 +93,7 @@ ;CHECK-LABEL: load_i8_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 4 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 39 for VF 32 For instruction: %1 = load