diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30611,6 +30611,11 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; + // Truncate to mask registers aren't free. + // TODO: No vector truncates are free. + if (Subtarget.hasAVX512() && VT2.isVector() && + VT2.getVectorElementType() == MVT::i1) + return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 > NumBits2; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1396,6 +1396,17 @@ { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, }; static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { @@ -1420,6 +1431,20 @@ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // zmm vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // zmm vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, // vpsllq+vptestmq { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, @@ -1539,6 +1564,15 @@ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb }; static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = { @@ -1564,6 +1598,19 @@ }; static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = { + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, // split+2*v8i8 + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16 + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // vpsllq+vptestmq + // sign extend is vpcmpeq+maskedmove+vpmovdw+vpacksswb // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw+vpackuswb { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 }, diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -44,10 +44,10 @@ ; AVX512-LABEL: 'add' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = zext <4 x i1> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <4 x i1> undef to <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = trunc <4 x i32> undef to <4 x i1> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = zext <8 x i1> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <8 x i1> undef to <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i1> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll --- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -549,86 +549,113 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL256-LABEL: 'trunc_vXi1' -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; VEC512-LABEL: 'trunc_vXi1' -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> -; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512VL512-LABEL: 'trunc_vXi1' +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SKX256-LABEL: 'trunc_vXi1' -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> -; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> ; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SKX512-LABEL: 'trunc_vXi1' +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; %V2i64 = trunc <2 x i64> undef to <2 x i1> %V4i64 = trunc <4 x i64> undef to <4 x i1> %V8i64 = trunc <8 x i64> undef to <8 x i1> diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -408,32 +408,59 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX512-LABEL: 'trunc_vXi1' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX512F-LABEL: 'trunc_vXi1' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'trunc_vXi1' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2i64 = trunc <2 x i64> undef to <2 x i1> %V4i64 = trunc <4 x i64> undef to <4 x i1>