Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16227,12 +16227,13 @@ assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) && "Unexpected PACK opcode"); + EVT SrcVT = In.getValueType(); + // Requires SSE2 but AVX512 has fast truncate. - if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) + if (!Subtarget.hasSSE2() || Subtarget.hasBWI() || + (Subtarget.hasAVX512() && SrcVT.getScalarType() != MVT::i16)) return SDValue(); - EVT SrcVT = In.getValueType(); - // No truncation required, we might get here due to recursive calls. if (SrcVT == DstVT) return In; @@ -34649,10 +34650,6 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // Requires SSE2 but AVX512 has fast truncate. - if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) - return SDValue(); - if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple()) return SDValue(); @@ -34666,6 +34663,11 @@ MVT InVT = In.getValueType().getSimpleVT(); MVT InSVT = InVT.getScalarType(); + // Requires SSE2 but AVX512 has fast truncate. + if (!Subtarget.hasSSE2() || Subtarget.hasBWI() || + (Subtarget.hasAVX512() && InSVT != MVT::i16)) + return SDValue(); + // Check we have a truncation suited for PACKSS. if (!VT.is128BitVector() && !VT.is256BitVector()) return SDValue(); Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -1720,13 +1720,10 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { ; KNL-LABEL: zext_32xi1_to_32xi8: ; KNL: # BB#0: -; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; KNL-NEXT: retq ; Index: test/CodeGen/X86/avx512-trunc.ll =================================================================== --- test/CodeGen/X86/avx512-trunc.ll +++ test/CodeGen/X86/avx512-trunc.ll @@ -542,8 +542,8 @@ ; KNL-LABEL: usat_trunc_wb_256_mem: ; KNL: ## BB#0: ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vmovdqu %xmm0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -564,8 +564,8 @@ ; KNL-LABEL: usat_trunc_wb_256: ; KNL: ## BB#0: ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -317,16 +317,16 @@ ; AVX512F-LABEL: test_cmp_v16i16: ; AVX512F: # BB#0: ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v16i16: ; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -1122,24 +1122,18 @@ ; ; AVX512F-LABEL: test_cmp_v32i16: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32i16: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32i16: Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -5017,13 +5017,10 @@ ; ; AVX512F-LABEL: sext_32xi1_to_32xi8: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: sext_32xi1_to_32xi8: Index: test/CodeGen/X86/vector-trunc.ll =================================================================== --- test/CodeGen/X86/vector-trunc.ll +++ test/CodeGen/X86/vector-trunc.ll @@ -1137,8 +1137,8 @@ ; AVX512F-LABEL: trunc16i16_16i8_ashr: ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqu %xmm0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1146,8 +1146,8 @@ ; AVX512VL-LABEL: trunc16i16_16i8_ashr: ; AVX512VL: # BB#0: # %entry ; AVX512VL-NEXT: vpsraw $8, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqu %xmm0, (%rax) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1204,8 +1204,8 @@ ; AVX512F-LABEL: trunc16i16_16i8_lshr: ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqu %xmm0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1213,8 +1213,8 @@ ; AVX512VL-LABEL: trunc16i16_16i8_lshr: ; AVX512VL: # BB#0: # %entry ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqu %xmm0, (%rax) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1943,10 +1943,10 @@ ; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512F-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX512F-NEXT: vmovdqu %xmm0, (%rsi) ; AVX512F-NEXT: vmovdqu %xmm1, 16(%rsi) ; AVX512F-NEXT: vzeroupper @@ -1958,10 +1958,10 @@ ; AVX512VL-NEXT: vmovdqu 32(%rdi), %ymm1 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512VL-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovdqu %xmm0, (%rsi) ; AVX512VL-NEXT: vmovdqu %xmm1, 16(%rsi) ; AVX512VL-NEXT: vzeroupper Index: test/CodeGen/X86/vselect-packss.ll =================================================================== --- test/CodeGen/X86/vselect-packss.ll +++ test/CodeGen/X86/vselect-packss.ll @@ -53,8 +53,8 @@ ; AVX512-LABEL: vselect_packss_v16i16: ; AVX512: # BB#0: ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq