Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -39281,6 +39281,31 @@ OpVT, SDLoc(N), InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements())); + // If we're extracting the lowest subvector and we're the only user, + // we may be able to perform this with a smaller vector width. + if (IdxVal == 0 && InVec.hasOneUse()) { + unsigned InOpcode = InVec.getOpcode(); + if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) { + // v2f64 CVTDQ2PD(v4i32). + if (InOpcode == ISD::SINT_TO_FP && + InVec.getOperand(0).getValueType() == MVT::v4i32) { + return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0)); + } + // v2f64 CVTPS2PD(v4f32). + if (InOpcode == ISD::FP_EXTEND && + InVec.getOperand(0).getValueType() == MVT::v4f32) { + return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0)); + } + } + if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) && + OpVT.is128BitVector() && + InVec.getOperand(0).getValueType().is128BitVector()) { + unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG + : ISD::SIGN_EXTEND_VECTOR_INREG; + return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0)); + } + } + return SDValue(); } Index: test/CodeGen/X86/prefer-avx256-mask-shuffle.ll =================================================================== --- test/CodeGen/X86/prefer-avx256-mask-shuffle.ll +++ test/CodeGen/X86/prefer-avx256-mask-shuffle.ll @@ -131,7 +131,7 @@ ; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX256VL-NEXT: vpmovsxbw %xmm1, %ymm1 +; AVX256VL-NEXT: vpmovsxbw %xmm1, %xmm1 ; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1 ; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0 Index: test/CodeGen/X86/trunc-subvector.ll =================================================================== --- test/CodeGen/X86/trunc-subvector.ll +++ test/CodeGen/X86/trunc-subvector.ll @@ -49,8 +49,7 @@ ; AVX2-LABEL: test3: ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -76,8 +75,7 @@ ; ; AVX2-LABEL: test4: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -175,8 +173,7 @@ ; AVX2-LABEL: test8: ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -201,8 +198,7 @@ ; ; AVX2-LABEL: test9: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; Index: test/CodeGen/X86/vec_fpext.ll =================================================================== --- test/CodeGen/X86/vec_fpext.ll +++ test/CodeGen/X86/vec_fpext.ll @@ -14,16 +14,12 @@ ; ; AVX-LABEL: fpext_4f32_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0] -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0] ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX512VL-LABEL: fpext_4f32_to_2f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0] -; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] %cvt = fpext <4 x float> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> @@ -38,8 +34,7 @@ ; ; AVX-LABEL: fpext_8f32_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0] -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0] ; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -95,9 +95,7 @@ ; ; AVX-LABEL: sitofp_4i32_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX-NEXT: vzeroupper +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %cvt = sitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> @@ -130,21 +128,11 @@ ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: sitofp_8i16_to_2f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: sitofp_8i16_to_2f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; VEX-LABEL: sitofp_8i16_to_2f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovsxwd %xmm0, %xmm0 +; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: sitofp_8i16_to_2f64: ; AVX512: # %bb.0: @@ -186,21 +174,11 @@ ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: sitofp_16i8_to_2f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: sitofp_16i8_to_2f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; VEX-LABEL: sitofp_16i8_to_2f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovsxbd %xmm0, %xmm0 +; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: sitofp_16i8_to_2f64: ; AVX512: # %bb.0: @@ -361,17 +339,11 @@ ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: sitofp_8i16_to_4f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: sitofp_8i16_to_4f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: retq +; VEX-LABEL: sitofp_8i16_to_4f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovsxwd %xmm0, %xmm0 +; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: sitofp_8i16_to_4f64: ; AVX512: # %bb.0: @@ -416,17 +388,11 @@ ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: sitofp_16i8_to_4f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: sitofp_16i8_to_4f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: retq +; VEX-LABEL: sitofp_16i8_to_4f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovsxbd %xmm0, %xmm0 +; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: sitofp_16i8_to_4f64: ; AVX512: # %bb.0: @@ -658,21 +624,11 @@ ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: uitofp_8i16_to_2f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: uitofp_8i16_to_2f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; VEX-LABEL: uitofp_8i16_to_2f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: uitofp_8i16_to_2f64: ; AVX512: # %bb.0: @@ -714,21 +670,11 @@ ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: uitofp_16i8_to_2f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: uitofp_16i8_to_2f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; VEX-LABEL: uitofp_16i8_to_2f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: uitofp_16i8_to_2f64: ; AVX512: # %bb.0: @@ -939,17 +885,11 @@ ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: uitofp_8i16_to_4f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: uitofp_8i16_to_4f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: retq +; VEX-LABEL: uitofp_8i16_to_4f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: uitofp_8i16_to_4f64: ; AVX512: # %bb.0: @@ -996,17 +936,11 @@ ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: uitofp_16i8_to_4f64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: uitofp_16i8_to_4f64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX2-NEXT: retq +; VEX-LABEL: uitofp_16i8_to_4f64: +; VEX: # %bb.0: +; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; VEX-NEXT: retq ; ; AVX512-LABEL: uitofp_16i8_to_4f64: ; AVX512: # %bb.0: