Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -18306,27 +18306,33 @@ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } + // Need to build a vector containing shift amount. + // SSE/AVX packed shifts only use the lower 64-bit of the shift count. + // +=================+============+=======================================+ + // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as | + // +=================+============+=======================================+ + // | i64 | Yes, No | Use ShAmt as lowest elt | + // | i32 | Yes | zero-extend in-reg | + // | (i32 zext(i16)) | Yes | zero-extend in-reg | + // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | + // +=================+============+=======================================+ const X86Subtarget &Subtarget = static_cast(DAG.getSubtarget()); - if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && - ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { - // Let the shuffle legalizer expand this shift amount node. + if (SVT == MVT::i64) + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); + else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && + ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { SDValue Op0 = ShAmt.getOperand(0); Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0); - ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG); + ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64); + } else if (Subtarget.hasSSE41() && + ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); + ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); } else { - // Need to build a vector containing shift amount. - // SSE/AVX packed shifts only use the lower 64-bit of the shift count. - SmallVector ShOps; - ShOps.push_back(ShAmt); - if (SVT == MVT::i32) { - ShOps.push_back(DAG.getConstant(0, dl, SVT)); - ShOps.push_back(DAG.getUNDEF(SVT)); - } - ShOps.push_back(DAG.getUNDEF(SVT)); - - MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64; - ShAmt = DAG.getBuildVector(BVT, dl, ShOps); + SmallVector ShOps = {ShAmt, DAG.getConstant(0, dl, SVT), + DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; + ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } // The return type has to be a 128-bit type with the same element Index: test/CodeGen/X86/lower-vec-shift-2.ll =================================================================== --- test/CodeGen/X86/lower-vec-shift-2.ll +++ test/CodeGen/X86/lower-vec-shift-2.ll @@ -12,8 +12,7 @@ ; ; AVX-LABEL: test1: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -32,8 +31,7 @@ ; ; AVX-LABEL: test2: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -68,8 +66,7 @@ ; ; AVX-LABEL: test4: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -88,8 +85,7 @@ ; ; AVX-LABEL: test5: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -124,8 +120,7 @@ ; ; AVX-LABEL: test7: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -144,8 +139,7 @@ ; ; AVX-LABEL: test8: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: Index: test/CodeGen/X86/vector-rotate-128.ll =================================================================== --- test/CodeGen/X86/vector-rotate-128.ll +++ test/CodeGen/X86/vector-rotate-128.ll @@ -87,14 +87,12 @@ ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] ; X32-SSE-NEXT: movdqa %xmm0, %xmm4 ; X32-SSE-NEXT: psllq %xmm3, %xmm4 -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: movdqa %xmm0, %xmm3 ; X32-SSE-NEXT: psllq %xmm1, %xmm3 ; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1] ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1] ; X32-SSE-NEXT: movdqa %xmm0, %xmm1 ; X32-SSE-NEXT: psrlq %xmm3, %xmm1 -; X32-SSE-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero ; X32-SSE-NEXT: psrlq %xmm2, %xmm0 ; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; X32-SSE-NEXT: orpd %xmm4, %xmm1 Index: test/CodeGen/X86/vector-shift-ashr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-128.ll +++ test/CodeGen/X86/vector-shift-ashr-128.ll @@ -90,20 +90,19 @@ ; ; X32-SSE-LABEL: var_shift_v2i64: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] -; X32-SSE-NEXT: movdqa %xmm3, %xmm4 -; X32-SSE-NEXT: psrlq %xmm2, %xmm4 -; X32-SSE-NEXT: movq {{.*#+}} xmm5 = xmm1[0],zero -; X32-SSE-NEXT: psrlq %xmm5, %xmm3 -; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1] -; X32-SSE-NEXT: movdqa %xmm0, %xmm1 -; X32-SSE-NEXT: psrlq %xmm2, %xmm1 -; X32-SSE-NEXT: psrlq %xmm5, %xmm0 -; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; X32-SSE-NEXT: xorpd %xmm4, %xmm1 -; X32-SSE-NEXT: psubq %xmm4, %xmm1 -; X32-SSE-NEXT: movdqa %xmm1, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648] +; X32-SSE-NEXT: movdqa %xmm2, %xmm4 +; X32-SSE-NEXT: psrlq %xmm3, %xmm4 +; X32-SSE-NEXT: psrlq %xmm1, %xmm2 +; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1] +; X32-SSE-NEXT: movdqa %xmm0, %xmm2 +; X32-SSE-NEXT: psrlq %xmm3, %xmm2 +; X32-SSE-NEXT: psrlq %xmm1, %xmm0 +; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] +; X32-SSE-NEXT: xorpd %xmm4, %xmm2 +; X32-SSE-NEXT: psubq %xmm4, %xmm2 +; X32-SSE-NEXT: movdqa %xmm2, %xmm0 ; X32-SSE-NEXT: retl %shift = ashr <2 x i64> %a, %b ret <2 x i64> %shift @@ -637,7 +636,6 @@ ; ; X32-SSE-LABEL: splatvar_shift_v2i64: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648] ; X32-SSE-NEXT: psrlq %xmm1, %xmm2 ; X32-SSE-NEXT: psrlq %xmm1, %xmm0 @@ -659,29 +657,25 @@ ; ; SSE41-LABEL: splatvar_shift_v4i32: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7] -; SSE41-NEXT: psrad %xmm2, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: psrad %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v4i32: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v4i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; @@ -706,29 +700,25 @@ ; ; SSE41-LABEL: splatvar_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; SSE41-NEXT: psraw %xmm2, %xmm0 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: psraw %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v8i16: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v8i16: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; Index: test/CodeGen/X86/vector-shift-ashr-256.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-256.ll +++ test/CodeGen/X86/vector-shift-ashr-256.ll @@ -426,9 +426,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -436,16 +435,14 @@ ; ; AVX2-LABEL: splatvar_shift_v8i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # BB#0: -; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -453,15 +450,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v8i32: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -473,8 +468,7 @@ ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpextrw $0, %xmm1, %eax -; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -482,16 +476,14 @@ ; ; AVX2-LABEL: splatvar_shift_v16i16: ; AVX2: # BB#0: -; AVX2-NEXT: vpextrw $0, %xmm1, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # BB#0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX1-NEXT: vmovd %eax, %xmm1 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -499,15 +491,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v16i16: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX2-NEXT: vmovd %eax, %xmm1 +; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v16i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpextrw $0, %xmm1, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer Index: test/CodeGen/X86/vector-shift-ashr-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-512.ll +++ test/CodeGen/X86/vector-shift-ashr-512.ll @@ -525,8 +525,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_shift_v16i32: ; ALL: ## BB#0: -; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; ALL-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer @@ -537,16 +536,14 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512DQ-NEXT: vpsraw %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsraw %xmm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v32i16: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer Index: test/CodeGen/X86/vector-shift-lshr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-128.ll +++ test/CodeGen/X86/vector-shift-lshr-128.ll @@ -69,7 +69,6 @@ ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] ; X32-SSE-NEXT: movdqa %xmm0, %xmm2 ; X32-SSE-NEXT: psrlq %xmm3, %xmm2 -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: psrlq %xmm1, %xmm0 ; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; X32-SSE-NEXT: movapd %xmm2, %xmm0 @@ -493,7 +492,6 @@ ; ; X32-SSE-LABEL: splatvar_shift_v2i64: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: psrlq %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -511,29 +509,25 @@ ; ; SSE41-LABEL: splatvar_shift_v4i32: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7] -; SSE41-NEXT: psrld %xmm2, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: psrld %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v4i32: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v4i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; @@ -558,29 +552,25 @@ ; ; SSE41-LABEL: splatvar_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; SSE41-NEXT: psrlw %xmm2, %xmm0 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: psrlw %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v8i16: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v8i16: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; Index: test/CodeGen/X86/vector-shift-lshr-256.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-256.ll +++ test/CodeGen/X86/vector-shift-lshr-256.ll @@ -337,9 +337,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -347,16 +346,14 @@ ; ; AVX2-LABEL: splatvar_shift_v8i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # BB#0: -; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -364,15 +361,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v8i32: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -384,8 +379,7 @@ ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpextrw $0, %xmm1, %eax -; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -393,16 +387,14 @@ ; ; AVX2-LABEL: splatvar_shift_v16i16: ; AVX2: # BB#0: -; AVX2-NEXT: vpextrw $0, %xmm1, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # BB#0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX1-NEXT: vmovd %eax, %xmm1 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -410,15 +402,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v16i16: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX2-NEXT: vmovd %eax, %xmm1 +; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v16i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpextrw $0, %xmm1, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer Index: test/CodeGen/X86/vector-shift-lshr-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-512.ll +++ test/CodeGen/X86/vector-shift-lshr-512.ll @@ -505,8 +505,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_shift_v16i32: ; ALL: ## BB#0: -; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; ALL-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer @@ -517,16 +516,14 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v32i16: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer Index: test/CodeGen/X86/vector-shift-shl-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-128.ll +++ test/CodeGen/X86/vector-shift-shl-128.ll @@ -67,7 +67,6 @@ ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] ; X32-SSE-NEXT: movdqa %xmm0, %xmm2 ; X32-SSE-NEXT: psllq %xmm3, %xmm2 -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: psllq %xmm1, %xmm0 ; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; X32-SSE-NEXT: movapd %xmm2, %xmm0 @@ -441,7 +440,6 @@ ; ; X32-SSE-LABEL: splatvar_shift_v2i64: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-SSE-NEXT: psllq %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -459,29 +457,25 @@ ; ; SSE41-LABEL: splatvar_shift_v4i32: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7] -; SSE41-NEXT: pslld %xmm2, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: pslld %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v4i32: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v4i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; @@ -506,29 +500,25 @@ ; ; SSE41-LABEL: splatvar_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; SSE41-NEXT: psllw %xmm2, %xmm0 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: psllw %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_shift_v8i16: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatvar_shift_v8i16: ; XOP: # BB#0: -; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; Index: test/CodeGen/X86/vector-shift-shl-256.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-256.ll +++ test/CodeGen/X86/vector-shift-shl-256.ll @@ -301,9 +301,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -311,16 +310,14 @@ ; ; AVX2-LABEL: splatvar_shift_v8i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # BB#0: -; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -328,15 +325,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v8i32: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7] +; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v8i32: ; AVX512: ## BB#0: -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -348,8 +343,7 @@ ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpextrw $0, %xmm1, %eax -; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -357,16 +351,14 @@ ; ; AVX2-LABEL: splatvar_shift_v16i16: ; AVX2: # BB#0: -; AVX2-NEXT: vpextrw $0, %xmm1, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # BB#0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX1-NEXT: vmovd %eax, %xmm1 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -374,15 +366,13 @@ ; ; XOPAVX2-LABEL: splatvar_shift_v16i16: ; XOPAVX2: # BB#0: -; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax -; XOPAVX2-NEXT: vmovd %eax, %xmm1 +; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_shift_v16i16: ; AVX512: ## BB#0: -; AVX512-NEXT: vpextrw $0, %xmm1, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer Index: test/CodeGen/X86/vector-shift-shl-512.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-512.ll +++ test/CodeGen/X86/vector-shift-shl-512.ll @@ -502,8 +502,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_shift_v16i32: ; ALL: ## BB#0: -; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; ALL-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] +; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer @@ -514,16 +513,14 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax -; AVX512DQ-NEXT: vmovd %eax, %xmm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v32i16: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer Index: test/CodeGen/X86/vshift-4.ll =================================================================== --- test/CodeGen/X86/vshift-4.ll +++ test/CodeGen/X86/vshift-4.ll @@ -9,7 +9,6 @@ ; X32-LABEL: shift1a: ; X32: # BB#0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-NEXT: psllq %xmm1, %xmm0 ; X32-NEXT: movdqa %xmm0, (%eax) ; X32-NEXT: retl @@ -34,7 +33,6 @@ ; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X32-NEXT: movdqa %xmm0, %xmm3 ; X32-NEXT: psllq %xmm2, %xmm3 -; X32-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero ; X32-NEXT: psllq %xmm1, %xmm0 ; X32-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1] ; X32-NEXT: movapd %xmm3, (%eax)