Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -3779,6 +3779,14 @@
   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
 }
 
+static bool MayFoldIntoZeroExtend(SDValue Op) {
+  if (Op.hasOneUse()) {
+    unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
+    return (ISD::ZERO_EXTEND == Opcode);
+  }
+  return false;
+}
+
 static bool isTargetShuffle(unsigned Opcode) {
   switch(Opcode) {
   default: return false;
@@ -12374,8 +12382,10 @@
   }
 
   if (VT.getSizeInBits() == 16) {
-    // If Idx is 0, it's cheaper to do a move instead of a pextrw.
-    if (isNullConstant(Op.getOperand(1)))
+    // If Idx is 0, it's cheaper to do a move instead of a pextrw, unless we're
+    // going to fold the store or zero extend the register.
+    if (isNullConstant(Op.getOperand(1)) && !MayFoldIntoStore(Op) &&
+        !MayFoldIntoZeroExtend(Op))
       return DAG.getNode(
           ISD::TRUNCATE, dl, MVT::i16,
           DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -12519,7 +12529,9 @@
   MVT VT = Op.getSimpleValueType();
   // TODO: handle v16i8.
   if (VT.getSizeInBits() == 16) {
-    if (IdxVal == 0)
+    // If Idx is 0, it's cheaper to do a move instead of a pextrw, unless we're
+    // going to zero extend the register.
+    if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op))
       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                      DAG.getBitcast(MVT::v4i32, Vec), Idx));
Index: test/CodeGen/X86/2011-12-8-bitcastintprom.ll
===================================================================
--- test/CodeGen/X86/2011-12-8-bitcastintprom.ll
+++ test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -19,8 +19,7 @@
 ; SSE41: ## BB#0:
 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE41-NEXT: movd %xmm0, %eax
-; SSE41-NEXT: movw %ax, (%rdi)
+; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
 ; SSE41-NEXT: retq
   %r = bitcast <4 x i8> %t to <2 x i16>
   %o = extractelement <2 x i16> %r, i32 0
Index: test/CodeGen/X86/avx512-trunc.ll
===================================================================
--- test/CodeGen/X86/avx512-trunc.ll
+++ test/CodeGen/X86/avx512-trunc.ll
@@ -96,8 +96,7 @@
 ; KNL-LABEL: trunc_qb_128_mem:
 ; KNL: ## BB#0:
 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; KNL-NEXT: vmovd %xmm0, %eax
-; KNL-NEXT: movw %ax, (%rdi)
+; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: trunc_qb_128_mem:
Index: test/CodeGen/X86/extract-store.ll
===================================================================
--- test/CodeGen/X86/extract-store.ll
+++ test/CodeGen/X86/extract-store.ll
@@ -48,16 +48,20 @@
 }
 
 define void @extract_i16_0(i16* nocapture %dst, <8 x i16> %foo) {
-; SSE-LABEL: extract_i16_0:
-; SSE: # BB#0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: movw %ax, (%rdi)
-; SSE-NEXT: retq
+; SSE2-LABEL: extract_i16_0:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movw %ax, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract_i16_0:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract_i16_0:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: movw %ax, (%rdi)
+; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
 ; AVX-NEXT: retq
   %vecext = extractelement <8 x i16> %foo, i32 0
   store i16 %vecext, i16* %dst, align 1
Index: test/CodeGen/X86/lower-vec-shift-2.ll
===================================================================
--- test/CodeGen/X86/lower-vec-shift-2.ll
+++ test/CodeGen/X86/lower-vec-shift-2.ll
@@ -5,8 +5,7 @@
 define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
 ; SSE2-LABEL: test1:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psllw %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -62,8 +61,7 @@
 define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
 ; SSE2-LABEL: test4:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psrlw %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -119,8 +117,7 @@
 define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
 ; SSE2-LABEL: test7:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psraw %xmm1, %xmm0
 ; SSE2-NEXT: retq
Index: test/CodeGen/X86/masked_memop.ll
===================================================================
--- test/CodeGen/X86/masked_memop.ll
+++ test/CodeGen/X86/masked_memop.ll
@@ -9230,8 +9230,7 @@
 ; AVX-NEXT: testb $1, %al
 ; AVX-NEXT: je LBB59_2
 ; AVX-NEXT: ## BB#1: ## %cond.store
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: movw %ax, (%rdi)
+; AVX-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX-NEXT: LBB59_2: ## %else
 ; AVX-NEXT: vpextrb $2, %xmm0, %eax
 ; AVX-NEXT: testb $1, %al
@@ -9288,8 +9287,7 @@
 ; AVX512F-NEXT: testb %al, %al
 ; AVX512F-NEXT: je LBB59_2
 ; AVX512F-NEXT: ## BB#1: ## %cond.store
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: movw %ax, (%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX512F-NEXT: LBB59_2: ## %else
 ; AVX512F-NEXT: kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1
@@ -9367,8 +9365,7 @@
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: je LBB60_2
 ; AVX1-NEXT: ## BB#1: ## %cond.store
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movw %ax, (%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX1-NEXT: LBB60_2: ## %else
 ; AVX1-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9417,8 +9414,7 @@
 ; AVX1-NEXT: je LBB60_18
 ; AVX1-NEXT: ## BB#17: ## %cond.store15
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovd %xmm2, %eax
-; AVX1-NEXT: movw %ax, 16(%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm2, 16(%rdi)
 ; AVX1-NEXT: LBB60_18: ## %else16
 ; AVX1-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9478,8 +9474,7 @@
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: je LBB60_2
 ; AVX2-NEXT: ## BB#1: ## %cond.store
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movw %ax, (%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX2-NEXT: LBB60_2: ## %else
 ; AVX2-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -9528,8 +9523,7 @@
 ; AVX2-NEXT: je LBB60_18
 ; AVX2-NEXT: ## BB#17: ## %cond.store15
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vmovd %xmm2, %eax
-; AVX2-NEXT: movw %ax, 16(%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm2, 16(%rdi)
 ; AVX2-NEXT: LBB60_18: ## %else16
 ; AVX2-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -9594,8 +9588,7 @@
 ; AVX512F-NEXT: testb %al, %al
 ; AVX512F-NEXT: je LBB60_2
 ; AVX512F-NEXT: ## BB#1: ## %cond.store
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: movw %ax, (%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX512F-NEXT: LBB60_2: ## %else
 ; AVX512F-NEXT: kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1
@@ -9660,8 +9653,7 @@
 ; AVX512F-NEXT: je LBB60_18
 ; AVX512F-NEXT: ## BB#17: ## %cond.store15
 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: movw %ax, 16(%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm0, 16(%rdi)
 ; AVX512F-NEXT: LBB60_18: ## %else16
 ; AVX512F-NEXT: kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1
@@ -9746,8 +9738,7 @@
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: je LBB61_2
 ; AVX1-NEXT: ## BB#1: ## %cond.store
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movw %ax, (%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX1-NEXT: LBB61_2: ## %else
 ; AVX1-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9796,8 +9787,7 @@
 ; AVX1-NEXT: je LBB61_18
 ; AVX1-NEXT: ## BB#17: ## %cond.store15
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vmovd %xmm3, %eax
-; AVX1-NEXT: movw %ax, 16(%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT: LBB61_18: ## %else16
 ; AVX1-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9853,8 +9843,7 @@
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: je LBB61_34
 ; AVX1-NEXT: ## BB#33: ## %cond.store31
-; AVX1-NEXT: vmovd %xmm2, %eax
-; AVX1-NEXT: movw %ax, 32(%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm2, 32(%rdi)
 ; AVX1-NEXT: LBB61_34: ## %else32
 ; AVX1-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9903,8 +9892,7 @@
 ; AVX1-NEXT: je LBB61_50
 ; AVX1-NEXT: ## BB#49: ## %cond.store47
 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movw %ax, 48(%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm1, 48(%rdi)
 ; AVX1-NEXT: LBB61_50: ## %else48
 ; AVX1-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX1-NEXT: testb $1, %al
@@ -9964,8 +9952,7 @@
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: je LBB61_2
 ; AVX2-NEXT: ## BB#1: ## %cond.store
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movw %ax, (%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX2-NEXT: LBB61_2: ## %else
 ; AVX2-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -10014,8 +10001,7 @@
 ; AVX2-NEXT: je LBB61_18
 ; AVX2-NEXT: ## BB#17: ## %cond.store15
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vmovd %xmm3, %eax
-; AVX2-NEXT: movw %ax, 16(%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT: LBB61_18: ## %else16
 ; AVX2-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -10071,8 +10057,7 @@
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: je LBB61_34
 ; AVX2-NEXT: ## BB#33: ## %cond.store31
-; AVX2-NEXT: vmovd %xmm2, %eax
-; AVX2-NEXT: movw %ax, 32(%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm2, 32(%rdi)
 ; AVX2-NEXT: LBB61_34: ## %else32
 ; AVX2-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -10121,8 +10106,7 @@
 ; AVX2-NEXT: je LBB61_50
 ; AVX2-NEXT: ## BB#49: ## %cond.store47
 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movw %ax, 48(%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm1, 48(%rdi)
 ; AVX2-NEXT: LBB61_50: ## %else48
 ; AVX2-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX2-NEXT: testb $1, %al
@@ -10182,8 +10166,7 @@
 ; AVX512F-NEXT: testb $1, %al
 ; AVX512F-NEXT: je LBB61_2
 ; AVX512F-NEXT: ## BB#1: ## %cond.store
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: movw %ax, (%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
 ; AVX512F-NEXT: LBB61_2: ## %else
 ; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX512F-NEXT: testb $1, %al
@@ -10232,8 +10215,7 @@
 ; AVX512F-NEXT: je LBB61_18
 ; AVX512F-NEXT: ## BB#17: ## %cond.store15
 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX512F-NEXT: vmovd %xmm3, %eax
-; AVX512F-NEXT: movw %ax, 16(%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm3, 16(%rdi)
 ; AVX512F-NEXT: LBB61_18: ## %else16
 ; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX512F-NEXT: testb $1, %al
@@ -10289,8 +10271,7 @@
 ; AVX512F-NEXT: testb $1, %al
 ; AVX512F-NEXT: je LBB61_34
 ; AVX512F-NEXT: ## BB#33: ## %cond.store31
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: movw %ax, 32(%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm2, 32(%rdi)
 ; AVX512F-NEXT: LBB61_34: ## %else32
 ; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
 ; AVX512F-NEXT: testb $1, %al
@@ -10339,8 +10320,7 @@
 ; AVX512F-NEXT: je LBB61_50
 ; AVX512F-NEXT: ## BB#49: ## %cond.store47
 ; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: movw %ax, 48(%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm1, 48(%rdi)
 ; AVX512F-NEXT: LBB61_50: ## %else48
 ; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
 ; AVX512F-NEXT: testb $1, %al
Index: test/CodeGen/X86/trunc-ext-ld-st.ll
===================================================================
--- test/CodeGen/X86/trunc-ext-ld-st.ll
+++ test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -25,8 +25,7 @@
 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
 ; SSE41-NEXT: paddq {{.*}}(%rip), %xmm0
 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: movd %xmm0, %eax
-; SSE41-NEXT: movw %ax, (%rdi)
+; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
 ; SSE41-NEXT: retq
   %T = load <2 x i8>, <2 x i8>* %A
   %G = add <2 x i8> %T,
Index: test/CodeGen/X86/vector-shift-ashr-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-ashr-128.ll
+++ test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -699,8 +699,7 @@
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; SSE2-LABEL: splatvar_shift_v8i16:
 ; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psraw %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -735,8 +734,7 @@
 ;
 ; X32-SSE-LABEL: splatvar_shift_v8i16:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
 ; X32-SSE-NEXT: movd %eax, %xmm1
 ; X32-SSE-NEXT: psraw %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
Index: test/CodeGen/X86/vector-shift-ashr-256.ll
===================================================================
--- test/CodeGen/X86/vector-shift-ashr-256.ll
+++ test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -473,8 +473,7 @@
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movzwl %ax, %eax
+; AVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX1-NEXT: vmovd %eax, %xmm1
 ; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
@@ -483,8 +482,7 @@
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movzwl %ax, %eax
+; AVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX2-NEXT: vmovd %eax, %xmm1
 ; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
@@ -492,8 +490,7 @@
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1: # BB#0:
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT: vmovd %xmm1, %eax
-; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX1-NEXT: vmovd %eax, %xmm1
 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
@@ -502,16 +499,14 @@
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2: # BB#0:
-; XOPAVX2-NEXT: vmovd %xmm1, %eax
-; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX2-NEXT: vmovd %eax, %xmm1
 ; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512: ## BB#0:
-; AVX512-NEXT: vmovd %xmm1, %eax
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512-NEXT: vmovd %eax, %xmm1
 ; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
Index: test/CodeGen/X86/vector-shift-ashr-512.ll
===================================================================
--- test/CodeGen/X86/vector-shift-ashr-512.ll
+++ test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -140,8 +140,7 @@
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vmovd %xmm2, %eax
-; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
 ; AVX512DQ-NEXT: vmovd %eax, %xmm2
 ; AVX512DQ-NEXT: vpsraw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vpsraw %xmm2, %ymm1, %ymm1
@@ -149,8 +148,7 @@
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovd %xmm1, %eax
-; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512BW-NEXT: vmovd %eax, %xmm1
 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
Index: test/CodeGen/X86/vector-shift-lshr-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-lshr-128.ll
+++ test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -551,8 +551,7 @@
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; SSE2-LABEL: splatvar_shift_v8i16:
 ; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psrlw %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -587,8 +586,7 @@
 ;
 ; X32-SSE-LABEL: splatvar_shift_v8i16:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
 ; X32-SSE-NEXT: movd %eax, %xmm1
 ; X32-SSE-NEXT: psrlw %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
Index: test/CodeGen/X86/vector-shift-lshr-256.ll
===================================================================
--- test/CodeGen/X86/vector-shift-lshr-256.ll
+++ test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -384,8 +384,7 @@
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movzwl %ax, %eax
+; AVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX1-NEXT: vmovd %eax, %xmm1
 ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
@@ -394,8 +393,7 @@
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movzwl %ax, %eax
+; AVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX2-NEXT: vmovd %eax, %xmm1
 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
@@ -403,8 +401,7 @@
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1: # BB#0:
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT: vmovd %xmm1, %eax
-; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX1-NEXT: vmovd %eax, %xmm1
 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
@@ -413,16 +410,14 @@
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2: # BB#0:
-; XOPAVX2-NEXT: vmovd %xmm1, %eax
-; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX2-NEXT: vmovd %eax, %xmm1
 ; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512: ## BB#0:
-; AVX512-NEXT: vmovd %xmm1, %eax
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512-NEXT: vmovd %eax, %xmm1
 ; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
Index: test/CodeGen/X86/vector-shift-lshr-512.ll
===================================================================
--- test/CodeGen/X86/vector-shift-lshr-512.ll
+++ test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -121,8 +121,7 @@
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vmovd %xmm2, %eax
-; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
 ; AVX512DQ-NEXT: vmovd %eax, %xmm2
 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
@@ -130,8 +129,7 @@
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovd %xmm1, %eax
-; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512BW-NEXT: vmovd %eax, %xmm1
 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
Index: test/CodeGen/X86/vector-shift-shl-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-128.ll
+++ test/CodeGen/X86/vector-shift-shl-128.ll
@@ -499,8 +499,7 @@
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; SSE2-LABEL: splatvar_shift_v8i16:
 ; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: psllw %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -535,8 +534,7 @@
 ;
 ; X32-SSE-LABEL: splatvar_shift_v8i16:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
 ; X32-SSE-NEXT: movd %eax, %xmm1
 ; X32-SSE-NEXT: psllw %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
Index: test/CodeGen/X86/vector-shift-shl-256.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-256.ll
+++ test/CodeGen/X86/vector-shift-shl-256.ll
@@ -348,8 +348,7 @@
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: movzwl %ax, %eax
+; AVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX1-NEXT: vmovd %eax, %xmm1
 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
@@ -358,8 +357,7 @@
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: movzwl %ax, %eax
+; AVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX2-NEXT: vmovd %eax, %xmm1
 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
@@ -367,8 +365,7 @@
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1: # BB#0:
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT: vmovd %xmm1, %eax
-; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX1-NEXT: vmovd %eax, %xmm1
 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
@@ -377,16 +374,14 @@
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2: # BB#0:
-; XOPAVX2-NEXT: vmovd %xmm1, %eax
-; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
 ; XOPAVX2-NEXT: vmovd %eax, %xmm1
 ; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512: ## BB#0:
-; AVX512-NEXT: vmovd %xmm1, %eax
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512-NEXT: vmovd %eax, %xmm1
 ; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
Index: test/CodeGen/X86/vector-shift-shl-512.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-512.ll
+++ test/CodeGen/X86/vector-shift-shl-512.ll
@@ -117,8 +117,7 @@
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vmovd %xmm2, %eax
-; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
 ; AVX512DQ-NEXT: vmovd %eax, %xmm2
 ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm1, %ymm1
@@ -126,8 +125,7 @@
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovd %xmm1, %eax
-; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
 ; AVX512BW-NEXT: vmovd %eax, %xmm1
 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
Index: test/CodeGen/X86/widen_conv-1.ll
===================================================================
--- test/CodeGen/X86/widen_conv-1.ll
+++ test/CodeGen/X86/widen_conv-1.ll
@@ -39,8 +39,7 @@
 ; X86-NEXT: pextrb $8, %xmm0, 2(%eax)
 ; X86-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; X86-NEXT: movd %xmm0, %ecx
-; X86-NEXT: movw %cx, (%eax)
+; X86-NEXT: pextrw $0, %xmm0, (%eax)
 ; X86-NEXT: popl %eax
 ; X86-NEXT: retl
 ;
@@ -51,8 +50,7 @@
 ; X64-NEXT: pextrb $8, %xmm0, 2(%rdi)
 ; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: pextrw $0, %xmm0, (%rdi)
 ; X64-NEXT: retq
 entry:
   %load = load <3 x i32>, <3 x i32>* %src.addr
Index: test/CodeGen/X86/widen_load-2.ll
===================================================================
--- test/CodeGen/X86/widen_load-2.ll
+++ test/CodeGen/X86/widen_load-2.ll
@@ -172,8 +172,7 @@
 ; CHECK-NEXT: pextrb $8, %xmm1, 2(%rdi)
 ; CHECK-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: movw %ax, (%rdi)
+; CHECK-NEXT: pextrw $0, %xmm0, (%rdi)
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: retq
   %a = load %i8vec3, %i8vec3* %ap, align 16
@@ -214,14 +213,12 @@
 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <158,158,158,u>
 ; CHECK-NEXT: pshufb %xmm0, %xmm1
 ; CHECK-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; CHECK-NEXT: movd %xmm1, %eax
-; CHECK-NEXT: movw %ax, (%rsi)
+; CHECK-NEXT: pextrw $0, %xmm1, (%rsi)
 ; CHECK-NEXT: movb $-98, 2(%rsi)
 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <1,1,1,u>
 ; CHECK-NEXT: pshufb %xmm0, %xmm1
 ; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: movw %ax, (%rdx)
+; CHECK-NEXT: pextrw $0, %xmm0, (%rdx)
 ; CHECK-NEXT: movb $1, 2(%rdx)
 ; CHECK-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; CHECK-NEXT: movdqa %xmm0, %xmm1
@@ -230,8 +227,7 @@
 ; CHECK-NEXT: pextrb $8, %xmm1, 2(%rdi)
 ; CHECK-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: movw %ax, (%rdi)
+; CHECK-NEXT: pextrw $0, %xmm0, (%rdi)
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: retq
 entry:
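
For reference, a minimal LLVM IR sketch of the two element-0 patterns the new MayFoldIntoStore/MayFoldIntoZeroExtend checks are meant to catch. These reproducers are not part of the patch's test updates and the function names are illustrative only; with SSE4.1 they would be expected to lower to a single pextrw (with a memory or register destination) instead of movd plus movw/movzwl:

; Extract of element 0 whose only use is a store.
define void @store_elt0(<8 x i16> %v, i16* %p) {
  %e = extractelement <8 x i16> %v, i32 0
  store i16 %e, i16* %p, align 2
  ret void
}

; Extract of element 0 whose only use is a zero extend.
define i32 @zext_elt0(<8 x i16> %v) {
  %e = extractelement <8 x i16> %v, i32 0
  %z = zext i16 %e to i32
  ret i32 %z
}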