Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7184,15 +7184,15 @@ } } - // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getSizeInBits(); + unsigned Size = VT.getScalarSizeInBits(); - if (Amt < Size / 2) { + if (Amt < Size) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); Index: test/CodeGen/X86/reduce-trunc-shl.ll =================================================================== --- test/CodeGen/X86/reduce-trunc-shl.ll +++ test/CodeGen/X86/reduce-trunc-shl.ll @@ -26,3 +26,28 @@ store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out ret void } + +define <8 x i16> @trunc_shl_v8i16_v8i32(<8 x i32> %a) { +; SSE2-LABEL: trunc_shl_v8i16_v8i32: +; SSE2: # BB#0: +; SSE2-NEXT: pslld $17, %xmm0 +; SSE2-NEXT: pslld $17, %xmm1 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX2-LABEL: trunc_shl_v8i16_v8i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpslld $17, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17> + %conv = trunc <8 x i32> %shl to <8 x i16> + ret <8 x i16> %conv +}