Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7188,15 +7188,15 @@
     }
   }
 
-  // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
+  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
       uint64_t Amt = CAmt->getZExtValue();
-      unsigned Size = VT.getSizeInBits();
+      unsigned Size = VT.getScalarSizeInBits();
 
-      if (Amt < Size / 2) {
+      if (Amt < Size) {
         SDLoc SL(N);
         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
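The relaxed bound tracks the IR shift rule: shl by an amount equal to or larger than the bit width of its operand does not produce a defined value, so trunc (shl x, K) may only be rewritten as shl (trunc x), K when K is still a valid shift amount for the narrow scalar type. A minimal hand-written IR sketch of the scalar fold, for illustration only (these functions are not part of the patch):

define i32 @shift_then_trunc(i64 %x) {
  ; Before the combine: shift in the wide type, then truncate.
  %shl = shl i64 %x, 31
  %trunc = trunc i64 %shl to i32
  ret i32 %trunc
}

define i32 @trunc_then_shift(i64 %x) {
  ; After the combine: truncate first, then shift in the narrow type.
  ; Valid because 31 < 32, the scalar width of the result; the bits
  ; shifted past bit 31 were discarded by the trunc anyway.
  %trunc = trunc i64 %x to i32
  %shl = shl i32 %trunc, 31
  ret i32 %shl
}

The old check compared Amt against half of VT.getSizeInBits(), which was both too strict for scalars (an i32 result folded only for amounts below 16) and unsound for vectors, where getSizeInBits() is the full vector width (128 for v8i16, so a 17-bit element shift passed the check). Switching to getScalarSizeInBits() addresses both, and the v8i16 and scalar tests below pin down the behavior.
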
Index: llvm/trunk/test/CodeGen/X86/reduce-trunc-shl.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/reduce-trunc-shl.ll
+++ llvm/trunk/test/CodeGen/X86/reduce-trunc-shl.ll
@@ -26,3 +26,142 @@
   store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+define <8 x i16> @trunc_shl_v8i16_v8i32(<8 x i32> %a) {
+; SSE2-LABEL: trunc_shl_v8i16_v8i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pslld $17, %xmm0
+; SSE2-NEXT:    pslld $17, %xmm1
+; SSE2-NEXT:    pslld $16, %xmm1
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    pslld $16, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_v8i16_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpslld $17, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+  %conv = trunc <8 x i32> %shl to <8 x i16>
+  ret <8 x i16> %conv
+}
+
+define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_31_i32_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movl (%rsi), %eax
+; SSE2-NEXT:    shll $31, %eax
+; SSE2-NEXT:    movl %eax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_31_i32_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movl (%rsi), %eax
+; AVX2-NEXT:    shll $31, %eax
+; AVX2-NEXT:    movl %eax, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 31
+  %trunc = trunc i64 %shl to i32
+  store i32 %trunc, i32* %out
+  ret void
+}
+
+define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_32_i32_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movl $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_32_i32_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movl $0, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 32
+  %trunc = trunc i64 %shl to i32
+  store i32 %trunc, i32* %out
+  ret void
+}
+
+define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_15_i16_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movzwl (%rsi), %eax
+; SSE2-NEXT:    shlw $15, %ax
+; SSE2-NEXT:    movw %ax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_15_i16_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movzwl (%rsi), %eax
+; AVX2-NEXT:    shlw $15, %ax
+; AVX2-NEXT:    movw %ax, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 15
+  %trunc = trunc i64 %shl to i16
+  store i16 %trunc, i16* %out
+  ret void
+}
+
+define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_16_i16_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movw $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_16_i16_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movw $0, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 16
+  %trunc = trunc i64 %shl to i16
+  store i16 %trunc, i16* %out
+  ret void
+}
+
+define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_7_i8_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movb (%rsi), %al
+; SSE2-NEXT:    shlb $7, %al
+; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_7_i8_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movb (%rsi), %al
+; AVX2-NEXT:    shlb $7, %al
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 7
+  %trunc = trunc i64 %shl to i8
+  store i8 %trunc, i8* %out
+  ret void
+}
+
+define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
+; SSE2-LABEL: trunc_shl_8_i8_i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movb $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX2-LABEL: trunc_shl_8_i8_i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movb $0, (%rdi)
+; AVX2-NEXT:    retq
+  %val = load i64, i64* %in
+  %shl = shl i64 %val, 8
+  %trunc = trunc i64 %shl to i8
+  store i8 %trunc, i8* %out
+  ret void
+}
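
The SSE2 and AVX2 check prefixes are driven by the test file's RUN lines, which sit above this hunk (it starts at line 26) and so are not shown; as an assumption, they take roughly this shape:

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s -check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2

The scalar tests pair the largest legal shift amount for each narrow type (31 for i32, 15 for i16, 7 for i8), which must fold to a single narrow shift, with the first out-of-range amount (32, 16, 8), which must instead fold to a store of zero, since every bit surviving the truncation has been shifted out.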