DAGCombiner::visitTRUNCATE: pre-truncate BUILD_VECTOR sources.

When the truncate operand is a BUILD_VECTOR, operations need not be legal yet
(!LegalOperations), and the target reports the scalar truncate as free
(TLI.isTruncateFree on the scalar types), rebuild the vector from per-operand
scalar TRUNCATE nodes instead of truncating the whole vector. The source type
is also cached in SrcVT and reused by the no-op truncate check. The X86
expectations in bool-vector.ll and known-signbits-vector.ll are updated to
match.

NOTE(review): this copy of the patch had its whitespace collapsed. Indentation
and blank context lines below are reconstructed from the hunk line counts, and
spacing inside the FileCheck lines is left as single spaces, so apply it with
whitespace-tolerant matching (e.g. `patch -l`) and confirm against the tree.

Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9895,10 +9895,11 @@
 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  EVT SrcVT = N0.getValueType();
   bool isLE = DAG.getDataLayout().isLittleEndian();
 
   // noop truncate
-  if (N0.getValueType() == N->getValueType(0))
+  if (SrcVT == VT)
     return N0;
 
   // fold (truncate (truncate x)) -> (truncate x)
@@ -9999,6 +10000,19 @@
     }
   }
 
+  // Attempt to pre-truncate BUILD_VECTOR sources.
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+    SDLoc DL(N);
+    EVT SVT = VT.getScalarType();
+    SmallVector<SDValue, 8> TruncOps;
+    for (const SDValue &Op : N0->op_values()) {
+      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+      TruncOps.push_back(TruncOp);
+    }
+    return DAG.getBuildVector(VT, DL, TruncOps);
+  }
+
   // Fold a series of buildvector, bitcast, and truncate if possible.
   // For example fold
   // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
Index: llvm/trunk/test/CodeGen/X86/bool-vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bool-vector.ll
+++ llvm/trunk/test/CodeGen/X86/bool-vector.ll
@@ -77,17 +77,17 @@
 ; X32-NEXT: pushl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: .cfi_offset %esi, -8
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: andl $1, %eax
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: andl $1, %edx
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: andl $1, %esi
-; X32-NEXT: leal (%eax,%ecx,2), %eax
-; X32-NEXT: leal (%eax,%edx,4), %eax
-; X32-NEXT: leal (%eax,%esi,8), %eax
+; X32-NEXT: andl $1, %edx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: leal (%esi,%edx,2), %edx
+; X32-NEXT: leal (%edx,%ecx,4), %ecx
+; X32-NEXT: leal (%ecx,%eax,8), %eax
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 4
 ; X32-NEXT: retl
Index: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
@@ -36,17 +36,10 @@
 ;
 ; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
 ; X64: # %bb.0:
-; X64-NEXT: movslq %edi, %rax
-; X64-NEXT: movslq %esi, %rsi
-; X64-NEXT: movslq %edx, %rdx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: vmovq %rcx, %xmm0
-; X64-NEXT: vmovq %rdx, %xmm1
-; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT: vmovq %rsi, %xmm1
-; X64-NEXT: vmovq %rax, %xmm2
-; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; X64-NEXT: vmovd %edi, %xmm0
+; X64-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
+; X64-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT: retq
   %1 = sext i8 %a0 to i64