Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -17307,34 +17307,50 @@
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
   if (!Subtarget->is64Bit() &&
-      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
-      Amt.getOpcode() == ISD::BITCAST &&
-      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+
+    // Peek through any splat that was introduced for i64 shift vectorization.
+    int SplatIndex = -1;
+    if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
+      if (SVN->isSplat()) {
+        SplatIndex = SVN->getSplatIndex();
+        Amt = Amt.getOperand(0);
+        assert(SplatIndex < (int)VT.getVectorNumElements() &&
+               "Splat shuffle referencing second operand");
+      }
+
+    if (Amt.getOpcode() != ISD::BITCAST ||
+        Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+      return SDValue();
+
     Amt = Amt.getOperand(0);
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
                      VT.getVectorNumElements();
     unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
     uint64_t ShiftAmt = 0;
+    unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
     for (unsigned i = 0; i != Ratio; ++i) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
       if (!C)
         return SDValue();
       // 6 == Log2(64)
       ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
     }
-    // Check remaining shift amounts.
-    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
-      uint64_t ShAmt = 0;
-      for (unsigned j = 0; j != Ratio; ++j) {
-        ConstantSDNode *C =
-            dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
-        if (!C)
+
+    // Check remaining shift amounts (if not a splat).
+    if (SplatIndex < 0) {
+      for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+        uint64_t ShAmt = 0;
+        for (unsigned j = 0; j != Ratio; ++j) {
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+          if (!C)
+            return SDValue();
+          // 6 == Log2(64)
+          ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+        }
+        if (ShAmt != ShiftAmt)
           return SDValue();
-        // 6 == Log2(64)
-        ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
       }
-      if (ShAmt != ShiftAmt)
-        return SDValue();
     }
 
     if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
Index: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -877,21 +877,17 @@
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm3
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm4
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm1
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE-NEXT:    psrlq $7, %xmm2
+; X32-SSE-NEXT:    psrlq $1, %xmm1
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm0
+; X32-SSE-NEXT:    psrlq $7, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT:    xorpd %xmm3, %xmm1
-; X32-SSE-NEXT:    psubq %xmm3, %xmm1
+; X32-SSE-NEXT:    xorpd %xmm2, %xmm1
+; X32-SSE-NEXT:    psubq %xmm2, %xmm1
 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, <i64 1, i64 7>
Index: llvm/trunk/test/CodeGen/X86/vector-shift-lshr-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -655,13 +655,9 @@
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm0
+; X32-SSE-NEXT:    psrlq $7, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
Index: llvm/trunk/test/CodeGen/X86/vector-shift-shl-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-shl-128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -607,13 +607,9 @@
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psllq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psllq %xmm2, %xmm0
+; X32-SSE-NEXT:    psllq $7, %xmm1
+; X32-SSE-NEXT:    psllq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
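
Note: the lowering change above fires when the i64 shift amount reaches lowering as a splat shuffle over a bitcast of a BUILD_VECTOR, rather than as a plain constant vector. A minimal IR sketch of that shape for an x86-32 target (a hypothetical reproducer; the function name is illustrative and it is not one of the tests updated by this patch):

; Hypothetical reproducer: after i64 legalization on a 32-bit target, the
; insertelement becomes a BUILD_VECTOR of i32 halves, so the splatted constant
; amount survives as shuffle(bitcast(build_vector)); the SplatIndex/BaseOp
; logic added above reduces it to an immediate shift (psrlq $7).
define <2 x i64> @splat_constant_shift_v2i64(<2 x i64> %a) {
  %ins = insertelement <2 x i64> undef, i64 7, i32 0
  %amt = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = lshr <2 x i64> %a, %amt
  ret <2 x i64> %shift
}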