Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -12695,7 +12695,7 @@
   MVT LogicVT;
   MVT EltVT;
   unsigned NumElts;
-  
+
   if (VT.isVector()) {
     LogicVT = VT;
     EltVT = VT.getVectorElementType();
@@ -15410,7 +15410,7 @@
       SDValue Mask = Op.getOperand(3);
       SDValue RoundingMode;
       // We allways add rounding mode to the Node.
-      // If the rounding mode is not specified, we add the 
+      // If the rounding mode is not specified, we add the
       // "current direction" mode.
       if (Op.getNumOperands() == 4)
         RoundingMode =
@@ -17305,34 +17305,49 @@
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
   if (!Subtarget->is64Bit() &&
-      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
-      Amt.getOpcode() == ISD::BITCAST &&
-      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+
+    // Peek through any splat that was introduced for i64 shift vectorization.
+    int SplatIndex = -1;
+    if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
+      if (SVN->isSplat()) {
+        SplatIndex = SVN->getSplatIndex();
+        Amt = SplatIndex > (int)VT.getVectorNumElements() ? Amt.getOperand(1)
+                                                          : Amt.getOperand(0);
+      }
+
+    if (Amt.getOpcode() != ISD::BITCAST ||
+        Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+      return SDValue();
+
     Amt = Amt.getOperand(0);
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
                      VT.getVectorNumElements();
     unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
     uint64_t ShiftAmt = 0;
+    unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
     for (unsigned i = 0; i != Ratio; ++i) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
       if (!C)
         return SDValue();
       // 6 == Log2(64)
       ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
     }
-    // Check remaining shift amounts.
-    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
-      uint64_t ShAmt = 0;
-      for (unsigned j = 0; j != Ratio; ++j) {
-        ConstantSDNode *C =
-          dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
-        if (!C)
+
+    // Check remaining shift amounts (if not a splat).
+    if (SplatIndex < 0) {
+      for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+        uint64_t ShAmt = 0;
+        for (unsigned j = 0; j != Ratio; ++j) {
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+          if (!C)
+            return SDValue();
+          // 6 == Log2(64)
+          ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+        }
+        if (ShAmt != ShiftAmt)
           return SDValue();
-        // 6 == Log2(64)
-        ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
       }
-      if (ShAmt != ShiftAmt)
-        return SDValue();
     }
 
     if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
Index: test/CodeGen/X86/vector-shift-lshr-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-lshr-128.ll
+++ test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -655,13 +655,9 @@
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm0
+; X32-SSE-NEXT:    psrlq $7, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
Index: test/CodeGen/X86/vector-shift-shl-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-128.ll
+++ test/CodeGen/X86/vector-shift-shl-128.ll
@@ -607,13 +607,9 @@
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psllq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psllq %xmm2, %xmm0
+; X32-SSE-NEXT:    psllq $7, %xmm1
+; X32-SSE-NEXT:    psllq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
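
For reference, the updated X32-SSE checks above belong to the existing constant_shift_v2i64 tests. The IR they exercise is roughly the following non-uniform constant shift (an illustrative sketch; the authoritative function bodies live in the .ll files, and the shl variant uses shl in place of lshr):

define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
  ; Each i64 lane is shifted by a different constant; with this patch the
  ; 32-bit lowering emits two immediate psrlq shifts (by 1 and by 7) whose
  ; results are merged by movsd, instead of building the amounts in GPRs.
  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}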