Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -7452,9 +7452,8 @@
                       DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
 }
 
-static SDValue
-NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
-                       SelectionDAG &DAG) {
+static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+                                      SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
@@ -7479,33 +7478,43 @@
 
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
-  if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
-      VT == MVT::v16i16 || VT == MVT::v32i8) {
+  if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
+      VT == MVT::v32i8) {
     SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
     if (NewOp.getNode())
       return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
-  } else if ((VT == MVT::v4i32 ||
-             (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
     // FIXME: Figure out a cleaner way to do this.
-    // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
       SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
       if (NewOp.getNode()) {
         MVT NewVT = NewOp.getSimpleValueType();
         if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
                                NewVT, true, false))
-          return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
-                              DAG, Subtarget, dl);
+          return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
+                              dl);
       }
     } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
       SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
       if (NewOp.getNode()) {
         MVT NewVT = NewOp.getSimpleValueType();
         if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
-          return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
-                              DAG, Subtarget, dl);
+          return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
+                              dl);
       }
     }
+  } else if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Subtarget->hasSSE2()) {
+    // Emit movq and vmovq to copy an i64 or f64 to a vector and zero the
+    // other bits.
+    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+      MVT NewVT = SVOp->getSimpleValueType(0);
+      if (isCommutedMOVLMask(SVOp->getMask(), NewVT, true, false))
+        return getVZextMovL(VT, NewVT, SVOp->getOperand(0), DAG, Subtarget, dl);
+    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+      MVT NewVT = SVOp->getSimpleValueType(0);
+      if (isMOVLMask(SVOp->getMask(), NewVT))
+        return getVZextMovL(VT, NewVT, SVOp->getOperand(1), DAG, Subtarget, dl);
+    }
   }
   return SDValue();
 }
Index: test/CodeGen/X86/avx-shuffle.ll
===================================================================
--- test/CodeGen/X86/avx-shuffle.ll
+++ test/CodeGen/X86/avx-shuffle.ll
@@ -306,3 +306,11 @@
   store <3 x double> %a1, <3 x double>* undef, align 1
   ret void
 }
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: vmovq
+  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %1
+}
Index: test/CodeGen/X86/sse2.ll
===================================================================
--- test/CodeGen/X86/sse2.ll
+++ test/CodeGen/X86/sse2.ll
@@ -221,3 +221,11 @@
   %double2float.i = fptrunc <4 x double> %0 to <4 x float>
   ret <4 x float> %double2float.i
 }
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: movq
+  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %1
+}