Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -11542,6 +11542,28 @@
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
 
+  // Operands for the SHUF128 node: Ops[0] serves the low half of the widened
+  // mask and Ops[1] the high half. Both start out undef.
+  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  // Ensure each half of the result takes its elements from a single Op.
+  // Widened indices above MaxOp1Index refer to V2, the rest to V1.
+  int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
+  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+    // Zero elements are not handled by this lowering.
+    if (WidenedMask[i] == SM_SentinelZero)
+      return SDValue();
+    // Undef elements place no constraint on the operand choice.
+    if (WidenedMask[i] == SM_SentinelUndef)
+      continue;
+
+    SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
+    unsigned OpIndex = (i < Size/2) ? 0 : 1;
+    if (Ops[OpIndex].isUndef())
+      Ops[OpIndex] = Op;
+    else if (Ops[OpIndex] != Op)
+      return SDValue();
+  }
+
   // Form a 128-bit permutation.
   // Convert the 64-bit shuffle mask selection values into 128-bit selection
   // bits defined by a vshuf64x2 instruction's immediate control byte.
@@ -11549,15 +11571,12 @@
   unsigned ControlBitsNum = WidenedMask.size() / 2;
 
   for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if (WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-
     // Use first element in place of undef mask.
     Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
     PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
   }
 
-  return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+  return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
                      DAG.getConstant(PermMask, DL, MVT::i8));
 }
 
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2271,3 +2271,35 @@
   %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32>
   ret <16 x float> %res
 }
+
+define <8 x double> @shuffle_v8f64_23014567(<8 x double> %a0, <8 x double> %a1) {
+; ALL-LABEL: shuffle_v8f64_23014567:
+; ALL: # BB#0:
+; ALL-NEXT: vshuff64x2 $225, %zmm1, %zmm1, %zmm0 # zmm0 = zmm1[2,3,0,1,4,5,6,7]
+  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %1
+}
+
+define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1) {
+; ALL-LABEL: shuffle_v8f64_2301uu67:
+; ALL: # BB#0:
+; ALL-NEXT: vshuff64x2 $193, %zmm1, %zmm1, %zmm0 # zmm0 = zmm1[2,3,0,1,0,1,6,7]
+  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 6, i32 7>
+  ret <8 x double> %1
+}
+
+define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
+; ALL-LABEL: shuffle_v8f64_2301uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vshuff64x2 $1, %zmm0, %zmm1, %zmm0 # zmm0 = zmm1[2,3,0,1],zmm0[0,1,0,1]
+  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x double> %1
+}
+
+define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) {
+; ALL-LABEL: shuffle_v8f64_uuu2301:
+; ALL: # BB#0:
+; ALL-NEXT: vshuff64x2 $16, %zmm1, %zmm0, %zmm0 # zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
+  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1>
+  ret <8 x double> %1
+}