Index: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp =================================================================== --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -255,15 +255,13 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - if (Imm & 0x88) - return; // Not a shuffle - unsigned HalfSize = VT.getVectorNumElements() / 2; for (unsigned l = 0; l != 2; ++l) { - unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize; + unsigned HalfMask = Imm >> (l * 4); + unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) - ShuffleMask.push_back(i); + ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : i); } } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -4390,6 +4390,7 @@ /// IsUnary to true if only uses one source. Note that this will set IsUnary for /// shuffles which use a single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. +/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); @@ -4519,6 +4520,10 @@ ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast(ImmN)->getZExtValue(), Mask); if (Mask.empty()) return false; + // Mask only contains negative index if an element is zero. + if (std::any_of(Mask.begin(), Mask.end(), + [](int M){ return M == SM_SentinelZero; })) + return false; break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); Index: llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll +++ llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll @@ -269,7 +269,7 @@ define <4 x double> @vperm2z_0x08(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x08: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -279,7 +279,7 @@ ; ALL-LABEL: vperm2z_0x18: ; ALL: # BB#0: ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 -; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0 +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -288,7 +288,7 @@ define <4 x double> @vperm2z_0x28(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x28: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -298,7 +298,7 @@ ; ALL-LABEL: vperm2z_0x38: ; ALL: # BB#0: ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 -; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0 +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -307,7 +307,7 @@ define <4 x double> @vperm2z_0x80(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x80: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -316,7 +316,7 @@ define <4 x double> @vperm2z_0x81(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x81: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -325,7 +325,7 @@ define <4 x double> @vperm2z_0x82(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x82: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -334,7 +334,7 @@ define <4 x double> @vperm2z_0x83(<4 x double> %a) { ; ALL-LABEL: vperm2z_0x83: ; ALL: # BB#0: -; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -345,8 +345,8 @@ define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) { ; ALL-LABEL: vperm2z_int_0x83: ; ALL: # BB#0: -; AVX1: vperm2f128 $129, %ymm0, %ymm0, %ymm0 -; AVX2: vperm2i128 $129, %ymm0, %ymm0, %ymm0 +; AVX1: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero +; AVX2: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero %s = shufflevector <4 x i64> , <4 x i64> %a, <4 x i32> %c = add <4 x i64> %b, %s ret <4 x i64> %c