Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -12085,12 +12085,12 @@ Zeroable, Subtarget, DAG)) return Blend; - bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode()); - bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode()); + bool IsLowZero = (Zeroable & 0x3) == 0x3; + bool IsHighZero = (Zeroable & 0xc) == 0xc; // If either input operand is a zero vector, use VPERM2X128 because its mask // allows us to replace the zero input with an implicit zero. - if (!IsV1Zero && !IsV2Zero) { + if (!IsLowZero && !IsHighZero) { // Check for patterns which can be matched with a single insert of a 128-bit // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); @@ -12124,30 +12124,17 @@ // [6] - ignore // [7] - zero high half of destination - int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0]; - int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1]; + assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?"); - unsigned PermMask = MaskLO | (MaskHI << 4); + unsigned PermMask = 0; + PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); + PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); - // If either input is a zero vector, replace it with an undef input. - // Shuffle mask values < 4 are selecting elements of V1. - // Shuffle mask values >= 4 are selecting elements of V2. - // Adjust each half of the permute mask by clearing the half that was - // selecting the zero vector and setting the zero mask bit. - if (IsV1Zero) { + // Check the immediate mask and replace unused sources with undef. + if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00) V1 = DAG.getUNDEF(VT); - if (MaskLO < 2) - PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI < 2) - PermMask = (PermMask & 0x0f) | 0x80; - } - if (IsV2Zero) { + if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20) V2 = DAG.getUNDEF(VT); - if (MaskLO >= 2) - PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI >= 2) - PermMask = (PermMask & 0x0f) | 0x80; - } return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8));