Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -11424,20 +11424,43 @@
           DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
     return V;
 
-  if (isSingleInputShuffleMask(Mask)) {
-    // There are no generalized cross-lane shuffle operations available on i8
-    // element types.
-    if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
-      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
-                                                     Mask, DAG);
+  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+  bool SingleInputMask = true;
+  bool SingleInputAndZeroesMask = true;
+  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+    if (Mask[i] >= Size) {
+      SingleInputMask = false;
+      if (!Zeroable[i]) {
+        SingleInputAndZeroesMask = false;
+        break;
+      }
+    }
+  }
 
+  // There are no generalized cross-lane shuffle operations available on i8
+  // element types.
+  if (SingleInputMask && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
+                                                   DAG);
+
+  // PSHUFB only permutes bytes within each 128-bit lane, so it can implement
+  // this shuffle only when no element crosses lanes. A lane-crossing mask
+  // whose V2 elements are all zeroable must therefore not take this path.
+  if (SingleInputAndZeroesMask &&
+      !is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
     SDValue PSHUFBMask[32];
-    for (int i = 0; i < 32; ++i)
-      PSHUFBMask[i] =
-          Mask[i] < 0
-              ? DAG.getUNDEF(MVT::i8)
-              : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL,
-                                MVT::i8);
+    // A PSHUFB control byte with its high bit set (0x80) writes zero to the
+    // corresponding result byte; otherwise its low four bits select a byte
+    // from the same 128-bit lane of V1.
+    for (int i = 0; i < 32; ++i) {
+      if (Mask[i] < 0)
+        PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
+      else if (Zeroable[i])
+        PSHUFBMask[i] = DAG.getConstant(0x80, DL, MVT::i8);
+      else
+        PSHUFBMask[i] =
+            DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL, MVT::i8);
+    }
 
     return DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, V1,
                        DAG.getBuildVector(MVT::v32i8, DL, PSHUFBMask));
Index: test/CodeGen/X86/vector-shuffle-256-v32.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -953,6 +953,22 @@
   ret <32 x i8> %shuffle
 }
 
+define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_zz_02_zz_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 2, i32 32, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
 define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
 ; AVX1:       # BB#0: