Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -6351,6 +6351,14 @@ } } +// Support full-register inputs (like the SSE patterns): select OpNode on a From.RC source by passing its SubRegIdx subregister to the "rr" instruction. +multiclass avx512_extend_lowering { + def : Pat<(To.VT (OpNode (From.VT From.RC:$src))), + (!cast(NAME#To.ZSuffix#"rr") + (EXTRACT_SUBREG From.RC:$src, SubRegIdx))>; +} + multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { @@ -6360,6 +6368,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasBWI] in { @@ -6378,6 +6387,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6396,6 +6406,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6414,6 +6425,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6432,6 +6444,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { @@ -6451,6 +6464,7 @@ defm Z256: avx512_extend_common, + avx512_extend_lowering, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -1810,3 +1810,72 @@ ret <64 x i16> %ret } +define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { +; ALL-LABEL: shuffle_zext_16x8_to_16x16: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <16 x i16> + ret <16 x i16> %2 +} + +define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_16x16: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <16 x i16> + ret <16 x i16> %2 +} + +define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_8x32: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <8 x i32> + ret <8 x i32> %2 +} + +define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { +; ALL-LABEL: zext_32x8_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> + %2 = bitcast <32 x i8> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { +; ALL-LABEL: zext_16x16_to_8x32: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> + %2 = bitcast <16 x i16> %1 to <8 x i32> + ret <8 x i32> %2 +} + +define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { +; ALL-LABEL: zext_16x16_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; ALL-NEXT: retq + %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> + %2 = bitcast <16 x i16> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { +; ALL-LABEL: zext_8x32_to_4x64: +; ALL: ## BB#0: +; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; ALL-NEXT: retq + %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +}