Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -978,6 +978,44 @@ (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } +multiclass avx512_int_broadcastbw_reg opc, string Name, + X86VectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg> { + let ExeDomain = _.ExeDomain in + defm r : AVX512_maskable_custom, T8PD, EVEX; + + def : Pat <(_.VT (OpNode SrcRC:$src)), + (!cast(Name#r) + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), + (!cast(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), + (!cast(Name#rkz) _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; +} + +multiclass avx512_int_broadcastbw_reg_vl opc, string Name, + AVX512VLVectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcastbw_reg, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; + } +} + multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { @@ -989,18 +1027,11 @@ } } -let isCodeGenOnly = 1 in { -defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - X86VBroadcast, GR8, HasBWI>; -defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - X86VBroadcast, GR16, HasBWI>; -} -let isAsmParserOnly = 1 in { - defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - null_frag, GR32, HasBWI>; - defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - null_frag, GR32, HasBWI>; -} +defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", + avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", + avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, + HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, Index: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1921,9 +1921,9 @@ ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovq %rsi, %k1 -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2 +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq @@ -1934,9 +1934,9 @@ ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2 +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl @@ -1954,20 +1954,20 @@ ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm2 +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2 +; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2799,9 +2799,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2819,9 +2819,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2839,9 +2839,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2859,9 +2859,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1635,7 +1635,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 @@ -1688,7 +1688,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2274,7 +2274,7 @@ ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2390,7 +2390,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2443,7 +2443,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -4009,7 +4009,7 @@ ; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2431,7 +2431,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %ymm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -228,7 +228,7 @@ ; SKX-LABEL: insert_dup_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -249,7 +249,7 @@ ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16: ; SKX: ## BB#0: ; SKX-NEXT: movswl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -269,7 +269,7 @@ ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -288,7 +288,7 @@ ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -330,7 +330,7 @@ ; AVX512BW: # BB#0: ; AVX512BW-NEXT: movsbl (%rdi), %eax ; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: vpbroadcastb %al, %zmm0 +; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: @@ -346,7 +346,7 @@ ; AVX512VBMI: # BB#0: ; AVX512VBMI-NEXT: movsbl (%rdi), %eax ; AVX512VBMI-NEXT: shrl $8, %eax -; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0 +; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512VBMI-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32