Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2133,16 +2133,16 @@ llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_128: GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_256: GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_512: GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">, - Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i64_ty], [IntrNoMem]>; } // Pack ops. 
Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -3794,7 +3794,7 @@ avx512vl_i32_info, avx512vl_i64_info, X86pmuludq, HasAVX512, 1>; defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P, - avx512vl_i8_info, avx512vl_i8_info, + avx512vl_i64_info, avx512vl_i64_info, X86multishift, HasVBMI, 0>, T8PD; multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, Index: test/CodeGen/X86/avx512vbmi-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vbmi-intrinsics.ll +++ test/CodeGen/X86/avx512vbmi-intrinsics.ll @@ -20,24 +20,24 @@ ret <64 x i8> %res4 } -declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) +declare <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64>, <8 x i64>, <8 x i64>, i64) -define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +define <8 x i64>@test_int_x86_avx512_mask_pmultishift_qb_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i64 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovq %rdi, %k1 ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) - %res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> 
%x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) - %res3 = add <64 x i8> %res, %res1 - %res4 = add <64 x i8> %res3, %res2 - ret <64 x i8> %res4 + %res = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i64 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i64 %x3) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i64 -1) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res3, %res2 + ret <8 x i64> %res4 } declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) Index: test/CodeGen/X86/avx512vbmivl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -41,44 +41,44 @@ ret <32 x i8> %res4 } -declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) +declare <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64>, <2 x i64>, <2 x i64>, i16) -define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +define <2 x i64>@test_int_x86_avx512_mask_pmultishift_qb_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xcb] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: 
[0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) - %res3 = add <16 x i8> %res, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i16 %x3) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i16 %x3) + %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i16 -1) + %res3 = add <2 x i64> %res, %res1 + %res4 = add <2 x i64> %res3, %res2 + ret <2 x i64> %res4 } -declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) +declare <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64>, <4 x i64>, <4 x i64>, i32) -define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +define <4 x i64>@test_int_x86_avx512_mask_pmultishift_qb_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, 
%ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xcb] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) - %res3 = add <32 x i8> %res, %res1 - %res4 = add <32 x i8> %res3, %res2 - ret <32 x i8> %res4 + %res = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 %x3) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i32 %x3) + %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 -1) + %res3 = add <4 x i64> %res, %res1 + %res4 = add <4 x i64> %res3, %res2 + ret <4 x i64> %res4 } declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) Index: test/MC/X86/avx512vbmi-encoding.s =================================================================== --- test/MC/X86/avx512vbmi-encoding.s +++ test/MC/X86/avx512vbmi-encoding.s @@ -541,3 +541,15 @@ //CHECK: vpmultishiftqb 4660(%rax,%r14,8), %zmm29, %zmm30 //CHECK: encoding: [0x62,0x22,0x95,0x40,0x83,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpmultishiftqb (%rax){1to2}, %xmm2, %xmm1 +// CHECK: vpmultishiftqb (%rax){1to2}, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x18,0x83,0x08] + + vpmultishiftqb (%rax){1to4}, %ymm2, 
%ymm1 +// CHECK: vpmultishiftqb (%rax){1to4}, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x38,0x83,0x08] + + vpmultishiftqb (%rax){1to8}, %zmm2, %zmm1 +// CHECK: vpmultishiftqb (%rax){1to8}, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x58,0x83,0x08] +