Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -4484,6 +4484,15 @@ def int_x86_avx512_mask_pmulh_w_256 : GCCBuiltin<"__builtin_ia32_pmulhw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_hr_sw_256 : GCCBuiltin<"__builtin_ia32_pmulhrsw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -404,6 +404,8 @@ PMULUDQ, // Vector multiply packed signed doubleword integers PMULDQ, + // Vector Multiply High of Packed Signed Integers with Round and Scale + MULHRS, // FMA nodes FMADD, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -18597,6 +18597,7 @@ case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; + case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case 
X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; } Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -3140,6 +3140,8 @@ HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -187,6 +187,7 @@ def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; +def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>; def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -650,6 +650,9 @@ X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(avx512_mask_pmulh_w_128, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), X86_INTRINSIC_DATA(avx512_mask_pmulh_w_256, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), X86_INTRINSIC_DATA(avx512_mask_pmulh_w_512, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), Index: 
test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1024,3 +1024,17 @@ %res2 = add <32 x i16> %res, %res1 ret <32 x i16> %res2 } + +declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: {%k1} +; CHECK: vpmulhrsw {{.*}}encoding: [0x62 +define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { + %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} \ No newline at end of file Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3817,3 +3817,29 @@ %res2 = add <16 x i16> %res, %res1 ret <16 x i16> %res2 } + +declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: {%k1} +; CHECK: vpmulhrsw {{.*}}encoding: [0x62 +define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { + %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x 
i16>, i16) +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: {%k1} +; CHECK: vpmulhrsw {{.*}}encoding: [0x62 +define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { + %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} \ No newline at end of file Index: test/MC/X86/x86-64-avx512bw.s =================================================================== --- test/MC/X86/x86-64-avx512bw.s +++ test/MC/X86/x86-64-avx512bw.s @@ -3740,3 +3740,39 @@ // CHECK: encoding: [0x62,0x61,0x2d,0x40,0xe5,0xb2,0xc0,0xdf,0xff,0xff] vpmulhw -8256(%rdx), %zmm26, %zmm30 +// CHECK: vpmulhrsw %zmm25, %zmm27, %zmm21 +// CHECK: encoding: [0x62,0x82,0x25,0x40,0x0b,0xe9] + vpmulhrsw %zmm25, %zmm27, %zmm21 + +// CHECK: vpmulhrsw %zmm25, %zmm27, %zmm21 {%k7} +// CHECK: encoding: [0x62,0x82,0x25,0x47,0x0b,0xe9] + vpmulhrsw %zmm25, %zmm27, %zmm21 {%k7} + +// CHECK: vpmulhrsw %zmm25, %zmm27, %zmm21 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x25,0xc7,0x0b,0xe9] + vpmulhrsw %zmm25, %zmm27, %zmm21 {%k7} {z} + +// CHECK: vpmulhrsw (%rcx), %zmm27, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0x29] + vpmulhrsw (%rcx), %zmm27, %zmm21 + +// CHECK: vpmulhrsw 291(%rax,%r14,8), %zmm27, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x25,0x40,0x0b,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmulhrsw 291(%rax,%r14,8), %zmm27, %zmm21 + +// CHECK: vpmulhrsw 8128(%rdx), %zmm27, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0x6a,0x7f] + vpmulhrsw 8128(%rdx), %zmm27, %zmm21 + +// CHECK: vpmulhrsw 8192(%rdx), %zmm27, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0xaa,0x00,0x20,0x00,0x00] + vpmulhrsw 8192(%rdx), %zmm27, %zmm21 + +// CHECK: vpmulhrsw -8192(%rdx), 
%zmm27, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0x6a,0x80] + vpmulhrsw -8192(%rdx), %zmm27, %zmm21 + +// CHECK: vpmulhrsw -8256(%rdx), %zmm27, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0xaa,0xc0,0xdf,0xff,0xff] + vpmulhrsw -8256(%rdx), %zmm27, %zmm21 + Index: test/MC/X86/x86-64-avx512bw_vl.s =================================================================== --- test/MC/X86/x86-64-avx512bw_vl.s +++ test/MC/X86/x86-64-avx512bw_vl.s @@ -6726,3 +6726,76 @@ // CHECK: vpmulhw -4128(%rdx), %ymm27, %ymm22 // CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe5,0xb2,0xe0,0xef,0xff,0xff] vpmulhw -4128(%rdx), %ymm27, %ymm22 + +// CHECK: vpmulhrsw %xmm26, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x02,0x65,0x00,0x0b,0xe2] + vpmulhrsw %xmm26, %xmm19, %xmm28 + +// CHECK: vpmulhrsw %xmm26, %xmm19, %xmm28 {%k6} +// CHECK: encoding: [0x62,0x02,0x65,0x06,0x0b,0xe2] + vpmulhrsw %xmm26, %xmm19, %xmm28 {%k6} + +// CHECK: vpmulhrsw %xmm26, %xmm19, %xmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x65,0x86,0x0b,0xe2] + vpmulhrsw %xmm26, %xmm19, %xmm28 {%k6} {z} + +// CHECK: vpmulhrsw (%rcx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0x0b,0x21] + vpmulhrsw (%rcx), %xmm19, %xmm28 + +// CHECK: vpmulhrsw 291(%rax,%r14,8), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x22,0x65,0x00,0x0b,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmulhrsw 291(%rax,%r14,8), %xmm19, %xmm28 + +// CHECK: vpmulhrsw 2032(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0x0b,0x62,0x7f] + vpmulhrsw 2032(%rdx), %xmm19, %xmm28 + +// CHECK: vpmulhrsw 2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0x0b,0xa2,0x00,0x08,0x00,0x00] + vpmulhrsw 2048(%rdx), %xmm19, %xmm28 + +// CHECK: vpmulhrsw -2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0x0b,0x62,0x80] + vpmulhrsw -2048(%rdx), %xmm19, %xmm28 + +// CHECK: vpmulhrsw -2064(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x62,0x65,0x00,0x0b,0xa2,0xf0,0xf7,0xff,0xff] + vpmulhrsw -2064(%rdx), %xmm19, 
%xmm28 + +// CHECK: vpmulhrsw %ymm26, %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x02,0x5d,0x20,0x0b,0xe2] + vpmulhrsw %ymm26, %ymm20, %ymm28 + +// CHECK: vpmulhrsw %ymm26, %ymm20, %ymm28 {%k3} +// CHECK: encoding: [0x62,0x02,0x5d,0x23,0x0b,0xe2] + vpmulhrsw %ymm26, %ymm20, %ymm28 {%k3} + +// CHECK: vpmulhrsw %ymm26, %ymm20, %ymm28 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x5d,0xa3,0x0b,0xe2] + vpmulhrsw %ymm26, %ymm20, %ymm28 {%k3} {z} + +// CHECK: vpmulhrsw (%rcx), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0x21] + vpmulhrsw (%rcx), %ymm20, %ymm28 + +// CHECK: vpmulhrsw 291(%rax,%r14,8), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x22,0x5d,0x20,0x0b,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmulhrsw 291(%rax,%r14,8), %ymm20, %ymm28 + +// CHECK: vpmulhrsw 4064(%rdx), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0x62,0x7f] + vpmulhrsw 4064(%rdx), %ymm20, %ymm28 + +// CHECK: vpmulhrsw 4096(%rdx), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0xa2,0x00,0x10,0x00,0x00] + vpmulhrsw 4096(%rdx), %ymm20, %ymm28 + +// CHECK: vpmulhrsw -4096(%rdx), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0x62,0x80] + vpmulhrsw -4096(%rdx), %ymm20, %ymm28 + +// CHECK: vpmulhrsw -4128(%rdx), %ymm20, %ymm28 +// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0xa2,0xe0,0xef,0xff,0xff] + vpmulhrsw -4128(%rdx), %ymm20, %ymm28 +