Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -2210,6 +2210,42 @@
   def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
               llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  // Masked logical right shift; the count is passed as a 128-bit vector.
+  // Operands: (source, count, pass-through, mask).
+  def int_x86_avx512_mask_psrl_d_128 : GCCBuiltin<"__builtin_ia32_psrld128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+              llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrl_d_256 : GCCBuiltin<"__builtin_ia32_psrld256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+              llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrl_q_128 : GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+              llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrl_q_256 : GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+              llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  // Masked logical right shift by an i8 immediate.
+  // Operands: (source, immediate, pass-through, mask).
+  def int_x86_avx512_mask_psrli_d_128 : GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+              llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrli_d_256 : GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+              llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+              llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrli_q_128 : GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+              llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrli_q_256 : GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+              llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
+              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+              llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
 }
 
 // Pack ops.
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -1162,9 +1162,19 @@
   X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_d_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrli_q_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll
+++ test/CodeGen/X86/avx512-intrinsics.ll
@@ -6328,3 +6328,32 @@
   ret <2 x double> %res
 }
 
+; 512-bit masked logical right shift by immediate: merge-masked and zero-masked forms.
+declare <16 x i32> @llvm.x86.avx512.mask.psrli.d.512(<16 x i32>, i8, <16 x i32>, i16) nounwind readnone
+define <16 x i32> @test_psrldi_512(<16 x i32> %src, i8 %imm, <16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_psrldi_512:
+  ; CHECK: vpsrld $255, %zmm0, %zmm1 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d.512(<16 x i32> %src, i8 255, <16 x i32> %a0, i16 %mask)
+  ret <16 x i32> %res
+}
+define <16 x i32> @test_psrldi_512z(<16 x i32> %src, i8 %imm, <16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_psrldi_512z:
+  ; CHECK: vpsrld $255, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d.512(<16 x i32> %src, i8 255, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrli.q.512(<8 x i64>, i8, <8 x i64>, i8) nounwind readnone
+define <8 x i64> @test_psrlqi_512(<8 x i64> %src, i8 %imm, <8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_512:
+  ; CHECK: vpsrlq $255, %zmm0, %zmm1 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q.512(<8 x i64> %src, i8 255, <8 x i64> %a0, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_psrlqi_512z(<8 x i64> %src, i8 %imm, <8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_512z:
+  ; CHECK: vpsrlq $255, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q.512(<8 x i64> %src, i8 255, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
Index: test/CodeGen/X86/avx512vl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512vl-intrinsics.ll
+++ test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -5530,3 +5530,117 @@
   %res4 = fadd <4 x double> %res2, %res3
   ret <4 x double> %res4
 }
+
+; 128/256-bit masked logical right shifts; a zeroinitializer pass-through is expected to select the {z} form.
+declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) nounwind readnone
+define <4 x i32> @test_psrld_128(<4 x i32> %src, <4 x i32> %count, <4 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrld_128:
+  ; CHECK: vpsrld %xmm1, %xmm0, %xmm2 {%k1}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %src, <4 x i32> %count, <4 x i32> %a0, i8 %mask)
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_psrld_128z(<4 x i32> %src, <4 x i32> %count, <4 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrld_128z:
+  ; CHECK: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %src, <4 x i32> %count, <4 x i32> zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) nounwind readnone
+define <8 x i32> @test_psrld_256(<8 x i32> %src, <4 x i32> %count, <8 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrld_256:
+  ; CHECK: vpsrld %xmm1, %ymm0, %ymm2 {%k1}
+  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %src, <4 x i32> %count, <8 x i32> %a0, i8 %mask)
+  ret <8 x i32> %res
+}
+define <8 x i32> @test_psrld_256z(<8 x i32> %src, <4 x i32> %count, <8 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrld_256z:
+  ; CHECK: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z}
+  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %src, <4 x i32> %count, <8 x i32> zeroinitializer, i8 %mask)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.psrli.d.128(<4 x i32>, i8, <4 x i32>, i8) nounwind readnone
+define <4 x i32> @test_psrldi_128(<4 x i32> %src, i8 %imm, <4 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrldi_128:
+  ; CHECK: vpsrld $255, %xmm0, %xmm1 {%k1}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.psrli.d.128(<4 x i32> %src, i8 255, <4 x i32> %a0, i8 %mask)
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_psrldi_128z(<4 x i32> %src, i8 %imm, <4 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrldi_128z:
+  ; CHECK: vpsrld $255, %xmm0, %xmm0 {%k1} {z}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.psrli.d.128(<4 x i32> %src, i8 255, <4 x i32> zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
+define <2 x i64> @test_psrlq_128(<2 x i64> %src, <2 x i64> %count, <2 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlq_128:
+  ; CHECK: vpsrlq %xmm1, %xmm0, %xmm2 {%k1}
+  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %src, <2 x i64> %count, <2 x i64> %a0, i8 %mask)
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_psrlq_128z(<2 x i64> %src, <2 x i64> %count, <2 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlq_128z:
+  ; CHECK: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z}
+  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %src, <2 x i64> %count, <2 x i64> zeroinitializer, i8 %mask)
+  ret <2 x i64> %res
+}
+
+
+declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) nounwind readnone
+define <4 x i64> @test_psrlq_256(<4 x i64> %src, <2 x i64> %count, <4 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlq_256:
+  ; CHECK: vpsrlq %xmm1, %ymm0, %ymm2 {%k1}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %src, <2 x i64> %count, <4 x i64> %a0, i8 %mask)
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_psrlq_256z(<4 x i64> %src, <2 x i64> %count, <4 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlq_256z:
+  ; CHECK: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %src, <2 x i64> %count, <4 x i64> zeroinitializer, i8 %mask)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psrli.d.256(<8 x i32>, i8, <8 x i32>, i8) nounwind readnone
+define <8 x i32> @test_psrldi_256(<8 x i32> %src, i8 %imm, <8 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrldi_256:
+  ; CHECK: vpsrld $255, %ymm0, %ymm1 {%k1}
+  %res = call <8 x i32> @llvm.x86.avx512.mask.psrli.d.256(<8 x i32> %src, i8 255, <8 x i32> %a0, i8 %mask)
+  ret <8 x i32> %res
+}
+define <8 x i32> @test_psrldi_256z(<8 x i32> %src, i8 %imm, <8 x i32> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrldi_256z:
+  ; CHECK: vpsrld $255, %ymm0, %ymm0 {%k1} {z}
+  %res = call <8 x i32> @llvm.x86.avx512.mask.psrli.d.256(<8 x i32> %src, i8 255, <8 x i32> zeroinitializer, i8 %mask)
+  ret <8 x i32> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.psrli.q.128(<2 x i64>, i8, <2 x i64>, i8) nounwind readnone
+define <2 x i64> @test_psrlqi_128(<2 x i64> %src, i8 %imm, <2 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_128:
+  ; CHECK: vpsrlq $255, %xmm0, %xmm1 {%k1}
+  %res = call <2 x i64> @llvm.x86.avx512.mask.psrli.q.128(<2 x i64> %src, i8 255, <2 x i64> %a0, i8 %mask)
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_psrlqi_128z(<2 x i64> %src, i8 %imm, <2 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_128z:
+  ; CHECK: vpsrlq $255, %xmm0, %xmm0 {%k1} {z}
+  %res = call <2 x i64> @llvm.x86.avx512.mask.psrli.q.128(<2 x i64> %src, i8 255, <2 x i64> zeroinitializer, i8 %mask)
+  ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.psrli.q.256(<4 x i64>, i8, <4 x i64>, i8) nounwind readnone
+define <4 x i64> @test_psrlqi_256(<4 x i64> %src, i8 %imm, <4 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_256:
+  ; CHECK: vpsrlq $255, %ymm0, %ymm1 {%k1}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.psrli.q.256(<4 x i64> %src, i8 255, <4 x i64> %a0, i8 %mask)
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_psrlqi_256z(<4 x i64> %src, i8 %imm, <4 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_psrlqi_256z:
+  ; CHECK: vpsrlq $255, %ymm0, %ymm0 {%k1} {z}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.psrli.q.256(<4 x i64> %src, i8 255, <4 x i64> zeroinitializer, i8 %mask)
+  ret <4 x i64> %res
+}