Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2763,6 +2763,18 @@ //===----------------------------------------------------------------------===// // AVX512 +// Mask ops +let TargetPrefix = "x86" in { + def int_x86_avx512_kadd_b : GCCBuiltin<"__builtin_ia32_kaddqi">, + Intrinsic<[llvm_v8i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>; + def int_x86_avx512_kadd_w : GCCBuiltin<"__builtin_ia32_kaddhi">, + Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>; + def int_x86_avx512_kadd_d : GCCBuiltin<"__builtin_ia32_kaddsi">, + Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>; + def int_x86_avx512_kadd_q : GCCBuiltin<"__builtin_ia32_kadddi">, + Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>; +} + // Conversion ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -404,6 +404,10 @@ X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0), + X86_INTRINSIC_DATA(avx512_kadd_b, INTR_TYPE_2OP, X86ISD::KADD, 0), + X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0), + X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0), + X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0), X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FADDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM, Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2,6 +2,62 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 +define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind { +; CHECK-LABEL: test_int_x86_avx512_kadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0] +; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9] +; CHECK-NEXT: kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1] +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0] +; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = icmp ne <32 x i16> %A, zeroinitializer + %1 = icmp ne <32 x i16> %B, zeroinitializer + %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1) + %3 = bitcast <32 x i1> %2 to i32 + %4 = icmp eq i32 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>) + +define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind { +; X86-LABEL: test_int_x86_avx512_kadd_q: +; X86: # %bb.0: # %entry +; X86-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0] +; X86-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9] +; X86-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1] +; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] +; X86-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; X86-NEXT: kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1] +; X86-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_kadd_q: +; X64: # %bb.0: # %entry +; X64-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0] +; X64-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9] +; X64-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1] +; X64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; X64-NEXT: kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0] +; X64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] +entry: + %0 = icmp ne <64 x i8> %A, zeroinitializer + %1 = icmp ne <64 x i8> %B, zeroinitializer + %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1) + %3 = bitcast <64 x i1> %2 to i64 + %4 = icmp eq i64 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>) + define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_mask_packs_epi32_rr_512: ; CHECK: # %bb.0: Index: test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512dq-intrinsics.ll +++ test/CodeGen/X86/avx512dq-intrinsics.ll @@ -4,6 +4,50 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512DQ,X64-AVX512DQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512DQVL,X64-AVX512DQVL +define i32 @test_int_x86_avx512_kadd_w(<16 x i32> %A, <16 x i32> %B) nounwind { +; CHECK-LABEL: test_int_x86_avx512_kadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0] +; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9] +; CHECK-NEXT: kaddw %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4a,0xc1] +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: kortestw %k0, %k0 # encoding: [0xc5,0xf8,0x98,0xc0] +; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = icmp ne <16 x i32> %A, zeroinitializer + %1 = icmp ne <16 x i32> %B, zeroinitializer + %2 = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> %0, <16 x i1> %1) + %3 = bitcast <16 x i1> %2 to i16 + %4 = icmp eq i16 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1>, <16 x i1>) + +define i32 @test_int_x86_avx512_kadd_b(<8 x i64> %A, <8 x i64> %B) nounwind { +; CHECK-LABEL: test_int_x86_avx512_kadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0] +; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9] +; CHECK-NEXT: kaddb %k1, %k0, %k0 # encoding: [0xc5,0xfd,0x4a,0xc1] +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: kortestb %k0, %k0 # encoding: [0xc5,0xf9,0x98,0xc0] +; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +entry: + %0 = icmp ne <8 x i64> %A, zeroinitializer + %1 = icmp ne <8 x i64> %B, zeroinitializer + %2 = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> %0, <8 x i1> %1) + %3 = bitcast <8 x i1> %2 to i8 + %4 = icmp eq i8 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>) + declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32) define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {