[AVX512] Add kunpck.wd / kunpck.dq mask-unpack intrinsics.

Declares int_x86_avx512_kunpck_wd (i32) and int_x86_avx512_kunpck_dq (i64),
moves the intrinsic selection pattern into the avx512_mask_unpck multiclass
(new GRC parameter) so KUNPCKBW/WD/DQ all get one, and adds codegen tests.
NOTE: leading whitespace on context lines was reconstructed; if a hunk does
not match the tree exactly, apply with `patch -l` (ignore whitespace).

Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -4019,6 +4019,12 @@
   def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
             Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
                       [IntrNoMem]>;
+  def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
+            Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                      [IntrNoMem]>;
+  def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
+            Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+                      [IntrNoMem]>;
   def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
             Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
                       [IntrNoMem]>;
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -2295,7 +2295,8 @@
 
 // Mask unpacking
 multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
-                             RegisterClass KRCSrc, Predicate prd> {
+                             RegisterClass KRCSrc, RegisterClass GRC,
+                             Predicate prd> {
   let Predicates = [prd] in {
     def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                (ins KRC:$src1, KRC:$src2),
@@ -2306,22 +2307,20 @@
            (!cast<Instruction>(NAME##rr)
                         (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                         (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
+
+    // Intrinsic call
+    def : Pat<(!cast<Intrinsic>("int_x86_avx512_kunpck_"#Suffix)
+                GRC:$src1, GRC:$src2),
+              (COPY_TO_REGCLASS (!cast<Instruction>(NAME##rr)
+                (VT (COPY_TO_REGCLASS GRC:$src1, KRC)),
+                (VT (COPY_TO_REGCLASS GRC:$src2, KRC))),
+              GRC)>;
   }
 }
 
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
-
-multiclass avx512_mask_unpck_int<string IntName, string InstName> {
-  let Predicates = [HasAVX512] in
-    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
-                (i16 GR16:$src1), (i16 GR16:$src2)),
-              (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
-              (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
-              (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
-}
-defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, GR16, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, GR32, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, GR64, HasBWI>, PS, VEX_W;
 
 // Mask bit testing
 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
Index: test/CodeGen/X86/avx512bw-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512bw-intrinsics.ll
+++ test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1267,3 +1267,32 @@
   %res2 = add <64 x i8> %res, %res1
   ret <64 x i8> %res2
 }
+
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
+
+define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_int_x86_avx512_kunpck_wd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k0
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    kunpckwd %k0, %k1, %k0
+; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    retq
+  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64@test_int_x86_avx512_kunpck_dq(i64 %x0, i64 %x1) {
+; CHECK-LABEL: test_int_x86_avx512_kunpck_dq:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rsi, %k0
+; CHECK-NEXT:    kmovq %rdi, %k1
+; CHECK-NEXT:    kunpckdq %k0, %k1, %k0
+; CHECK-NEXT:    kmovq %k0, %rax
+; CHECK-NEXT:    retq
+  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+  ret i64 %res
+}
+