Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -2594,6 +2594,72 @@
 }
 
 //===----------------------------------------------------------------------===//
+// TBM
+
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty,
+                                  llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty,
+                                  llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_tbm_blcfill_u32 : GCCBuiltin<"__builtin_ia32_blcfill_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcfill_u64 : GCCBuiltin<"__builtin_ia32_blcfill_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blci_u32 : GCCBuiltin<"__builtin_ia32_blci_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blci_u64 : GCCBuiltin<"__builtin_ia32_blci_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcic_u32 : GCCBuiltin<"__builtin_ia32_blcic_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcic_u64 : GCCBuiltin<"__builtin_ia32_blcic_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcmsk_u32 : GCCBuiltin<"__builtin_ia32_blcmsk_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcmsk_u64 : GCCBuiltin<"__builtin_ia32_blcmsk_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcs_u32 : GCCBuiltin<"__builtin_ia32_blcs_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blcs_u64 : GCCBuiltin<"__builtin_ia32_blcs_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blsfill_u32 : GCCBuiltin<"__builtin_ia32_blsfill_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blsfill_u64 : GCCBuiltin<"__builtin_ia32_blsfill_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blsic_u32 : GCCBuiltin<"__builtin_ia32_blsic_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_blsic_u64 : GCCBuiltin<"__builtin_ia32_blsic_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_t1mskc_u32 : GCCBuiltin<"__builtin_ia32_t1mskc_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_t1mskc_u64 : GCCBuiltin<"__builtin_ia32_t1mskc_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_tzmsk_u32 : GCCBuiltin<"__builtin_ia32_tzmsk_u32">,
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+                  [IntrNoMem]>;
+  def int_x86_tbm_tzmsk_u64 : GCCBuiltin<"__builtin_ia32_tzmsk_u64">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
+                  [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
 // RDRAND intrinsics - Return a random value and whether it is valid.
 // RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and
 // whether it is valid.
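For reference, a minimal IR-level sketch of how the new intrinsics are reached
from IR (the function name @extract_byte1 and the control value are
illustrative, not part of the patch; per the AMD TBM documentation, the BEXTRI
control word packs the start bit in bits 7:0 and the field length in bits
15:8):

declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone

define i32 @extract_byte1(i32 %x) nounwind readnone {
entry:
  ; Extract 8 bits starting at bit 8:
  ; cntl = (len << 8) | start = (8 << 8) | 8 = 0x0808 = 2056.
  %r = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %x, i32 2056)
  ret i32 %r
}
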
Index: lib/Target/X86/X86InstrFormats.td
===================================================================
--- lib/Target/X86/X86InstrFormats.td
+++ lib/Target/X86/X86InstrFormats.td
@@ -139,6 +139,7 @@
 class TAXD { bits<5> Prefix = 19; }
 class XOP8 { bits<5> Prefix = 20; }
 class XOP9 { bits<5> Prefix = 21; }
+class XOPA { bits<5> Prefix = 22; }
 class VEX { bit hasVEXPrefix = 1; }
 class VEX_W { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -664,6 +664,7 @@
 def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
 def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
 def HasXOP : Predicate<"Subtarget->hasXOP()">;
+def HasTBM : Predicate<"Subtarget->hasTBM()">;
 def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
 def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
 def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -1907,6 +1908,84 @@
 }
 
 //===----------------------------------------------------------------------===//
+// TBM Instructions
+//
+let isAsmParserOnly = 1, Predicates = [HasTBM], Defs = [EFLAGS] in {
+
+multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
+                                X86MemOperand x86memop, PatFrag ld_frag,
+                                Intrinsic Int> {
+  def rr : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32imm:$cntl),
+                !strconcat(OpcodeStr,
+                           "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+                [(set RC:$dst, (Int RC:$src1, imm:$cntl))]>,
+           XOP, XOPA, VEX;
+  def mr : Ii32<opc, MRMSrcMem, (outs RC:$dst),
+                (ins x86memop:$src1, i32imm:$cntl),
+                !strconcat(OpcodeStr,
+                           "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+                [(set RC:$dst, (Int (ld_frag addr:$src1), imm:$cntl))]>,
+           XOP, XOPA, VEX;
+}
+
+defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32,
+                                     int_x86_tbm_bextri_u32>;
+defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64,
+                                     int_x86_tbm_bextri_u64>, VEX_W;
+
+multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
+                         RegisterClass RC, string OpcodeStr,
+                         X86MemOperand x86memop, PatFrag ld_frag,
+                         Intrinsic Int> {
+  def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
+             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+             [(set RC:$dst, (Int RC:$src))]>,
+           XOP, XOP9, VEX_4V;
+  def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
+             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+             [(set RC:$dst, (Int (ld_frag addr:$src)))]>,
+           XOP, XOP9, VEX_4V;
+}
+
+multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
+                           Format FormReg, Format FormMem,
+                           Intrinsic Int32, Intrinsic Int64> {
+  defm _32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr, i32mem,
+                           loadi32, Int32>;
+  defm _64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr, i64mem,
+                           loadi64, Int64>, VEX_W;
+}
+
+defm BLCFILL : tbm_binary_intr<0x01, "blcfill", MRM1r, MRM1m,
+                               int_x86_tbm_blcfill_u32,
+                               int_x86_tbm_blcfill_u64>;
+defm BLCI    : tbm_binary_intr<0x02, "blci", MRM6r, MRM6m,
+                               int_x86_tbm_blci_u32,
+                               int_x86_tbm_blci_u64>;
+defm BLCIC   : tbm_binary_intr<0x01, "blcic", MRM5r, MRM5m,
+                               int_x86_tbm_blcic_u32,
+                               int_x86_tbm_blcic_u64>;
+defm BLCMSK  : tbm_binary_intr<0x02, "blcmsk", MRM1r, MRM1m,
+                               int_x86_tbm_blcmsk_u32,
+                               int_x86_tbm_blcmsk_u64>;
+defm BLCS    : tbm_binary_intr<0x01, "blcs", MRM3r, MRM3m,
+                               int_x86_tbm_blcs_u32,
+                               int_x86_tbm_blcs_u64>;
+defm BLSFILL : tbm_binary_intr<0x01, "blsfill", MRM2r, MRM2m,
+                               int_x86_tbm_blsfill_u32,
+                               int_x86_tbm_blsfill_u64>;
+defm BLSIC   : tbm_binary_intr<0x01, "blsic", MRM6r, MRM6m,
+                               int_x86_tbm_blsic_u32,
+                               int_x86_tbm_blsic_u64>;
+defm T1MSKC  : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m,
+                               int_x86_tbm_t1mskc_u32,
+                               int_x86_tbm_t1mskc_u64>;
+defm TZMSK   : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m,
+                               int_x86_tbm_tzmsk_u32,
+                               int_x86_tbm_tzmsk_u64>;
+} // isAsmParserOnly, HasTBM, EFLAGS
+
+//===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//
 
Index: test/CodeGen/X86/tbm-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/tbm-intrinsics-x86.ll
+++ test/CodeGen/X86/tbm-intrinsics-x86.ll
@@ -0,0 +1,422 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+tbm < %s | FileCheck %s
+
+define i32 @BEXTRI(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BEXTRI:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 2814)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
+
+define i32 @BEXTRI_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BEXTRI_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %tmp1, i32 2814)
+  ret i32 %0
+}
+
+define i64 @BEXTRI64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BEXTRI64:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i32 2814)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.bextri.u64(i64, i32) nounwind readnone
+
+define i64 @BEXTRI64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BEXTRI64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i32 2814)
+  ret i64 %0
+}
+
+define i32 @BLCFILL(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCFILL:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = tail call i32 @llvm.x86.tbm.blcfill.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blcfill.u32(i32) nounwind readnone
+
+define i32 @BLCFILL_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCFILL_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blcfill.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLCFILL64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCFILL64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = tail call i64 @llvm.x86.tbm.blcfill.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blcfill.u64(i64) nounwind readnone
+
+define i64 @BLCFILL64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCFILL64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blcfill.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLCI(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCI:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = tail call i32 @llvm.x86.tbm.blci.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blci.u32(i32) nounwind readnone
+
+define i32 @BLCI_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCI_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blci (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blci.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLCI64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCI64:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = tail call i64 @llvm.x86.tbm.blci.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blci.u64(i64) nounwind readnone
+
+define i64 @BLCI64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCI64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blci (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blci.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLCIC(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCIC:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = tail call i32 @llvm.x86.tbm.blcic.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blcic.u32(i32) nounwind readnone
+
+define i32 @BLCIC_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCIC_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blcic.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLCIC64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCIC64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = tail call i64 @llvm.x86.tbm.blcic.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blcic.u64(i64) nounwind readnone
+
+define i64 @BLCIC64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCIC64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blcic.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLCMSK(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCMSK:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = tail call i32 @llvm.x86.tbm.blcmsk.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blcmsk.u32(i32) nounwind readnone
+
+define i32 @BLCMSK_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCMSK_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blcmsk.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLCMSK64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCMSK64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = tail call i64 @llvm.x86.tbm.blcmsk.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blcmsk.u64(i64) nounwind readnone
+
+define i64 @BLCMSK64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCMSK64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blcmsk.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLCS(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCS:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = tail call i32 @llvm.x86.tbm.blcs.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blcs.u32(i32) nounwind readnone
+
+define i32 @BLCS_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCS_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blcs.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLCS64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLCS64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = tail call i64 @llvm.x86.tbm.blcs.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blcs.u64(i64) nounwind readnone
+
+define i64 @BLCS64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLCS64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blcs.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLSFILL(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLSFILL:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = tail call i32 @llvm.x86.tbm.blsfill.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blsfill.u32(i32) nounwind readnone
+
+define i32 @BLSFILL_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLSFILL_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blsfill.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLSFILL64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLSFILL64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = tail call i64 @llvm.x86.tbm.blsfill.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blsfill.u64(i64) nounwind readnone
+
+define i64 @BLSFILL64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLSFILL64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blsfill.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @BLSIC(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: BLSIC:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = tail call i32 @llvm.x86.tbm.blsic.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.blsic.u32(i32) nounwind readnone
+
+define i32 @BLSIC_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLSIC_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.blsic.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @BLSIC64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: BLSIC64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = tail call i64 @llvm.x86.tbm.blsic.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.blsic.u64(i64) nounwind readnone
+
+define i64 @BLSIC64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: BLSIC64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.blsic.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @T1MSKC(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: T1MSKC:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = tail call i32 @llvm.x86.tbm.t1mskc.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.t1mskc.u32(i32) nounwind readnone
+
+define i32 @T1MSKC_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: T1MSKC_m:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.t1mskc.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @T1MSKC64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: T1MSKC64:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = tail call i64 @llvm.x86.tbm.t1mskc.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.t1mskc.u64(i64) nounwind readnone
+
+define i64 @T1MSKC64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: T1MSKC64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.t1mskc.u64(i64 %tmp1)
+  ret i64 %0
+}
+
+define i32 @TZMSK(i32 %a) nounwind readnone {
+entry:
+  ; CHECK: TZMSK:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = tail call i32 @llvm.x86.tbm.tzmsk.u32(i32 %a)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.tzmsk.u32(i32) nounwind readnone
+
+define i32 @TZMSK_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: TZMSK_m:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk (%
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.tzmsk.u32(i32 %tmp1)
+  ret i32 %0
+}
+
+define i64 @TZMSK64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK: TZMSK64:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = tail call i64 @llvm.x86.tbm.tzmsk.u64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.tzmsk.u64(i64) nounwind readnone
+
+define i64 @TZMSK64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK: TZMSK64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk (%
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.tzmsk.u64(i64 %tmp1)
+  ret i64 %0
+}
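
As a semantic cross-check on the tests above: per the AMD TBM documentation,
BLCFILL computes x & (x + 1), clearing all trailing one bits. A minimal sketch
of that equivalence in plain IR (the function names below are illustrative;
this patch only wires up the intrinsics and does not pattern-match the
expanded form):

declare i32 @llvm.x86.tbm.blcfill.u32(i32) nounwind readnone

define i32 @blcfill_intrinsic(i32 %x) nounwind readnone {
entry:
  %0 = tail call i32 @llvm.x86.tbm.blcfill.u32(i32 %x)
  ret i32 %0
}

; Equivalent plain-IR computation: x + 1 carries through the trailing
; run of one bits, so the AND clears exactly that run.
define i32 @blcfill_expanded(i32 %x) nounwind readnone {
entry:
  %inc = add i32 %x, 1
  %res = and i32 %x, %inc
  ret i32 %res
}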