diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1235,34 +1235,34 @@ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ff2bf16x2_rn : GCCBuiltin<"__nvvm_ff2bf16x2_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2bf16x2_rn_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rn_relu">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2bf16x2_rz : GCCBuiltin<"__nvvm_ff2bf16x2_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2bf16x2_rz_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rz_relu">, Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_nvvm_ff2f16x2_rn : GCCBuiltin<"__nvvm_ff2f16x2_rn">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2f16x2_rn_relu : GCCBuiltin<"__nvvm_ff2f16x2_rn_relu">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2f16x2_rz : GCCBuiltin<"__nvvm_ff2f16x2_rz">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff2f16x2_rz_relu : GCCBuiltin<"__nvvm_ff2f16x2_rz_relu">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2bf16_rn : GCCBuiltin<"__nvvm_f2bf16_rn">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2bf16_rn_relu : GCCBuiltin<"__nvvm_f2bf16_rn_relu">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2bf16_rz : GCCBuiltin<"__nvvm_f2bf16_rz">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2bf16_rz_relu : GCCBuiltin<"__nvvm_f2bf16_rz_relu">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2tf32_rna : GCCBuiltin<"__nvvm_f2tf32_rna">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; // // Bitcast @@ -1287,20 +1287,20 @@ // Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; class SCOPED_ATOMIC2_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; class SCOPED_ATOMIC3_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture>]>; + [IntrArgMemOnly, IntrNoCallback, NoCapture>]>; multiclass PTXAtomicWithScope2 { def _cta : SCOPED_ATOMIC2_impl; @@ -1330,80 +1330,80 @@ // The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the // intrinsics in this file, this one is a user-facing API. def int_nvvm_barrier0 : GCCBuiltin<"__syncthreads">, - Intrinsic<[], [], [IntrConvergent]>; + Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>; // Synchronize all threads in the CTA at barrier 'n'. def int_nvvm_barrier_n : GCCBuiltin<"__nvvm_bar_n">, - Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>; + Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; // Synchronize 'm', a multiple of warp size, (arg 2) threads in // the CTA at barrier 'n' (arg 1). def int_nvvm_barrier : GCCBuiltin<"__nvvm_bar">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>; def int_nvvm_bar_sync : - Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, + Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>, GCCBuiltin<"__nvvm_bar_sync">; def int_nvvm_bar_warp_sync : - Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, + Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>, GCCBuiltin<"__nvvm_bar_warp_sync">; // barrier.sync id[, cnt] def int_nvvm_barrier_sync : - Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, + Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>, GCCBuiltin<"__nvvm_barrier_sync">; def int_nvvm_barrier_sync_cnt : - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>, GCCBuiltin<"__nvvm_barrier_sync_cnt">; // Membar def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">, - Intrinsic<[], [], []>; + Intrinsic<[], [], [IntrNoCallback]>; def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">, - Intrinsic<[], [], []>; + Intrinsic<[], [], [IntrNoCallback]>; def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">, - Intrinsic<[], [], []>; + Intrinsic<[], [], [IntrNoCallback]>; // Async Copy def int_nvvm_cp_async_mbarrier_arrive : GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive">, - Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_cp_async_mbarrier_arrive_shared : GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">, - Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_cp_async_mbarrier_arrive_noinc : GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">, - Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_cp_async_mbarrier_arrive_noinc_shared : GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">, - Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_cp_async_ca_shared_global_4 : GCCBuiltin<"__nvvm_cp_async_ca_shared_global_4">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.4">; def int_nvvm_cp_async_ca_shared_global_8 : GCCBuiltin<"__nvvm_cp_async_ca_shared_global_8">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.8">; def int_nvvm_cp_async_ca_shared_global_16 : GCCBuiltin<"__nvvm_cp_async_ca_shared_global_16">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.ca.shared.global.16">; def int_nvvm_cp_async_cg_shared_global_16 : GCCBuiltin<"__nvvm_cp_async_cg_shared_global_16">, Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty], - [IntrArgMemOnly, NoAlias>, NoAlias>, + [IntrArgMemOnly, IntrNoCallback, NoAlias>, NoAlias>, WriteOnly>, ReadOnly>], "llvm.nvvm.cp.async.cg.shared.global.16">; @@ -1421,85 +1421,87 @@ // mbarrier def int_nvvm_mbarrier_init : GCCBuiltin<"__nvvm_mbarrier_init">, - Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_init_shared : GCCBuiltin<"__nvvm_mbarrier_init_shared">, - Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_inval : GCCBuiltin<"__nvvm_mbarrier_inval">, Intrinsic<[],[llvm_i64ptr_ty], - [IntrConvergent, IntrWriteMem, IntrArgMemOnly, + [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>]>; def int_nvvm_mbarrier_inval_shared : GCCBuiltin<"__nvvm_mbarrier_inval_shared">, Intrinsic<[],[llvm_shared_i64ptr_ty], - [IntrConvergent, IntrWriteMem, IntrArgMemOnly, + [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>]>; def int_nvvm_mbarrier_arrive : GCCBuiltin<"__nvvm_mbarrier_arrive">, - Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_shared : GCCBuiltin<"__nvvm_mbarrier_arrive_shared">, - Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_noComplete : GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete">, - Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_noComplete_shared : GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">, - Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, + llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_drop : GCCBuiltin<"__nvvm_mbarrier_arrive_drop">, - Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_drop_shared : GCCBuiltin<"__nvvm_mbarrier_arrive_drop_shared">, - Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_drop_noComplete : GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">, - Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_arrive_drop_noComplete_shared : GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">, - Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>; + Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, + llvm_i32_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_test_wait : GCCBuiltin<"__nvvm_mbarrier_test_wait">, - Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>; + Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_test_wait_shared : GCCBuiltin<"__nvvm_mbarrier_test_wait_shared">, - Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>; + Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>; def int_nvvm_mbarrier_pending_count : GCCBuiltin<"__nvvm_mbarrier_pending_count">, - Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent]>; + Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>; // Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the // pointer's alignment. def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldu.global.i">; def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldu.global.f">; def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldu.global.p">; // Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the // pointer's alignment. def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldg.global.i">; def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldg.global.f">; def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture>], "llvm.nvvm.ldg.global.p">; // Use for generic pointers @@ -1540,7 +1542,7 @@ // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [IntrNoMem, IntrSpeculatable], + [IntrNoMem, IntrSpeculatable, IntrNoCallback], "llvm.nvvm.ptr.gen.to.param">; // Move intrinsics, used in nvvm internally @@ -4353,13 +4355,13 @@ // FIXME: Enable this once v4i32 support is enabled in back-end. // def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>; - def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">; - def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">; - def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">; - def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, + def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">; } @@ -4373,10 +4375,10 @@ // Intrinsics to read registers with non-constant values. E.g. the values that // do change over the kernel lifetime. Such reads should not be CSE'd. class PTXReadNCSRegIntrinsic_r32 - : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>, + : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; class PTXReadNCSRegIntrinsic_r64 - : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>, + : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">; @@ -4426,12 +4428,14 @@ if i.withGccBuiltin then { def i.Name : GCCBuiltin, Intrinsic; } if i.withoutGccBuiltin then { def i.Name : Intrinsic; + [IntrInaccessibleMemOnly, IntrConvergent, + IntrNoCallback], i.IntrName>; } } } @@ -4446,22 +4450,22 @@ // vote.all pred def int_nvvm_vote_all : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">, GCCBuiltin<"__nvvm_vote_all">; // vote.any pred def int_nvvm_vote_any : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">, GCCBuiltin<"__nvvm_vote_any">; // vote.uni pred def int_nvvm_vote_uni : Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">, GCCBuiltin<"__nvvm_vote_uni">; // vote.ballot pred def int_nvvm_vote_ballot : Intrinsic<[llvm_i32_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">, GCCBuiltin<"__nvvm_vote_ballot">; // @@ -4471,22 +4475,22 @@ // vote.sync.all mask, pred def int_nvvm_vote_all_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all.sync">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">, GCCBuiltin<"__nvvm_vote_all_sync">; // vote.sync.any mask, pred def int_nvvm_vote_any_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any.sync">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">, GCCBuiltin<"__nvvm_vote_any_sync">; // vote.sync.uni mask, pred def int_nvvm_vote_uni_sync : Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni.sync">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">, GCCBuiltin<"__nvvm_vote_uni_sync">; // vote.sync.ballot mask, pred def int_nvvm_vote_ballot_sync : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot.sync">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">, GCCBuiltin<"__nvvm_vote_ballot_sync">; // @@ -4495,12 +4499,12 @@ // match.any.sync.b32 mask, value def int_nvvm_match_any_sync_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i32">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">, GCCBuiltin<"__nvvm_match_any_sync_i32">; // match.any.sync.b64 mask, value def int_nvvm_match_any_sync_i64 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">, + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">, GCCBuiltin<"__nvvm_match_any_sync_i64">; // match.all instruction have two variants -- one returns a single value, another @@ -4510,11 +4514,11 @@ // match.all.sync.b32p mask, value def int_nvvm_match_all_sync_i32p : Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">; // match.all.sync.b64p mask, value def int_nvvm_match_all_sync_i64p : Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">; // // REDUX.SYNC @@ -4522,42 +4526,42 @@ // redux.sync.min.u32 dst, src, membermask; def int_nvvm_redux_sync_umin : GCCBuiltin<"__nvvm_redux_sync_umin">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.max.u32 dst, src, membermask; def int_nvvm_redux_sync_umax : GCCBuiltin<"__nvvm_redux_sync_umax">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.add.s32 dst, src, membermask; def int_nvvm_redux_sync_add : GCCBuiltin<"__nvvm_redux_sync_add">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.min.s32 dst, src, membermask; def int_nvvm_redux_sync_min : GCCBuiltin<"__nvvm_redux_sync_min">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.max.s32 dst, src, membermask; def int_nvvm_redux_sync_max : GCCBuiltin<"__nvvm_redux_sync_max">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.and.b32 dst, src, membermask; def int_nvvm_redux_sync_and : GCCBuiltin<"__nvvm_redux_sync_and">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.xor.b32 dst, src, membermask; def int_nvvm_redux_sync_xor : GCCBuiltin<"__nvvm_redux_sync_xor">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // redux.sync.or.b32 dst, src, membermask; def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly]>; + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // // WMMA instructions @@ -4566,7 +4570,7 @@ class NVVM_WMMA_LD : Intrinsic>, NoCapture>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly>, NoCapture>], WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>; // WMMA.STORE.D @@ -4576,7 +4580,7 @@ [llvm_anyptr_ty], Frag.regs, !if(WithStride, [llvm_i32_ty], [])), - [IntrWriteMem, IntrArgMemOnly, WriteOnly>, NoCapture>], + [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>], WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>; // Create all load/store variants @@ -4599,7 +4603,7 @@ WMMA_REGS C, WMMA_REGS D> : Intrinsic.llvm>; foreach layout_a = ["row", "col"] in { @@ -4626,7 +4630,7 @@ WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> : Intrinsic.llvm>; foreach layout_a = ["row", "col"] in { @@ -4647,7 +4651,7 @@ // LDMATRIX class NVVM_LDMATRIX : Intrinsic>, + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly>, NoCapture>], LDMATRIX_NAME.intr>; diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -246,7 +246,7 @@ !13 = !{i32 7, !"openmp-device", i32 50} ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind willreturn } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -145,12 +145,12 @@ ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X]] to i32* ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[X_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR8:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR9:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -164,7 +164,7 @@ ; CHECK: master1: ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*) to [4 x i32]* ; CHECK-NEXT: [[A0:%.*]] = bitcast [4 x i32]* [[X_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR8]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR7]] ; CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() @@ -172,7 +172,7 @@ ; CHECK: master2: ; CHECK-NEXT: [[Y_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*) to [4 x i32]* ; CHECK-NEXT: [[B1:%.*]] = bitcast [4 x i32]* [[Y_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR8]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR7]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -186,11 +186,11 @@ ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR7]], !dbg [[DBG10:![0-9]+]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] ; CHECK-NEXT: [[Z_ON_STACK:%.*]] = bitcast i8* [[Z]] to [6 x i32]* ; CHECK-NEXT: [[C1:%.*]] = bitcast [6 x i32]* [[Z_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR8]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR9]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR7]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -223,12 +223,11 @@ ; CHECK: attributes #[[ATTR1]] = { nofree nounwind writeonly } ; CHECK: attributes #[[ATTR2]] = { nosync nounwind readonly allocsize(0) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind readnone speculatable } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR7]] = { nounwind readonly } -; CHECK: attributes #[[ATTR8]] = { nounwind writeonly } -; CHECK: attributes #[[ATTR9]] = { nounwind } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR6]] = { nounwind readonly } +; CHECK: attributes #[[ATTR7]] = { nounwind writeonly } +; CHECK: attributes #[[ATTR8]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")