diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -796,7 +796,7 @@ // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} -// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind readonly } +// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { mustprogress nocallback nofree nosync nounwind readonly willreturn } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[$EXEC]] = !{!"exec"} // CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"} diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -510,10 +510,10 @@ def int_frameaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_sponentry : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; -def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], +def int_read_register : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], - [], "llvm.write_register">; + [IntrNoCallback], "llvm.write_register">; def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrHasSideEffects], "llvm.read_volatile_register">; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// class AMDGPUReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; class AMDGPUReadPreloadRegisterIntrinsicNamed - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, ClangBuiltin; + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, ClangBuiltin; // Used to tag image and resource intrinsics with information used to generate // mem operands. @@ -53,33 +53,33 @@ // AS 7 is PARAM_I_ADDRESS, used for kernel arguments def int_r600_implicitarg_ptr : ClangBuiltin<"__builtin_r600_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_r600_rat_store_typed : // 1st parameter: Data // 2nd parameter: Index // 3rd parameter: Constant RAT ID - Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>, + DefaultAttrsIntrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>, ClangBuiltin<"__builtin_r600_rat_store_typed">; -def int_r600_recipsqrt_ieee : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_recipsqrt_ieee : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_r600_recipsqrt_clamped : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_recipsqrt_clamped : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_r600_cube : Intrinsic< - [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_cube : DefaultAttrsIntrinsic< + [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_r600_store_stream_output : Intrinsic< - [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn] +def int_r600_store_stream_output : DefaultAttrsIntrinsic< + [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] >; -class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [ +class TextureIntrinsicFloatInput : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [ llvm_v4f32_ty, // Coord llvm_i32_ty, // offset_x llvm_i32_ty, // offset_y, @@ -90,10 +90,10 @@ llvm_i32_ty, // coord_type_y llvm_i32_ty, // coord_type_z llvm_i32_ty], // coord_type_w - [IntrNoMem, IntrWillReturn] + [IntrNoMem] >; -class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [ +class TextureIntrinsicInt32Input : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty, // Coord llvm_i32_ty, // offset_x llvm_i32_ty, // offset_y, @@ -104,11 +104,11 @@ llvm_i32_ty, // coord_type_y llvm_i32_ty, // coord_type_z llvm_i32_ty], // coord_type_w - [IntrNoMem, IntrWillReturn] + [IntrNoMem] >; def int_r600_store_swizzle : - Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn] + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_r600_tex : TextureIntrinsicFloatInput; @@ -122,11 +122,11 @@ def int_r600_ddx : TextureIntrinsicFloatInput; def int_r600_ddy : TextureIntrinsicFloatInput; -def int_r600_dot4 : Intrinsic<[llvm_float_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_dot4 : DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_r600_kill : Intrinsic<[], [llvm_float_ty], [IntrWillReturn]>; +def int_r600_kill : DefaultAttrsIntrinsic<[], [llvm_float_ty], []>; } // End TargetPrefix = "r600" @@ -141,40 +141,40 @@ <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : ClangBuiltin<"__builtin_amdgcn_queue_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_kernarg_segment_ptr : ClangBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicitarg_ptr : ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : ClangBuiltin<"__builtin_amdgcn_groupstaticsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_dispatch_id : ClangBuiltin<"__builtin_amdgcn_dispatch_id">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; // For internal use. Coordinates LDS lowering between IR transform and backend. def int_amdgcn_lds_kernel_id : - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicit_buffer_ptr : ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. @@ -195,7 +195,7 @@ def int_amdgcn_wavefrontsize : ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// @@ -218,10 +218,10 @@ [ImmArg>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The 1st parameter is a mask for the types of instructions that may be allowed // to cross the SCHED_BARRIER during scheduling. @@ -239,7 +239,7 @@ // MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER. def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrConvergent, - IntrWillReturn]>; + IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The first parameter is a mask that determines the types of instructions that // you would like to synchronize around and add to a scheduling group. The @@ -252,59 +252,59 @@ def int_amdgcn_sched_group_barrier : ClangBuiltin<"__builtin_amdgcn_sched_group_barrier">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, IntrNoMem, IntrHasSideEffects, - IntrConvergent, IntrWillReturn]>; + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Scheduler optimization hint. // MASK = 0: Small gemm opt def int_amdgcn_iglp_opt : ClangBuiltin<"__builtin_amdgcn_iglp_opt">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrConvergent, - IntrWillReturn]>; + IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_waitcnt : ClangBuiltin<"__builtin_amdgcn_s_waitcnt">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -def int_amdgcn_div_scale : Intrinsic< +def int_amdgcn_div_scale : DefaultAttrsIntrinsic< // 1st parameter: Numerator // 2nd parameter: Denominator // 3rd parameter: Select quotient. Must equal Numerator or Denominator. // (0 = Denominator, 1 = Numerator). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>, IntrWillReturn] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; -def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], +def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty], +def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Look Up 2.0 / pi src0 with segment select src1[4:0] -def int_amdgcn_trig_preop : Intrinsic< +def int_amdgcn_trig_preop : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sin : Intrinsic< +def int_amdgcn_sin : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cos : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_cos : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_log_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_log_clamp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative] >; // Fused single-precision multiply-add with legacy behaviour for the multiply, @@ -313,132 +313,132 @@ // v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and // has a completely different kind of legacy behaviour.) def int_amdgcn_fma_legacy : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative] >; -def int_amdgcn_rcp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_rcp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sqrt : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_sqrt : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_rsq : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_rsq : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">, - Intrinsic< - [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic< + [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; // out = 1.0 / sqrt(a) result clamped to +/- max_float. -def int_amdgcn_rsq_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; -def int_amdgcn_ldexp : Intrinsic< +def int_amdgcn_ldexp : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_frexp_mant : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_frexp_exp : Intrinsic< - [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] >; // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 // and always uses rtz, so is not suitable for implementing the OpenCL // fract function. It should be ok on VI. -def int_amdgcn_fract : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_fract : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pknorm_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, - Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pknorm_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, - Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">, - Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_class : Intrinsic< +def int_amdgcn_class : DefaultAttrsIntrinsic< [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmed3 : ClangBuiltin<"__builtin_amdgcn_fmed3">, - Intrinsic<[llvm_anyfloat_ty], + DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz // should be used. def int_amdgcn_sffbh : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable] >; // v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support. def int_amdgcn_fmad_ftz : - Intrinsic<[llvm_anyfloat_ty], + DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Fields should mirror atomicrmw @@ -449,7 +449,7 @@ llvm_i32_ty, // scope llvm_i1_ty], // isVolatile [IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>], "", + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -464,7 +464,7 @@ llvm_i32_ty, // scope llvm_i1_ty], // isVolatile [IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>] + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] >; // FIXME: The m0 argument should be moved after the normal arguments @@ -483,7 +483,7 @@ llvm_i1_ty], // wave done, set to 1 for the last ordered instruction [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, ImmArg>, - ImmArg>, ImmArg>, ImmArg> + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree ] >; @@ -492,7 +492,7 @@ [llvm_anyptr_ty, // LDS or GDS ptr llvm_i1_ty], // isVolatile [IntrConvergent, IntrWillReturn, IntrArgMemOnly, - NoCapture>, ImmArg>], + NoCapture>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -990,30 +990,29 @@ defset list AMDGPUBufferIntrinsics = { -class AMDGPUBufferLoad : Intrinsic < +class AMDGPUBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem, IntrWillReturn, - ImmArg>, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; // Generate a buffer_load instruction that may be optimized to s_buffer_load if // the offset argument is uniform. -def int_amdgcn_s_buffer_load : Intrinsic < +def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // byte offset llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc) - [IntrNoMem, IntrWillReturn, ImmArg>]>, + [IntrNoMem, ImmArg>]>, AMDGPURsrcIntrinsic<0>; -class AMDGPUBufferStore : Intrinsic < +class AMDGPUBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1021,8 +1020,7 @@ llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, IntrWillReturn, - ImmArg>, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -1034,7 +1032,7 @@ // and swizzling changes depending on whether idxen is set in the instruction. // These new instrinsics also keep the offset and soffset arguments separate as // they behave differently in bounds checking and swizzling. -class AMDGPURawBufferLoad : Intrinsic < +class AMDGPURawBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1043,12 +1041,12 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; -class AMDGPUStructBufferLoad : Intrinsic < +class AMDGPUStructBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -1058,12 +1056,12 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; -class AMDGPURawBufferStore : Intrinsic < +class AMDGPURawBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1073,12 +1071,12 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; -class AMDGPUStructBufferStore : Intrinsic < +class AMDGPUStructBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1089,7 +1087,7 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; @@ -1101,7 +1099,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic; @@ -1125,7 +1123,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // gfx908 intrinsic @@ -1139,7 +1137,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic; @@ -1162,7 +1160,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // gfx908 intrinsic @@ -1174,7 +1172,7 @@ // Obsolescent tbuffer intrinsics. -def int_amdgcn_tbuffer_load : Intrinsic < +def int_amdgcn_tbuffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -1185,12 +1183,12 @@ llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem, IntrWillReturn, + [IntrReadMem, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_tbuffer_store : Intrinsic < +def int_amdgcn_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1202,7 +1200,7 @@ llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, IntrWillReturn, ImmArg>, + [IntrWriteMem, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1211,7 +1209,7 @@ // - raw and struct variants // - joint format field // - joint cachepolicy field -def int_amdgcn_raw_tbuffer_load : Intrinsic < +def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1221,11 +1219,11 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_raw_tbuffer_store : Intrinsic < +def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1236,11 +1234,11 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_struct_tbuffer_load : Intrinsic < +def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -1251,11 +1249,11 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_struct_tbuffer_store : Intrinsic < +def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1267,7 +1265,7 @@ // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1278,7 +1276,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; @@ -1298,7 +1296,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic; @@ -1310,7 +1308,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; // Legacy form of the intrinsic. raw and struct forms should be preferred. @@ -1329,7 +1327,7 @@ // bit 2 = dlc on gfx10+)) // swizzled buffer (bit 3 = swz)) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, - ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; + ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; class AMDGPUStructBufferLoadLDS : Intrinsic < @@ -1346,14 +1344,14 @@ // bit 2 = dlc on gfx10+)) // swizzled buffer (bit 3 = swz)) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, - ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; + ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS; } // defset AMDGPUBufferIntrinsics // Uses that do not set the done bit should set IntrWriteMem on the // call site. -def int_amdgcn_exp : Intrinsic <[], [ +def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_any_ty, // src0 (f32 or i32) @@ -1364,12 +1362,11 @@ llvm_i1_ty // vm (ignored on GFX11+) ], [ImmArg>, ImmArg>, ImmArg>, - ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly, - IntrWillReturn] + ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with row_en bit set. Only supported on GFX11+. -def int_amdgcn_exp_row : Intrinsic <[], [ +def int_amdgcn_exp_row : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_any_ty, // src0 (f32 or i32) @@ -1379,11 +1376,11 @@ llvm_i1_ty, // done llvm_i32_ty], // row number [ImmArg>, ImmArg>, ImmArg>, - IntrWriteMem, IntrInaccessibleMemOnly, IntrWillReturn] + IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with compr bit set. Not supported on GFX11+. -def int_amdgcn_exp_compr : Intrinsic <[], [ +def int_amdgcn_exp_compr : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_anyvector_ty, // src0 (v2f16 or v2i16) @@ -1391,58 +1388,57 @@ llvm_i1_ty, // done llvm_i1_ty], // vm [ImmArg>, ImmArg>, ImmArg>, - ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly, - IntrWillReturn] + ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; def int_amdgcn_buffer_wbinvl1_sc : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_buffer_wbinvl1 : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_dcache_inv : ClangBuiltin<"__builtin_amdgcn_s_dcache_inv">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_memtime : ClangBuiltin<"__builtin_amdgcn_s_memtime">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sleep : ClangBuiltin<"__builtin_amdgcn_s_sleep">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_incperflevel : ClangBuiltin<"__builtin_amdgcn_s_incperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_decperflevel : ClangBuiltin<"__builtin_amdgcn_s_decperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_sethalt : - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]>; def int_amdgcn_s_setprio : ClangBuiltin<"__builtin_amdgcn_s_setprio">, - Intrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]>; // This is IntrHasSideEffects so it can be used to read cycle counters. def int_amdgcn_s_getreg : ClangBuiltin<"__builtin_amdgcn_s_getreg">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>] >; // Note this can be used to set FP environment properties that are @@ -1451,8 +1447,8 @@ // subtarget. llvm.amdgcn.s.setreg(hwmode, value) def int_amdgcn_s_setreg : ClangBuiltin<"__builtin_amdgcn_s_setreg">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] + DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>] >; // int_amdgcn_s_getpc is provided to allow a specific style of position @@ -1463,16 +1459,16 @@ // especially as we explicitly use IntrNoMem to allow optimizations. def int_amdgcn_s_getpc : ClangBuiltin<"__builtin_amdgcn_s_getpc">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : ClangBuiltin<"__builtin_amdgcn_interp_mov">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p1 , , , @@ -1480,17 +1476,17 @@ // so it behaves like IntrNoMem. def int_amdgcn_interp_p1 : ClangBuiltin<"__builtin_amdgcn_interp_p1">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2 , , , , def int_amdgcn_interp_p2 : ClangBuiltin<"__builtin_amdgcn_interp_p2">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. @@ -1498,183 +1494,183 @@ // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p1_f16 : ClangBuiltin<"__builtin_amdgcn_interp_p1_f16">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2_f16 , , , , , // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p2_f16 : ClangBuiltin<"__builtin_amdgcn_interp_p2_f16">, - Intrinsic<[llvm_half_ty], + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // llvm.amdgcn.lds.direct.load // The input argument is m0, which contains a packed combination of address // offset and flags describing the data type. def int_amdgcn_lds_direct_load : - Intrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16 + DefaultAttrsIntrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16 [llvm_i32_ty], - [IntrReadMem, IntrSpeculatable, IntrWillReturn]>; + [IntrReadMem, IntrSpeculatable]>; // llvm.amdgcn.lds.param.load , , // Like interp intrinsics, this reads from lds, but the memory values are constant, // so it behaves like IntrNoMem. def int_amdgcn_lds_param_load : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // llvm.amdgcn.interp.inreg.p10

, , def int_amdgcn_interp_inreg_p10 : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable]>; // llvm.amdgcn.interp.inreg.p2

, , def int_amdgcn_interp_inreg_p2 : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable]>; // llvm.amdgcn.interp.inreg.p10.f16

, , , // high selects whether high or low 16-bits are used for p and p0 operands def int_amdgcn_interp_inreg_p10_f16: - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>]>; // llvm.amdgcn.interp.inreg.p2.f16

, , , // high selects whether high or low 16-bits are used for p operand def int_amdgcn_interp_inreg_p2_f16 : - Intrinsic<[llvm_half_ty], + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>]>; // Deprecated: use llvm.amdgcn.live.mask instead. -def int_amdgcn_ps_live : Intrinsic < +def int_amdgcn_ps_live : DefaultAttrsIntrinsic < [llvm_i1_ty], [], - [IntrNoMem, IntrWillReturn]>; + [IntrNoMem]>; // Query currently live lanes. // Returns true if lane is live (and not a helper lane). -def int_amdgcn_live_mask : Intrinsic <[llvm_i1_ty], - [], [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn] +def int_amdgcn_live_mask : DefaultAttrsIntrinsic <[llvm_i1_ty], + [], [IntrReadMem, IntrInaccessibleMemOnly] >; def int_amdgcn_mbcnt_lo : ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_amdgcn_mbcnt_hi : ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // llvm.amdgcn.ds.swizzle src offset def int_amdgcn_ds_swizzle : ClangBuiltin<"__builtin_amdgcn_ds_swizzle">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, ImmArg>]>; -def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], +def int_amdgcn_ubfe : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], +def int_amdgcn_sbfe : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_lerp : ClangBuiltin<"__builtin_amdgcn_lerp">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u8 : ClangBuiltin<"__builtin_amdgcn_sad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_msad_u8 : ClangBuiltin<"__builtin_amdgcn_msad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_hi_u8 : ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u16 : ClangBuiltin<"__builtin_amdgcn_sad_u16">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_qsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_u32_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, - Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_u8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_icmp : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + [IntrNoMem, IntrConvergent, + ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_fcmp : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + [IntrNoMem, IntrConvergent, + ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ballot : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_readfirstlane : ClangBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The lane argument must be uniform across the currently active threads of the // current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : ClangBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The value to write and lane select arguments must be uniform across the // currently active threads of the current wave. Otherwise, the result is @@ -1686,32 +1682,32 @@ llvm_i32_ty, // uniform lane select llvm_i32_ty // returned by all lanes other than the selected one ], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mulhi_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mulhi_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) @@ -1723,7 +1719,7 @@ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrWriteMem, - IntrInaccessibleMemOnly, IntrWillReturn], "", + IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1734,7 +1730,7 @@ ClangBuiltin<"__builtin_amdgcn_ds_gws_barrier">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1743,7 +1739,7 @@ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_v">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1752,7 +1748,7 @@ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_br">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1761,7 +1757,7 @@ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_p">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1770,7 +1766,7 @@ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1778,33 +1774,33 @@ // Copies the source value to the destination value, with the guarantee that // the source value is computed as if the entire program were executed in WQM. def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], - [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Copies the source value to the destination value, such that the source // is computed as if the entire program were executed in WQM if any other // program code executes in WQM. def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty], - [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Return true if at least one thread within the pixel quad passes true into // the function. def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], - [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn] + [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // If false, set EXEC=0 for the current thread until the end of program. // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? -def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; +def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], [IntrNoCallback, IntrNoFree]>; def int_amdgcn_endpgm : ClangBuiltin<"__builtin_amdgcn_endpgm">, - Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] + Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects, IntrNoCallback, IntrNoFree] >; // If false, mark all active lanes as helper lanes until the end of program. def int_amdgcn_wqm_demote : Intrinsic<[], - [llvm_i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly] + [llvm_i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly, IntrNoCallback, IntrNoFree] >; // Copies the active channels of the source value to the destination value, @@ -1814,16 +1810,16 @@ // undefined value. def int_amdgcn_strict_wwm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Deprecated. Use int_amdgcn_strict_wwm instead. def int_amdgcn_wwm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_strict_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Given a value, copies it while setting all the inactive lanes to a given @@ -1834,18 +1830,18 @@ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, // value to be copied LLVMMatchType<0>], // value for the inactive lanes to take - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, - Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>] >; // Return if the given flat pointer points to a prvate memory address. def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, - Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>] >; //===----------------------------------------------------------------------===// @@ -1854,11 +1850,11 @@ def int_amdgcn_s_dcache_inv_vol : ClangBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_buffer_wbinvl1_vol : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; //===----------------------------------------------------------------------===// // VI Intrinsics @@ -1871,7 +1867,7 @@ llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.update.dpp.i32 // Should be equivalent to: @@ -1883,37 +1879,37 @@ llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_dcache_wb : ClangBuiltin<"__builtin_amdgcn_s_dcache_wb">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_dcache_wb_vol : ClangBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_memrealtime : ClangBuiltin<"__builtin_amdgcn_s_memrealtime">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.ds.permute def int_amdgcn_ds_permute : ClangBuiltin<"__builtin_amdgcn_ds_permute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.ds.bpermute def int_amdgcn_ds_bpermute : ClangBuiltin<"__builtin_amdgcn_ds_bpermute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.perm def int_amdgcn_perm : ClangBuiltin<"__builtin_amdgcn_perm">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree]>; //===----------------------------------------------------------------------===// // GFX9 Intrinsics @@ -1930,7 +1926,7 @@ // bit 2 = dlc on gfx10+)) // bit 4 = scc/nt on gfx90a+)) [IntrWillReturn, NoCapture>, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, ImmArg>], + ImmArg>, ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; @@ -1943,14 +1939,14 @@ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.permlanex16 def int_amdgcn_permlanex16 : ClangBuiltin<"__builtin_amdgcn_permlanex16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.mov.dpp8.i32 // is a 32-bit constant whose high 8 bits must be zero which selects @@ -1959,18 +1955,18 @@ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + ImmArg>, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_get_waveid_in_workgroup : ClangBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, Intrinsic<[llvm_i32_ty], [], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; class AMDGPUGlobalAtomicRtn : Intrinsic < [vt], [llvm_anyptr_ty, // vaddr vt], // vdata(VGPR) - [IntrArgMemOnly, IntrWillReturn, NoCapture>], "", + [IntrArgMemOnly, IntrWillReturn, NoCapture>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn; @@ -1980,10 +1976,10 @@ // is i32 or i64. // and are both v3f16 or both v3f32. def int_amdgcn_image_bvh_intersect_ray : - Intrinsic<[llvm_v4i32_ty], + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_anyint_ty, llvm_float_ty, llvm_v3f32_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_v4i32_ty], - [IntrReadMem, IntrWillReturn]>; + [IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>; //===----------------------------------------------------------------------===// // GFX11 Intrinsics @@ -1992,17 +1988,17 @@ // llvm.amdgcn.permlane64 def int_amdgcn_permlane64 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_add_gs_reg_rtn : ClangBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">, Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + [ImmArg>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_sub_gs_reg_rtn : ClangBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">, Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + [ImmArg>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_bvh_stack_rtn : Intrinsic< @@ -2013,7 +2009,7 @@ llvm_v4i32_ty, // %data1 llvm_i32_ty, // %offset ], - [ImmArg>, IntrWillReturn] + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // WMMA (Wave Matrix Multiply-Accumulate) intrinsics @@ -2029,7 +2025,7 @@ AB, // %B LLVMMatchType<0>, // %C ], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicOPSEL : @@ -2041,7 +2037,7 @@ LLVMMatchType<0>, // %C llvm_i1_ty, // %high ], - [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrConvergent, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicIU : @@ -2055,7 +2051,7 @@ LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], - [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, ImmArg>] + [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic; @@ -2074,7 +2070,7 @@ // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2 : ClangBuiltin<"__builtin_amdgcn_fdot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_float_ty], // %r [ llvm_v2f16_ty, // %a @@ -2082,42 +2078,42 @@ llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_f16_f16 : ClangBuiltin<"__builtin_amdgcn_fdot2_f16_f16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_half_ty], // %r [ llvm_v2f16_ty, // %a llvm_v2f16_ty, // %b llvm_half_ty // %c ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_bf16_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i16_ty], // %r [ llvm_v2i16_ty, // %a llvm_v2i16_ty, // %b llvm_i16_ty // %c ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_f32_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2_f32_bf16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_float_ty], // %r [ llvm_v2i16_ty, // %a @@ -2125,14 +2121,14 @@ llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_sdot2 : ClangBuiltin<"__builtin_amdgcn_sdot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_v2i16_ty, // %a @@ -2140,14 +2136,14 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_udot2 : ClangBuiltin<"__builtin_amdgcn_udot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_v2i16_ty, // %a @@ -2155,14 +2151,14 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_sdot4 : ClangBuiltin<"__builtin_amdgcn_sdot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2170,14 +2166,14 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_udot4 : ClangBuiltin<"__builtin_amdgcn_udot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2185,7 +2181,7 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp) @@ -2195,7 +2191,7 @@ // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_sudot4 : ClangBuiltin<"__builtin_amdgcn_sudot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i1_ty, // %a_sign @@ -2205,7 +2201,7 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>] >; @@ -2214,7 +2210,7 @@ // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_sdot8 : ClangBuiltin<"__builtin_amdgcn_sdot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2222,7 +2218,7 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp) @@ -2230,7 +2226,7 @@ // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_udot8 : ClangBuiltin<"__builtin_amdgcn_udot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2238,7 +2234,7 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp) @@ -2249,7 +2245,7 @@ // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_sudot8 : ClangBuiltin<"__builtin_amdgcn_sudot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i1_ty, // %a_sign @@ -2259,7 +2255,7 @@ llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>] >; @@ -2272,10 +2268,10 @@ // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp class AMDGPUMfmaIntrinsic : ClangBuiltin, - Intrinsic<[DestTy], + DefaultAttrsIntrinsic<[DestTy], [SrcABTy, SrcABTy, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, IntrWillReturn, + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x1f32 : AMDGPUMfmaIntrinsic; @@ -2328,10 +2324,10 @@ // bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn; def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn; -def int_amdgcn_ds_fadd_v2bf16 : Intrinsic< +def int_amdgcn_ds_fadd_v2bf16 : DefaultAttrsIntrinsic< [llvm_v2i16_ty], [LLVMQualPointerType, llvm_v2i16_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture>]>, + [IntrArgMemOnly, NoCapture>]>, ClangBuiltin<"__builtin_amdgcn_ds_atomic_fadd_v2bf16">; def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; @@ -2353,10 +2349,10 @@ // llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid class AMDGPUMSmfmacIntrinsic : ClangBuiltin, - Intrinsic<[DestTy], + DefaultAttrsIntrinsic<[DestTy], [SrcA, SrcB, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, IntrWillReturn, + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>]>; def int_amdgcn_smfmac_f32_16x16x32_f16 : AMDGPUMSmfmacIntrinsic; @@ -2380,92 +2376,92 @@ // llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3] // byte_sel selects byte from srcA. def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3] def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel // word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes. def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">, - Intrinsic<[llvm_v2f32_ty], + DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel. def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">, - Intrinsic<[llvm_v2f32_ty], + DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel // word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes. def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to write into vdst. def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. // ===----------------------------------------------------------------------===// def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], - [llvm_i1_ty], [IntrConvergent, IntrWillReturn] + [llvm_i1_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], - [llvm_anyint_ty], [IntrConvergent, IntrWillReturn] + [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty, LLVMMatchType<0>], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_loop : Intrinsic<[llvm_i1_ty], - [llvm_anyint_ty], [IntrConvergent, IntrWillReturn] + [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], - [IntrConvergent, IntrWillReturn]>; + [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Represent unreachable in a divergent region. def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>; // Emit 2.5 ulp, no denormal division. Should only be inserted by // pass based on !fpmath metadata. -def int_amdgcn_fdiv_fast : Intrinsic< +def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic< [llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Represent a relocation constant. -def int_amdgcn_reloc_constant : Intrinsic< +def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_metadata_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; } diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -981,7 +981,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #35 + ; CHECK: call void @f.nobuiltin() #36 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #11 @@ -1183,9 +1183,10 @@ ; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #31 = { argmemonly nounwind readonly } ; CHECK: attributes #32 = { argmemonly nounwind } -; CHECK: attributes #33 = { nounwind readonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #35 = { builtin } +; CHECK: attributes #33 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #34 = { nocallback nounwind } +; CHECK: attributes #35 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #36 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1022,7 +1022,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #38 + ; CHECK: call void @f.nobuiltin() #39 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1246,9 +1246,10 @@ ; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #34 = { argmemonly nounwind readonly } ; CHECK: attributes #35 = { argmemonly nounwind } -; CHECK: attributes #36 = { nounwind readonly } -; CHECK: attributes #37 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #38 = { builtin } +; CHECK: attributes #36 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #37 = { nocallback nounwind } +; CHECK: attributes #38 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #39 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1170,7 +1170,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #41 + ; CHECK: call void @f.nobuiltin() #42 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1556,9 +1556,10 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #41 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1241,7 +1241,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #42 + ; CHECK: call void @f.nobuiltin() #43 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1588,7 +1588,7 @@ } declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } @@ -1629,10 +1629,11 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #42 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1241,7 +1241,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #42 + ; CHECK: call void @f.nobuiltin() #43 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1606,7 +1606,7 @@ declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 ;; Constant Expressions @@ -1654,10 +1654,11 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #42 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -1248,7 +1248,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #43 + ; CHECK: call void @f.nobuiltin() #44 ; When used in a non-strictfp function the strictfp callsite attribute ; should get translated to nobuiltin. @@ -1618,10 +1618,10 @@ declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #41 +; CHECK: declare void @f.speculatable() #42 ;; Constant Expressions @@ -1669,11 +1669,12 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #43 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #42 = { speculatable } +; CHECK: attributes #43 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #44 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -1259,7 +1259,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #43 + ; CHECK: call void @f.nobuiltin() #44 ; When used in a non-strictfp function the strictfp callsite attribute ; should get translated to nobuiltin. @@ -1629,10 +1629,10 @@ declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #41 +; CHECK: declare void @f.speculatable() #42 ;; Constant Expressions @@ -1680,11 +1680,12 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #43 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #42 = { speculatable } +; CHECK: attributes #43 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #44 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1540,7 +1540,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #50 + ; CHECK: call void @f.nobuiltin() #51 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1915,10 +1915,10 @@ declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #41 +; CHECK: declare void @f.writeonly() #42 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #42 +; CHECK: declare void @f.speculatable() #43 ;; Constant Expressions @@ -1929,16 +1929,16 @@ define void @instructions.strictfp() strictfp { call void @f.strictfp() strictfp - ; CHECK: call void @f.strictfp() #43 + ; CHECK: call void @f.strictfp() #44 ret void } declare void @f.nosanitize_coverage() nosanitize_coverage -; CHECK: declare void @f.nosanitize_coverage() #44 +; CHECK: declare void @f.nosanitize_coverage() #45 declare void @f.disable_sanitizer_instrumentation() disable_sanitizer_instrumentation -; CHECK: declare void @f.disable_sanitizer_instrumentation() #45 +; CHECK: declare void @f.disable_sanitizer_instrumentation() #46 ; immarg attribute declare void @llvm.test.immarg.intrinsic(i32 immarg) @@ -1961,10 +1961,10 @@ ; CHECK: declare void @f.allocsize_two(i32, i32) declare void @f.nosanitize_bounds() nosanitize_bounds -; CHECK: declare void @f.nosanitize_bounds() #48 +; CHECK: declare void @f.nosanitize_bounds() #49 declare void @f.allockind() allockind("alloc,uninitialized") -; CHECK: declare void @f.allockind() #49 +; CHECK: declare void @f.allockind() #50 ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } @@ -2005,18 +2005,19 @@ ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { argmemonly nounwind readonly } ; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #41 = { writeonly } -; CHECK: attributes #42 = { speculatable } -; CHECK: attributes #43 = { strictfp } -; CHECK: attributes #44 = { nosanitize_coverage } -; CHECK: attributes #45 = { disable_sanitizer_instrumentation } -; CHECK: attributes #46 = { allocsize(0) } -; CHECK: attributes #47 = { allocsize(1,0) } -; CHECK: attributes #48 = { nosanitize_bounds } -; CHECK: attributes #49 = { allockind("alloc,uninitialized") } -; CHECK: attributes #50 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { writeonly } +; CHECK: attributes #43 = { speculatable } +; CHECK: attributes #44 = { strictfp } +; CHECK: attributes #45 = { nosanitize_coverage } +; CHECK: attributes #46 = { disable_sanitizer_instrumentation } +; CHECK: attributes #47 = { allocsize(0) } +; CHECK: attributes #48 = { allocsize(1,0) } +; CHECK: attributes #49 = { nosanitize_bounds } +; CHECK: attributes #50 = { allockind("alloc,uninitialized") } +; CHECK: attributes #51 = { builtin } ;; Metadata diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -928,7 +928,7 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" } ; AKF_HSA: attributes #[[ATTR3]] = { nounwind } @@ -936,7 +936,7 @@ ; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address } ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -642,11 +642,11 @@ attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -414,10 +414,10 @@ ; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. -; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -40,7 +40,6 @@ ; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm -; ; GFX10-LABEL: test_simple_indirect_call: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_getpc_b64 s[8:9] @@ -69,8 +68,8 @@ attributes #0 = { nounwind readnone speculatable willreturn } ;. -; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } ;. diff --git a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll --- a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll +++ b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll @@ -45,8 +45,7 @@ ; addrspacecasted' alloca. ; CHECK-LABEL: @test_inliner_flat_ptr( ; CHECK: call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NOT: call -; CHECK-NOT: call +; CHECK-NOT: call [[.*]]@ define amdgpu_kernel void @test_inliner_flat_ptr(float addrspace(1)* nocapture %a, i32 %n) { entry: %pvt_arr = alloca [64 x float], align 4, addrspace(5) diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -249,7 +249,8 @@ ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind willreturn } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nounwind willreturn } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}