Index: runtime/src/dllexports =================================================================== --- runtime/src/dllexports +++ runtime/src/dllexports @@ -546,6 +546,16 @@ omp_alloc 894 omp_free 895 omp_get_device_num 896 + omp_init_allocator 897 + omp_destroy_allocator 898 + %ifndef stub + __kmpc_set_default_allocator + __kmpc_get_default_allocator + __kmpc_alloc + __kmpc_free + __kmpc_init_allocator + __kmpc_destroy_allocator + %endif omp_set_affinity_format 748 omp_get_affinity_format 749 omp_display_affinity 750 @@ -558,7 +568,7 @@ omp_pause_resource_all 757 omp_get_supported_active_levels 758 - OMP_NULL_ALLOCATOR DATA + omp_null_allocator DATA omp_default_mem_alloc DATA omp_large_cap_mem_alloc DATA omp_const_mem_alloc DATA @@ -567,6 +577,12 @@ omp_cgroup_mem_alloc DATA omp_pteam_mem_alloc DATA omp_thread_mem_alloc DATA + + omp_default_mem_space DATA + omp_large_cap_mem_space DATA + omp_const_mem_space DATA + omp_high_bw_mem_space DATA + omp_low_lat_mem_space DATA %endif # OMP_50 %ifndef stub Index: runtime/src/include/50/omp.h.var =================================================================== --- runtime/src/include/50/omp.h.var +++ runtime/src/include/50/omp.h.var @@ -134,6 +134,7 @@ extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); # include <stdlib.h> +# include <stdint.h> /* OpenMP 4.5 */ extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void); extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int); @@ -221,25 +222,98 @@ extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); /* OpenMP 5.0 Memory Management */ - typedef void *omp_allocator_t; - extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR; - extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc; - extern 
__KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc; - extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc; - - extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *); - extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void); + typedef uintptr_t omp_uintptr_t; + + typedef enum { + OMP_ATK_THREADMODEL = 1, + OMP_ATK_ALIGNMENT = 2, + OMP_ATK_ACCESS = 3, + OMP_ATK_POOL_SIZE = 4, + OMP_ATK_FALLBACK = 5, + OMP_ATK_FB_DATA = 6, + OMP_ATK_PINNED = 7, + OMP_ATK_PARTITION = 8 + } omp_alloctrait_key_t; + + typedef enum { + OMP_ATV_FALSE = 0, + OMP_ATV_TRUE = 1, + OMP_ATV_DEFAULT = 2, + OMP_ATV_CONTENDED = 3, + OMP_ATV_UNCONTENDED = 4, + OMP_ATV_SEQUENTIAL = 5, + OMP_ATV_PRIVATE = 6, + OMP_ATV_ALL = 7, + OMP_ATV_THREAD = 8, + OMP_ATV_PTEAM = 9, + OMP_ATV_CGROUP = 10, + OMP_ATV_DEFAULT_MEM_FB = 11, + OMP_ATV_NULL_FB = 12, + OMP_ATV_ABORT_FB = 13, + OMP_ATV_ALLOCATOR_FB = 14, + OMP_ATV_ENVIRONMENT = 15, + OMP_ATV_NEAREST = 16, + OMP_ATV_BLOCKED = 17, + OMP_ATV_INTERLEAVED = 18 + } omp_alloctrait_value_t; + + typedef struct { + omp_alloctrait_key_t key; + omp_uintptr_t value; + } omp_alloctrait_t; + +# if defined(_WIN32) + // On Windows cl and icl do not support 64-bit enum, let's use integer then. 
+ typedef omp_uintptr_t omp_allocator_handle_t; + extern __KMP_IMP omp_allocator_handle_t const omp_null_allocator; + extern __KMP_IMP omp_allocator_handle_t const omp_default_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_large_cap_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_const_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_high_bw_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_low_lat_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc; + typedef omp_uintptr_t omp_memspace_handle_t; + extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; + extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; + extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; + extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space; + extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space; +# else + typedef enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX + } omp_allocator_handle_t; + typedef enum omp_memspace_handle_t { + omp_default_mem_space = 0, + omp_large_cap_mem_space = 1, + omp_const_mem_space = 2, + omp_high_bw_mem_space = 3, + omp_low_lat_mem_space = 4, + KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX + } omp_memspace_handle_t; +# endif + extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_init_allocator(omp_memspace_handle_t m, + int ntraits, omp_alloctrait_t traits[]); + extern void __KAI_KMPC_CONVENTION omp_destroy_allocator(omp_allocator_handle_t allocator); + + extern void __KAI_KMPC_CONVENTION 
omp_set_default_allocator(omp_allocator_handle_t a); + extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_get_default_allocator(void); #ifdef __cplusplus - extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); - extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); + extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a = omp_null_allocator); + extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, omp_allocator_handle_t a = omp_null_allocator); #else - extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator); - extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator); + extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a); + extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a); #endif /* OpenMP 5.0 Affinity Format */ Index: runtime/src/include/50/omp_lib.h.var =================================================================== --- runtime/src/include/50/omp_lib.h.var +++ runtime/src/include/50/omp_lib.h.var @@ -38,8 +38,14 @@ parameter(omp_control_tool_kind=omp_integer_kind) integer omp_control_tool_result_kind parameter(omp_control_tool_result_kind=omp_integer_kind) - integer omp_allocator_kind - parameter(omp_allocator_kind=int_ptr_kind()) + integer omp_allocator_handle_kind + parameter(omp_allocator_handle_kind=int_ptr_kind()) + integer omp_memspace_handle_kind + parameter(omp_memspace_handle_kind=int_ptr_kind()) + integer omp_alloctrait_key_kind + parameter(omp_alloctrait_key_kind=omp_integer_kind) + integer omp_alloctrait_val_kind + parameter(omp_alloctrait_val_kind=int_ptr_kind()) integer omp_pause_resource_kind parameter(omp_pause_resource_kind=omp_integer_kind) @@ -119,24 +125,99 @@ integer(omp_control_tool_result_kind)omp_control_tool_ignored parameter(omp_control_tool_ignored=1) - 
integer(kind=omp_allocator_kind)omp_null_allocator + integer(kind=omp_alloctrait_key_kind)omp_atk_threadmodel + parameter(omp_atk_threadmodel=1) + integer(kind=omp_alloctrait_key_kind)omp_atk_alignment + parameter(omp_atk_alignment=2) + integer(kind=omp_alloctrait_key_kind)omp_atk_access + parameter(omp_atk_access=3) + integer(kind=omp_alloctrait_key_kind)omp_atk_pool_size + parameter(omp_atk_pool_size=4) + integer(kind=omp_alloctrait_key_kind)omp_atk_fallback + parameter(omp_atk_fallback=5) + integer(kind=omp_alloctrait_key_kind)omp_atk_fb_data + parameter(omp_atk_fb_data=6) + integer(kind=omp_alloctrait_key_kind)omp_atk_pinned + parameter(omp_atk_pinned=7) + integer(kind=omp_alloctrait_key_kind)omp_atk_partition + parameter(omp_atk_partition=8) + + ! Reserved for future use + integer(kind=omp_alloctrait_val_kind)omp_atv_false + parameter(omp_atv_false=0) + ! Reserved for future use + integer(kind=omp_alloctrait_val_kind)omp_atv_true + parameter(omp_atv_true=1) + integer(kind=omp_alloctrait_val_kind)omp_atv_default + parameter(omp_atv_default=2) + integer(kind=omp_alloctrait_val_kind)omp_atv_contended + parameter(omp_atv_contended=3) + integer(kind=omp_alloctrait_val_kind)omp_atv_uncontended + parameter(omp_atv_uncontended=4) + integer(kind=omp_alloctrait_val_kind)omp_atv_sequential + parameter(omp_atv_sequential=5) + integer(kind=omp_alloctrait_val_kind)omp_atv_private + parameter(omp_atv_private=6) + integer(kind=omp_alloctrait_val_kind)omp_atv_all + parameter(omp_atv_all=7) + integer(kind=omp_alloctrait_val_kind)omp_atv_thread + parameter(omp_atv_thread=8) + integer(kind=omp_alloctrait_val_kind)omp_atv_pteam + parameter(omp_atv_pteam=9) + integer(kind=omp_alloctrait_val_kind)omp_atv_cgroup + parameter(omp_atv_cgroup=10) + integer(kind=omp_alloctrait_val_kind)omp_atv_default_mem_fb + parameter(omp_atv_default_mem_fb=11) + integer(kind=omp_alloctrait_val_kind)omp_atv_null_fb + parameter(omp_atv_null_fb=12) + integer(kind=omp_alloctrait_val_kind)omp_atv_abort_fb + 
parameter(omp_atv_abort_fb=13) + integer(kind=omp_alloctrait_val_kind)omp_atv_allocator_fb + parameter(omp_atv_allocator_fb=14) + integer(kind=omp_alloctrait_val_kind)omp_atv_environment + parameter(omp_atv_environment=15) + integer(kind=omp_alloctrait_val_kind)omp_atv_nearest + parameter(omp_atv_nearest=16) + integer(kind=omp_alloctrait_val_kind)omp_atv_blocked + parameter(omp_atv_blocked=17) + integer(kind=omp_alloctrait_val_kind)omp_atv_interleaved + parameter(omp_atv_interleaved=18) + + type omp_alloctrait + integer (kind=omp_alloctrait_key_kind) key + integer (kind=omp_alloctrait_val_kind) value + end type omp_alloctrait + + integer(kind=omp_allocator_handle_kind)omp_null_allocator parameter(omp_null_allocator=0) - integer(kind=omp_allocator_kind)omp_default_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_default_mem_alloc parameter(omp_default_mem_alloc=1) - integer(kind=omp_allocator_kind)omp_large_cap_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_large_cap_mem_alloc parameter(omp_large_cap_mem_alloc=2) - integer(kind=omp_allocator_kind)omp_const_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_const_mem_alloc parameter(omp_const_mem_alloc=3) - integer(kind=omp_allocator_kind)omp_high_bw_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_high_bw_mem_alloc parameter(omp_high_bw_mem_alloc=4) - integer(kind=omp_allocator_kind)omp_low_lat_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_low_lat_mem_alloc parameter(omp_low_lat_mem_alloc=5) - integer(kind=omp_allocator_kind)omp_cgroup_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_cgroup_mem_alloc parameter(omp_cgroup_mem_alloc=6) - integer(kind=omp_allocator_kind)omp_pteam_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_pteam_mem_alloc parameter(omp_pteam_mem_alloc=7) - integer(kind=omp_allocator_kind)omp_thread_mem_alloc + integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc parameter(omp_thread_mem_alloc=8) + + 
integer(kind=omp_memspace_handle_kind)omp_default_mem_space + parameter(omp_default_mem_space=0) + integer(kind=omp_memspace_handle_kind)omp_large_cap_mem_space + parameter(omp_large_cap_mem_space=1) + integer(kind=omp_memspace_handle_kind)omp_const_mem_space + parameter(omp_const_mem_space=2) + integer(kind=omp_memspace_handle_kind)omp_high_bw_mem_space + parameter(omp_high_bw_mem_space=3) + integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space + parameter(omp_low_lat_mem_space=4) + integer(kind=omp_pause_resource_kind)omp_pause_resume parameter(omp_pause_resume=0) integer(kind=omp_pause_resource_kind)omp_pause_soft @@ -444,14 +525,27 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority - subroutine omp_set_default_allocator(svar) bind(c) + function omp_init_allocator(memspace, ntraits, traits) bind(c) + import + integer (omp_allocator_handle_kind) omp_init_allocator + integer (omp_memspace_handle_kind), value :: memspace + integer (omp_integer_kind), value :: ntraits + type(omp_alloctrait), intent(in) :: traits(*) + end function omp_init_allocator + + subroutine omp_destroy_allocator(allocator) bind(c) + import + integer (omp_allocator_handle_kind), value :: allocator + end subroutine omp_destroy_allocator + + subroutine omp_set_default_allocator(allocator) bind(c) import - integer (kind=omp_allocator_kind), value :: svar + integer (omp_allocator_handle_kind), value :: allocator end subroutine omp_set_default_allocator function omp_get_default_allocator() bind(c) import - integer (kind=omp_allocator_kind) omp_get_default_allocator + integer (omp_allocator_handle_kind) omp_get_default_allocator end function omp_get_default_allocator subroutine omp_set_affinity_format(format) Index: runtime/src/include/50/omp_lib.f.var =================================================================== --- runtime/src/include/50/omp_lib.f.var +++ runtime/src/include/50/omp_lib.f.var @@ -33,7 +33,15 @@ integer, parameter :: 
omp_lock_hint_kind = omp_integer_kind integer, parameter :: omp_control_tool_kind = omp_integer_kind integer, parameter :: omp_control_tool_result_kind = omp_integer_kind - integer, parameter :: omp_allocator_kind = int_ptr_kind() + integer, parameter :: omp_allocator_handle_kind = int_ptr_kind() + integer, parameter :: omp_memspace_handle_kind = int_ptr_kind() + integer, parameter :: omp_alloctrait_key_kind = omp_integer_kind + integer, parameter :: omp_alloctrait_val_kind = int_ptr_kind() + + type omp_alloctrait + integer(kind=omp_alloctrait_key_kind) key + integer(kind=omp_alloctrait_val_kind) value + end type omp_alloctrait integer, parameter :: omp_pause_resource_kind = omp_integer_kind @@ -74,15 +82,50 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 - integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 - integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 - integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 - integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 - integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 - integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 - integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 - integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 - integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_threadmodel = 1 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_alignment = 2 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_access = 3 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_pool_size = 4 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_fallback = 5 + integer (kind=omp_alloctrait_key_kind), parameter :: 
omp_atk_fb_data = 6 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_pinned = 7 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_partition = 8 + + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_false = 0 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_true = 1 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_default = 2 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_contended = 3 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_uncontended = 4 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_sequential = 5 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_private = 6 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_all = 7 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_thread = 8 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_pteam = 9 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_cgroup = 10 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_default_mem_fb = 11 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_null_fb = 12 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_abort_fb = 13 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_allocator_fb = 14 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_environment = 15 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_nearest = 16 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_blocked = 17 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_interleaved = 18 + + integer (kind=omp_allocator_handle_kind), parameter :: omp_null_allocator = 0 + integer (kind=omp_allocator_handle_kind), parameter :: omp_default_mem_alloc = 1 + integer (kind=omp_allocator_handle_kind), parameter :: omp_large_cap_mem_alloc = 2 + integer (kind=omp_allocator_handle_kind), parameter :: omp_const_mem_alloc = 3 + integer (kind=omp_allocator_handle_kind), 
parameter :: omp_high_bw_mem_alloc = 4 + integer (kind=omp_allocator_handle_kind), parameter :: omp_low_lat_mem_alloc = 5 + integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6 + integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7 + integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8 + + integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0 + integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1 + integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2 + integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3 + integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1 @@ -393,14 +436,27 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority - subroutine omp_set_default_allocator(svar) + function omp_init_allocator(memspace, ntraits, traits) bind(c) + use omp_lib_kinds + integer (omp_allocator_handle_kind) omp_init_allocator + integer (omp_memspace_handle_kind), value :: memspace + integer (omp_integer_kind), value :: ntraits + type(omp_alloctrait), intent(in) :: traits(*) + end function omp_init_allocator + + subroutine omp_destroy_allocator(allocator) bind(c) + use omp_lib_kinds + integer (omp_allocator_handle_kind), value :: allocator + end subroutine omp_destroy_allocator + + subroutine omp_set_default_allocator(allocator) bind(c) use omp_lib_kinds - integer (kind=omp_allocator_kind) svar + integer (omp_allocator_handle_kind), value :: allocator end subroutine omp_set_default_allocator - function omp_get_default_allocator() + function omp_get_default_allocator() bind(c) use omp_lib_kinds - integer (kind=omp_allocator_kind) omp_get_default_allocator + 
integer(omp_allocator_handle_kind)omp_get_default_allocator end function omp_get_default_allocator subroutine omp_set_affinity_format(format) Index: runtime/src/include/50/omp_lib.f90.var =================================================================== --- runtime/src/include/50/omp_lib.f90.var +++ runtime/src/include/50/omp_lib.f90.var @@ -30,7 +30,15 @@ integer, parameter :: omp_lock_hint_kind = omp_sync_hint_kind integer, parameter :: omp_control_tool_kind = omp_integer_kind integer, parameter :: omp_control_tool_result_kind = omp_integer_kind - integer, parameter :: omp_allocator_kind = c_intptr_t + integer, parameter :: omp_allocator_handle_kind = c_intptr_t + integer, parameter :: omp_memspace_handle_kind = c_intptr_t + integer, parameter :: omp_alloctrait_key_kind = omp_integer_kind + integer, parameter :: omp_alloctrait_val_kind = c_intptr_t + + type omp_alloctrait + integer(kind=omp_alloctrait_key_kind) key + integer(kind=omp_alloctrait_val_kind) value + end type omp_alloctrait integer, parameter :: omp_pause_resource_kind = omp_integer_kind @@ -88,15 +96,50 @@ integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 - integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 - integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 - integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 - integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 - integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 - integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 - integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 - integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 - integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 + integer (kind=omp_alloctrait_key_kind), parameter :: 
omp_atk_threadmodel = 1 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_alignment = 2 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_access = 3 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_pool_size = 4 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_fallback = 5 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_fb_data = 6 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_pinned = 7 + integer (kind=omp_alloctrait_key_kind), parameter :: omp_atk_partition = 8 + + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_false = 0 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_true = 1 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_default = 2 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_contended = 3 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_uncontended = 4 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_sequential = 5 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_private = 6 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_all = 7 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_thread = 8 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_pteam = 9 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_cgroup = 10 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_default_mem_fb = 11 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_null_fb = 12 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_abort_fb = 13 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_allocator_fb = 14 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_environment = 15 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_nearest = 16 + integer (kind=omp_alloctrait_val_kind), parameter :: omp_atv_blocked = 17 + integer (kind=omp_alloctrait_val_kind), parameter :: 
omp_atv_interleaved = 18 + + integer (kind=omp_allocator_handle_kind), parameter :: omp_null_allocator = 0 + integer (kind=omp_allocator_handle_kind), parameter :: omp_default_mem_alloc = 1 + integer (kind=omp_allocator_handle_kind), parameter :: omp_large_cap_mem_alloc = 2 + integer (kind=omp_allocator_handle_kind), parameter :: omp_const_mem_alloc = 3 + integer (kind=omp_allocator_handle_kind), parameter :: omp_high_bw_mem_alloc = 4 + integer (kind=omp_allocator_handle_kind), parameter :: omp_low_lat_mem_alloc = 5 + integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6 + integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7 + integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8 + + integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0 + integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1 + integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2 + integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3 + integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1 @@ -409,14 +452,27 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority - subroutine omp_set_default_allocator(svar) bind(c) + function omp_init_allocator(memspace, ntraits, traits) bind(c) + use omp_lib_kinds + integer (kind=omp_allocator_handle_kind) omp_init_allocator + integer (kind=omp_memspace_handle_kind), value :: memspace + integer (kind=omp_integer_kind), value :: ntraits + type(omp_alloctrait), intent(in) :: traits(*) + end function omp_init_allocator + + subroutine omp_destroy_allocator(allocator) bind(c) + use omp_lib_kinds + integer (kind=omp_allocator_handle_kind), value :: allocator + end subroutine 
omp_destroy_allocator + + subroutine omp_set_default_allocator(allocator) bind(c) use omp_lib_kinds - integer (kind=omp_allocator_kind), value :: svar + integer (kind=omp_allocator_handle_kind), value :: allocator end subroutine omp_set_default_allocator function omp_get_default_allocator() bind(c) use omp_lib_kinds - integer (kind=omp_allocator_kind) omp_get_default_allocator + integer (kind=omp_allocator_handle_kind) omp_get_default_allocator end function omp_get_default_allocator subroutine omp_set_affinity_format(format) Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -858,24 +858,95 @@ #if OMP_50_ENABLED /* OpenMP 5.0 Memory Management support */ + +#ifndef __OMP_H +// Duplicate type definitions from omp.h +typedef uintptr_t omp_uintptr_t; + +typedef enum { + OMP_ATK_THREADMODEL = 1, + OMP_ATK_ALIGNMENT = 2, + OMP_ATK_ACCESS = 3, + OMP_ATK_POOL_SIZE = 4, + OMP_ATK_FALLBACK = 5, + OMP_ATK_FB_DATA = 6, + OMP_ATK_PINNED = 7, + OMP_ATK_PARTITION = 8 +} omp_alloctrait_key_t; + +typedef enum { + OMP_ATV_FALSE = 0, + OMP_ATV_TRUE = 1, + OMP_ATV_DEFAULT = 2, + OMP_ATV_CONTENDED = 3, + OMP_ATV_UNCONTENDED = 4, + OMP_ATV_SEQUENTIAL = 5, + OMP_ATV_PRIVATE = 6, + OMP_ATV_ALL = 7, + OMP_ATV_THREAD = 8, + OMP_ATV_PTEAM = 9, + OMP_ATV_CGROUP = 10, + OMP_ATV_DEFAULT_MEM_FB = 11, + OMP_ATV_NULL_FB = 12, + OMP_ATV_ABORT_FB = 13, + OMP_ATV_ALLOCATOR_FB = 14, + OMP_ATV_ENVIRONMENT = 15, + OMP_ATV_NEAREST = 16, + OMP_ATV_BLOCKED = 17, + OMP_ATV_INTERLEAVED = 18 +} omp_alloctrait_value_t; + +typedef void *omp_memspace_handle_t; +extern omp_memspace_handle_t const omp_default_mem_space; +extern omp_memspace_handle_t const omp_large_cap_mem_space; +extern omp_memspace_handle_t const omp_const_mem_space; +extern omp_memspace_handle_t const omp_high_bw_mem_space; +extern omp_memspace_handle_t const omp_low_lat_mem_space; + +typedef struct { + omp_alloctrait_key_t key; + omp_uintptr_t value; +} 
omp_alloctrait_t; + +typedef void *omp_allocator_handle_t; +extern omp_allocator_handle_t const omp_null_allocator; +extern omp_allocator_handle_t const omp_default_mem_alloc; +extern omp_allocator_handle_t const omp_large_cap_mem_alloc; +extern omp_allocator_handle_t const omp_const_mem_alloc; +extern omp_allocator_handle_t const omp_high_bw_mem_alloc; +extern omp_allocator_handle_t const omp_low_lat_mem_alloc; +extern omp_allocator_handle_t const omp_cgroup_mem_alloc; +extern omp_allocator_handle_t const omp_pteam_mem_alloc; +extern omp_allocator_handle_t const omp_thread_mem_alloc; +extern omp_allocator_handle_t const kmp_max_mem_alloc; +extern omp_allocator_handle_t __kmp_def_allocator; + +// end of duplicate type definitions from omp.h +#endif + extern int __kmp_memkind_available; -extern int __kmp_hbw_mem_available; -typedef void *omp_allocator_t; -extern const omp_allocator_t *OMP_NULL_ALLOCATOR; -extern const omp_allocator_t *omp_default_mem_alloc; -extern const omp_allocator_t *omp_large_cap_mem_alloc; -extern const omp_allocator_t *omp_const_mem_alloc; -extern const omp_allocator_t *omp_high_bw_mem_alloc; -extern const omp_allocator_t *omp_low_lat_mem_alloc; -extern const omp_allocator_t *omp_cgroup_mem_alloc; -extern const omp_allocator_t *omp_pteam_mem_alloc; -extern const omp_allocator_t *omp_thread_mem_alloc; -extern const omp_allocator_t *__kmp_def_allocator; - -extern void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *al); -extern const omp_allocator_t *__kmpc_get_default_allocator(int gtid); -extern void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t *al); -extern void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *al); + +typedef omp_memspace_handle_t kmp_memspace_t; // placeholder + +typedef struct kmp_allocator_t { + omp_memspace_handle_t memspace; + void **memkind; // pointer to memkind + int alignment; + omp_alloctrait_value_t fb; + kmp_allocator_t *fb_data; + kmp_uint64 pool_size; + kmp_uint64 pool_used; +} 
kmp_allocator_t; + +extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, + omp_memspace_handle_t, + int ntraits, + omp_alloctrait_t traits[]); +extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); +extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al); +extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid); +extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); +extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); extern void __kmp_init_memkind(); extern void __kmp_fini_memkind(); @@ -2357,7 +2428,7 @@ kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ #endif #if OMP_50_ENABLED - void *const *th_def_allocator; /* per implicit task default allocator */ + omp_allocator_handle_t th_def_allocator; /* default allocator */ #endif /* The data set by the master at reinit, then R/W by the worker */ KMP_ALIGN_CACHE int @@ -2581,7 +2652,7 @@ int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via // omp_set_num_threads() call #if OMP_50_ENABLED - void *const *t_def_allocator; /* per implicit task default allocator */ + omp_allocator_handle_t t_def_allocator; /* default allocator */ #endif // Read/write by workers as well Index: runtime/src/kmp_alloc.cpp =================================================================== --- runtime/src/kmp_alloc.cpp +++ runtime/src/kmp_alloc.cpp @@ -1222,72 +1222,215 @@ #if OMP_50_ENABLED /* OMP 5.0 Memory Management support */ -static int (*p_hbw_check)(void); -static void *(*p_hbw_malloc)(size_t); -static void (*p_hbw_free)(void *); -static int (*p_hbw_set_policy)(int); static const char *kmp_mk_lib_name; static void *h_memkind; +/* memkind experimental API: */ +// memkind_alloc +static void *(*kmp_mk_alloc)(void *k, size_t sz); +// memkind_free +static void (*kmp_mk_free)(void *kind, void *ptr); +// memkind_check_available +static int (*kmp_mk_check)(void *kind); +// kinds we are going to 
use +static void **mk_default; +static void **mk_interleave; +static void **mk_hbw; +static void **mk_hbw_interleave; +static void **mk_hbw_preferred; +static void **mk_hugetlb; +static void **mk_hbw_hugetlb; +static void **mk_hbw_preferred_hugetlb; + +#if KMP_OS_UNIX && KMP_DYNAMIC_LIB +static inline void chk_kind(void ***pkind) { + KMP_DEBUG_ASSERT(pkind); + if (*pkind) // symbol found + if (kmp_mk_check(**pkind)) // kind not available or error + *pkind = NULL; +} +#endif void __kmp_init_memkind() { +// as of 2018-07-31 memkind does not support Windows*, exclude it for now #if KMP_OS_UNIX && KMP_DYNAMIC_LIB + // use of statically linked memkind is problematic, as it depends on libnuma kmp_mk_lib_name = "libmemkind.so"; h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); if (h_memkind) { - p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available"); - p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc"); - p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free"); - p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy"); - if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) { + kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available"); + kmp_mk_alloc = + (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc"); + kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free"); + mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT"); + if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default && + !kmp_mk_check(*mk_default)) { __kmp_memkind_available = 1; - if (p_hbw_check() == 0) { - p_hbw_set_policy(1); // return NULL is not enough memory - __kmp_hbw_mem_available = 1; // found HBW memory available - } - return; // success - all symbols resolved + mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE"); + chk_kind(&mk_interleave); + mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW"); + chk_kind(&mk_hbw); + mk_hbw_interleave = (void **)dlsym(h_memkind, 
"MEMKIND_HBW_INTERLEAVE"); + chk_kind(&mk_hbw_interleave); + mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED"); + chk_kind(&mk_hbw_preferred); + mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB"); + chk_kind(&mk_hugetlb); + mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB"); + chk_kind(&mk_hbw_hugetlb); + mk_hbw_preferred_hugetlb = + (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB"); + chk_kind(&mk_hbw_preferred_hugetlb); + KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n")); + return; // success } dlclose(h_memkind); // failure h_memkind = NULL; } - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; + kmp_mk_check = NULL; + kmp_mk_alloc = NULL; + kmp_mk_free = NULL; + mk_default = NULL; + mk_interleave = NULL; + mk_hbw = NULL; + mk_hbw_interleave = NULL; + mk_hbw_preferred = NULL; + mk_hugetlb = NULL; + mk_hbw_hugetlb = NULL; + mk_hbw_preferred_hugetlb = NULL; #else kmp_mk_lib_name = ""; h_memkind = NULL; - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; + kmp_mk_check = NULL; + kmp_mk_alloc = NULL; + kmp_mk_free = NULL; + mk_default = NULL; + mk_interleave = NULL; + mk_hbw = NULL; + mk_hbw_interleave = NULL; + mk_hbw_preferred = NULL; + mk_hugetlb = NULL; + mk_hbw_hugetlb = NULL; + mk_hbw_preferred_hugetlb = NULL; #endif } void __kmp_fini_memkind() { #if KMP_OS_UNIX && KMP_DYNAMIC_LIB + if (__kmp_memkind_available) + KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n")); if (h_memkind) { dlclose(h_memkind); h_memkind = NULL; } - p_hbw_check = NULL; - p_hbw_malloc = NULL; - p_hbw_free = NULL; - p_hbw_set_policy = NULL; + kmp_mk_check = NULL; + kmp_mk_alloc = NULL; + kmp_mk_free = NULL; + mk_default = NULL; + mk_interleave = NULL; + mk_hbw = NULL; + mk_hbw_interleave = NULL; + mk_hbw_preferred = NULL; + mk_hugetlb = NULL; + mk_hbw_hugetlb = NULL; + mk_hbw_preferred_hugetlb = NULL; #endif } -void 
__kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) { - if (allocator == OMP_NULL_ALLOCATOR) +omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, + int ntraits, + omp_alloctrait_t traits[]) { + // OpenMP 5.0 only allows predefined memspaces + KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space || + ms == omp_large_cap_mem_space || ms == omp_const_mem_space || + ms == omp_high_bw_mem_space); + kmp_allocator_t *al; + int i; + al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed + al->memspace = ms; // not used currently + for (i = 0; i < ntraits; ++i) { + switch (traits[i].key) { + case OMP_ATK_THREADMODEL: + case OMP_ATK_ACCESS: + case OMP_ATK_PINNED: + break; + case OMP_ATK_ALIGNMENT: + al->alignment = traits[i].value; + KMP_ASSERT(IS_POWER_OF_TWO(al->alignment)); + break; + case OMP_ATK_POOL_SIZE: + al->pool_size = traits[i].value; + break; + case OMP_ATK_FALLBACK: + al->fb = (omp_alloctrait_value_t)traits[i].value; + KMP_DEBUG_ASSERT( + al->fb == OMP_ATV_DEFAULT_MEM_FB || al->fb == OMP_ATV_NULL_FB || + al->fb == OMP_ATV_ABORT_FB || al->fb == OMP_ATV_ALLOCATOR_FB); + break; + case OMP_ATK_FB_DATA: + al->fb_data = RCAST(kmp_allocator_t *, traits[i].value); + break; + case OMP_ATK_PARTITION: + al->memkind = RCAST(void **, traits[i].value); + break; + default: + KMP_ASSERT2(0, "Unexpected allocator trait"); + } + } + if (al->fb == 0) { + // set default allocator + al->fb = OMP_ATV_DEFAULT_MEM_FB; + al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; + } else if (al->fb == OMP_ATV_ALLOCATOR_FB) { + KMP_ASSERT(al->fb_data != NULL); + } else if (al->fb == OMP_ATV_DEFAULT_MEM_FB) { + al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; + } + if (__kmp_memkind_available) { + // Let's use memkind library if available + if (ms == omp_high_bw_mem_space) { + if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_hbw_interleave) { + al->memkind = mk_hbw_interleave; + } else if 
(mk_hbw_preferred) { + // AC: do not try to use MEMKIND_HBW for now, because memkind library + // cannot reliably detect exhaustion of HBW memory. + // It could be possible using hbw_verify_memory_region() but memkind + // manual says: "Using this function in production code may result in + // serious performance penalty". + al->memkind = mk_hbw_preferred; + } else { + // HBW is requested but not available --> return NULL allocator + __kmp_free(al); + return omp_null_allocator; + } + } else { + if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_interleave) { + al->memkind = mk_interleave; + } else { + al->memkind = mk_default; + } + } + } else { + if (ms == omp_high_bw_mem_space) { + // cannot detect HBW memory presence without memkind library + __kmp_free(al); + return omp_null_allocator; + } + } + return (omp_allocator_handle_t)al; +} + +void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) { + if (allocator > kmp_max_mem_alloc) + __kmp_free(allocator); +} + +void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) { + if (allocator == omp_null_allocator) allocator = omp_default_mem_alloc; - KMP_DEBUG_ASSERT( - allocator == omp_default_mem_alloc || - allocator == omp_large_cap_mem_alloc || - allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc || - allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc || - allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc); __kmp_threads[gtid]->th.th_def_allocator = allocator; } -const omp_allocator_t *__kmpc_get_default_allocator(int gtid) { + +omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) { return __kmp_threads[gtid]->th.th_def_allocator; } @@ -1295,44 +1438,137 @@ void *ptr_alloc; // Pointer returned by allocator size_t size_a; // Size of allocated memory block (initial+descriptor+align) void *ptr_align; // Pointer to aligned memory, returned - const omp_allocator_t *allocator; // allocator + kmp_allocator_t 
*allocator; // allocator } kmp_mem_desc_t; static int alignment = sizeof(void *); // let's align to pointer size -void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) { +void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) { + void *ptr = NULL; + kmp_allocator_t *al; KMP_DEBUG_ASSERT(__kmp_init_serial); - if (allocator == OMP_NULL_ALLOCATOR) + if (allocator == omp_null_allocator) allocator = __kmp_threads[gtid]->th.th_def_allocator; + KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); + al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)); + int sz_desc = sizeof(kmp_mem_desc_t); - void *ptr = NULL; kmp_mem_desc_t desc; kmp_uintptr_t addr; // address returned by allocator kmp_uintptr_t addr_align; // address to return to caller kmp_uintptr_t addr_descr; // address of memory block descriptor - - KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); - - desc.size_a = size + sz_desc + alignment; - if (allocator == omp_default_mem_alloc) - ptr = __kmp_allocate(desc.size_a); - if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { - KMP_DEBUG_ASSERT(p_hbw_malloc != NULL); - ptr = p_hbw_malloc(desc.size_a); + int align = alignment; // default alignment + if (allocator > kmp_max_mem_alloc && al->alignment > 0) { + align = al->alignment; // alignment requested by user } + desc.size_a = size + sz_desc + align; - KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr, - desc.size_a, __kmp_hbw_mem_available)); + if (__kmp_memkind_available) { + if (allocator < kmp_max_mem_alloc) { + // pre-defined allocator + if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) { + ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a); + } else { + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } + } else if (al->pool_size > 0) { + // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, 
desc.size_a); + if (used + desc.size_a > al->pool_size) { + // not enough space, need to go fallback path + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + if (al->fb == OMP_ATV_DEFAULT_MEM_FB) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } else if (al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == OMP_ATV_ALLOCATOR_FB) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); + } // else ptr == NULL; + } else { + // pool has enough space + ptr = kmp_mk_alloc(*al->memkind, desc.size_a); + if (ptr == NULL) { + if (al->fb == OMP_ATV_DEFAULT_MEM_FB) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } else if (al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == OMP_ATV_ALLOCATOR_FB) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); + } + } + } + } else { + // custom allocator, pool size not requested + ptr = kmp_mk_alloc(*al->memkind, desc.size_a); + if (ptr == NULL) { + if (al->fb == OMP_ATV_DEFAULT_MEM_FB) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } else if (al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == OMP_ATV_ALLOCATOR_FB) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); + } + } + } + } else if (allocator < kmp_max_mem_alloc) { + // pre-defined allocator + if (allocator == omp_high_bw_mem_alloc) { + // ptr = NULL; + } else { + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); + } + } else if (al->pool_size > 0) { + // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, 
desc.size_a); + if (used + desc.size_a > al->pool_size) { + // not enough space, need to go fallback path + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + if (al->fb == OMP_ATV_DEFAULT_MEM_FB) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); + } else if (al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == OMP_ATV_ALLOCATOR_FB) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); + } // else ptr == NULL; + } else { + // pool has enough space + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); + if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } // no sense to look for another fallback because of same internal alloc + } + } else { + // custom allocator, pool size not requested + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); + if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) { + KMP_ASSERT(0); // abort fallback requested + } // no sense to look for another fallback because of same internal alloc + } + KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a)); if (ptr == NULL) return NULL; addr = (kmp_uintptr_t)ptr; - addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1); + addr_align = (addr + sz_desc + align - 1) & ~(align - 1); addr_descr = addr_align - sz_desc; desc.ptr_alloc = ptr; desc.ptr_align = (void *)addr_align; - desc.allocator = allocator; + desc.allocator = al; *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents KMP_MB(); @@ -1340,11 +1576,14 @@ return desc.ptr_align; } -void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) { +void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) { KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator)); if (ptr == NULL) return; + 
kmp_allocator_t *al; + omp_allocator_handle_t oal; + al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)); kmp_mem_desc_t desc; kmp_uintptr_t addr_align; // address to return to caller kmp_uintptr_t addr_descr; // address of memory block descriptor @@ -1355,17 +1594,37 @@ KMP_DEBUG_ASSERT(desc.ptr_align == ptr); if (allocator) { - KMP_DEBUG_ASSERT(desc.allocator == allocator); - } else { - allocator = desc.allocator; + KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data); } - KMP_DEBUG_ASSERT(allocator); - - if (allocator == omp_default_mem_alloc) - __kmp_free(desc.ptr_alloc); - if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { - KMP_DEBUG_ASSERT(p_hbw_free != NULL); - p_hbw_free(desc.ptr_alloc); + al = desc.allocator; + oal = (omp_allocator_handle_t)al; // cast to void* for comparisons + KMP_DEBUG_ASSERT(al); + + if (__kmp_memkind_available) { + if (oal < kmp_max_mem_alloc) { + // pre-defined allocator + if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) { + kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc); + } else { + kmp_mk_free(*mk_default, desc.ptr_alloc); + } + } else { + if (al->pool_size > 0) { // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + (void)used; // to suppress compiler warning + KMP_DEBUG_ASSERT(used >= desc.size_a); + } + kmp_mk_free(*al->memkind, desc.ptr_alloc); + } + } else { + if (oal > kmp_max_mem_alloc && al->pool_size > 0) { + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + (void)used; // to suppress compiler warning + KMP_DEBUG_ASSERT(used >= desc.size_a); + } + __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc); } KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc, allocator)); Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ 
runtime/src/kmp_csupport.cpp @@ -4190,6 +4190,15 @@ #endif #if OMP_50_ENABLED +/* omp_alloc/omp_free only defined for C/C++, not for Fortran */ +void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { + return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); +} + +void omp_free(void *ptr, omp_allocator_handle_t allocator) { + __kmpc_free(__kmp_entry_gtid(), ptr, allocator); +} + int __kmpc_get_target_offload(void) { if (!__kmp_init_serial) { __kmp_serial_initialize(); Index: runtime/src/kmp_ftn_entry.h =================================================================== --- runtime/src/kmp_ftn_entry.h +++ runtime/src/kmp_ftn_entry.h @@ -368,35 +368,35 @@ } /* OpenMP 5.0 Memory Management support */ -void FTN_STDCALL FTN_SET_DEFAULT_ALLOCATOR(const omp_allocator_t *allocator) { -#ifndef KMP_STUB - __kmpc_set_default_allocator(__kmp_entry_gtid(), allocator); -#endif -} -const omp_allocator_t *FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) { +omp_allocator_handle_t FTN_STDCALL FTN_INIT_ALLOCATOR(omp_memspace_handle_t m, + int ntraits, + omp_alloctrait_t tr[]) { #ifdef KMP_STUB return NULL; #else - return __kmpc_get_default_allocator(__kmp_entry_gtid()); + return __kmpc_init_allocator(__kmp_entry_gtid(), m, ntraits, tr); #endif } -void *FTN_STDCALL FTN_ALLOC(size_t size, const omp_allocator_t *allocator) { -#ifdef KMP_STUB - return malloc(size); -#else - return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); + +void FTN_STDCALL FTN_DESTROY_ALLOCATOR(omp_allocator_handle_t al) { +#ifndef KMP_STUB + __kmpc_destroy_allocator(__kmp_entry_gtid(), al); +#endif +} +void FTN_STDCALL FTN_SET_DEFAULT_ALLOCATOR(omp_allocator_handle_t al) { +#ifndef KMP_STUB + __kmpc_set_default_allocator(__kmp_entry_gtid(), al); #endif } -void FTN_STDCALL FTN_FREE(void *ptr, const omp_allocator_t *allocator) { +omp_allocator_handle_t FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) { #ifdef KMP_STUB - free(ptr); + return NULL; #else - __kmpc_free(__kmp_entry_gtid(), ptr, allocator); + return 
__kmpc_get_default_allocator(__kmp_entry_gtid()); #endif } /* OpenMP 5.0 affinity format support */ - #ifndef KMP_STUB static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, char const *csrc, size_t csrc_size) { Index: runtime/src/kmp_ftn_os.h =================================================================== --- runtime/src/kmp_ftn_os.h +++ runtime/src/kmp_ftn_os.h @@ -132,10 +132,10 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL omp_control_tool +#define FTN_INIT_ALLOCATOR omp_init_allocator +#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator #define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator -#define FTN_ALLOC omp_alloc -#define FTN_FREE omp_free #define FTN_GET_DEVICE_NUM omp_get_device_num #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format @@ -264,6 +264,8 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL omp_control_tool_ +#define FTN_INIT_ALLOCATOR omp_init_allocator_ +#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator_ #define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ #define FTN_ALLOC omp_alloc_ @@ -396,10 +398,10 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#define FTN_INIT_ALLOCATOR OMP_INIT_ALLOCATOR +#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR #define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR -#define FTN_ALLOC OMP_ALLOC -#define FTN_FREE OMP_FREE #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT @@ -528,6 +530,8 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_ +#define FTN_INIT_ALLOCATOR OMP_INIT_ALLOCATOR_ +#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR_ #define FTN_SET_DEFAULT_ALLOCATOR 
OMP_SET_DEFAULT_ALLOCATOR_ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ #define FTN_ALLOC OMP_ALLOC_ Index: runtime/src/kmp_global.cpp =================================================================== --- runtime/src/kmp_global.cpp +++ runtime/src/kmp_global.cpp @@ -300,17 +300,37 @@ #if OMP_50_ENABLED int __kmp_memkind_available = 0; -int __kmp_hbw_mem_available = 0; -const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; -const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; -const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; -const omp_allocator_t *omp_const_mem_alloc = (const omp_allocator_t *)3; -const omp_allocator_t *omp_high_bw_mem_alloc = (const omp_allocator_t *)4; -const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; -const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; -const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; -const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; -void *const *__kmp_def_allocator = omp_default_mem_alloc; +omp_allocator_handle_t const omp_null_allocator = NULL; +omp_allocator_handle_t const omp_default_mem_alloc = + (omp_allocator_handle_t const)1; +omp_allocator_handle_t const omp_large_cap_mem_alloc = + (omp_allocator_handle_t const)2; +omp_allocator_handle_t const omp_const_mem_alloc = + (omp_allocator_handle_t const)3; +omp_allocator_handle_t const omp_high_bw_mem_alloc = + (omp_allocator_handle_t const)4; +omp_allocator_handle_t const omp_low_lat_mem_alloc = + (omp_allocator_handle_t const)5; +omp_allocator_handle_t const omp_cgroup_mem_alloc = + (omp_allocator_handle_t const)6; +omp_allocator_handle_t const omp_pteam_mem_alloc = + (omp_allocator_handle_t const)7; +omp_allocator_handle_t const omp_thread_mem_alloc = + (omp_allocator_handle_t const)8; +omp_allocator_handle_t const kmp_max_mem_alloc = + (omp_allocator_handle_t const)1024; +omp_allocator_handle_t __kmp_def_allocator 
= omp_default_mem_alloc; + +omp_memspace_handle_t const omp_default_mem_space = + (omp_memspace_handle_t const)0; +omp_memspace_handle_t const omp_large_cap_mem_space = + (omp_memspace_handle_t const)1; +omp_memspace_handle_t const omp_const_mem_space = + (omp_memspace_handle_t const)2; +omp_memspace_handle_t const omp_high_bw_mem_space = + (omp_memspace_handle_t const)3; +omp_memspace_handle_t const omp_low_lat_mem_space = + (omp_memspace_handle_t const)4; #endif /* This check ensures that the compiler is passing the correct data type for the Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -3357,7 +3357,7 @@ KMP_ASSERT(num > 0); switch (num) { case 4: - if (__kmp_hbw_mem_available) { + if (__kmp_memkind_available) { __kmp_def_allocator = omp_high_bw_mem_alloc; } else { __kmp_msg(kmp_ms_warning, @@ -3406,7 +3406,7 @@ } next = buf; if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) { - if (__kmp_hbw_mem_available) { + if (__kmp_memkind_available) { __kmp_def_allocator = omp_high_bw_mem_alloc; } else { __kmp_msg(kmp_ms_warning, Index: runtime/src/kmp_stub.cpp =================================================================== --- runtime/src/kmp_stub.cpp +++ runtime/src/kmp_stub.cpp @@ -344,15 +344,44 @@ #if OMP_50_ENABLED /* OpenMP 5.0 Memory Management */ -const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; -const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; -const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; -const omp_allocator_t *omp_const_mem_alloc = (const omp_allocator_t *)3; -const omp_allocator_t *omp_high_bw_mem_alloc = (const omp_allocator_t *)4; -const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; -const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; -const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; -const 
omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; +#if KMP_OS_WINDOWS +omp_allocator_handle_t const omp_null_allocator = 0; +omp_allocator_handle_t const omp_default_mem_alloc = + (omp_allocator_handle_t const)1; +omp_allocator_handle_t const omp_large_cap_mem_alloc = + (omp_allocator_handle_t const)2; +omp_allocator_handle_t const omp_const_mem_alloc = + (omp_allocator_handle_t const)3; +omp_allocator_handle_t const omp_high_bw_mem_alloc = + (omp_allocator_handle_t const)4; +omp_allocator_handle_t const omp_low_lat_mem_alloc = + (omp_allocator_handle_t const)5; +omp_allocator_handle_t const omp_cgroup_mem_alloc = + (omp_allocator_handle_t const)6; +omp_allocator_handle_t const omp_pteam_mem_alloc = + (omp_allocator_handle_t const)7; +omp_allocator_handle_t const omp_thread_mem_alloc = + (omp_allocator_handle_t const)8; + +omp_memspace_handle_t const omp_default_mem_space = + (omp_memspace_handle_t const)0; +omp_memspace_handle_t const omp_large_cap_mem_space = + (omp_memspace_handle_t const)1; +omp_memspace_handle_t const omp_const_mem_space = + (omp_memspace_handle_t const)2; +omp_memspace_handle_t const omp_high_bw_mem_space = + (omp_memspace_handle_t const)3; +omp_memspace_handle_t const omp_low_lat_mem_space = + (omp_memspace_handle_t const)4; +#endif /* KMP_OS_WINDOWS */ +void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) { + i; + return malloc(size); +} +void omp_free(void *ptr, const omp_allocator_handle_t allocator) { + i; + free(ptr); +} /* OpenMP 5.0 Affinity Format */ void omp_set_affinity_format(char const *format) { i; } size_t omp_get_affinity_format(char *buffer, size_t size) { Index: runtime/test/api/omp_alloc.c =================================================================== --- runtime/test/api/omp_alloc.c +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: %libomp-compile-and-run - -// REQUIRES: openmp-5.0 - -#include -#include -#include -#include "omp_testsuite.h" - -#define ARRAY_SIZE 10000 - -int test_omp_alloc() { 
- int err; - int i, j; - int *shared_array; - const omp_allocator_t *allocator; - const omp_allocator_t *test_allocator; - // Currently, only default memory allocator is implemented - const omp_allocator_t *allocators[] = { - omp_default_mem_alloc, - }; - - err = 0; - for (i = 0; i < sizeof(allocators) / sizeof(allocators[0]); ++i) { - allocator = allocators[i]; - printf("Using %p allocator\n", test_allocator); - omp_set_default_allocator(allocator); - test_allocator = omp_get_default_allocator(); - if (test_allocator != allocator) { - printf("error: omp_set|get_default_allocator() not working\n"); - return 0; - } - shared_array = (int *)omp_alloc(sizeof(int) * ARRAY_SIZE, test_allocator); - if (shared_array == NULL) { - printf("error: shared_array is NULL\n"); - return 0; - } - for (j = 0; j < ARRAY_SIZE; ++j) { - shared_array[j] = j; - } - #pragma omp parallel shared(shared_array) - { - int i; - int tid = omp_get_thread_num(); - int *private_array = - (int *)omp_alloc(sizeof(int) * ARRAY_SIZE, omp_default_mem_alloc); - if (private_array == NULL) { - printf("error: thread %d private_array is NULL\n", tid); - #pragma omp atomic - err++; - } - for (i = 0; i < ARRAY_SIZE; ++i) { - private_array[i] = shared_array[i] + tid; - } - for (i = 0; i < ARRAY_SIZE; ++i) { - if (private_array[i] != i + tid) { - printf("error: thread %d element %d is %d instead of %d\n", tid, i, - private_array[i], i + tid); - #pragma omp atomic - err++; - } - } - omp_free(private_array, omp_default_mem_alloc); - } /* end of parallel */ - omp_free(shared_array, test_allocator); - } - - return !err; -} - -int main() { - int i; - int num_failed = 0; - - for (i = 0; i < REPETITIONS; i++) { - if (!test_omp_alloc()) { - num_failed++; - } - } - return num_failed; -} Index: runtime/test/api/omp_alloc_def_fb.c =================================================================== --- /dev/null +++ runtime/test/api/omp_alloc_def_fb.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run + +#include +#include + 
+int main() { + omp_alloctrait_t at[2]; + omp_allocator_handle_t a; + void *p[2]; + at[0].key = OMP_ATK_POOL_SIZE; + at[0].value = 2 * 1024 * 1024; + at[1].key = OMP_ATK_FALLBACK; + at[1].value = OMP_ATV_DEFAULT_MEM_FB; + a = omp_init_allocator(omp_large_cap_mem_space, 2, at); + printf("allocator large created: %p\n", a); + #pragma omp parallel num_threads(2) + { + int i = omp_get_thread_num(); + p[i] = omp_alloc(1024 * 1024, a); + #pragma omp barrier + printf("th %d, ptr %p\n", i, p[i]); + omp_free(p[i], a); + } + // Both pointers should be non-NULL + if (p[0] != NULL && p[1] != NULL) { + printf("passed\n"); + return 0; + } else { + printf("failed: pointers %p %p\n", p[0], p[1]); + return 1; + } +} Index: runtime/test/api/omp_alloc_hbw.c =================================================================== --- /dev/null +++ runtime/test/api/omp_alloc_hbw.c @@ -0,0 +1,45 @@ +// RUN: %libomp-compile-and-run + +#include +#include + +int main() { + omp_alloctrait_t at[2]; + omp_allocator_handle_t a; + void *p[2]; + at[0].key = OMP_ATK_POOL_SIZE; + at[0].value = 2 * 1024 * 1024; + at[1].key = OMP_ATK_FALLBACK; + at[1].value = OMP_ATV_NULL_FB; + a = omp_init_allocator(omp_high_bw_mem_space, 2, at); + printf("allocator hbw created: %p\n", a); + #pragma omp parallel num_threads(2) + { + int i = omp_get_thread_num(); + p[i] = omp_alloc(1024 * 1024, a); + #pragma omp barrier + printf("th %d, ptr %p\n", i, p[i]); + omp_free(p[i], a); + } + if (a != omp_null_allocator) { + // As an allocator has some small memory overhead + // exactly one of the two pointers should be NULL + // because of NULL fallback requested + if ((p[0] == NULL && p[1] != NULL) || (p[0] != NULL && p[1] == NULL)) { + printf("passed\n"); + return 0; + } else { + printf("failed: pointers %p %p\n", p[0], p[1]); + return 1; + } + } else { + // NULL allocator should cause default allocations + if (p[0] != NULL && p[1] != NULL) { + printf("passed\n"); + return 0; + } else { + printf("failed: pointers %p %p\n", 
p[0], p[1]); + return 1; + } + } +} Index: runtime/test/api/omp_alloc_null_fb.c =================================================================== --- /dev/null +++ runtime/test/api/omp_alloc_null_fb.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile-and-run + +#include +#include + +int main() { + omp_alloctrait_t at[2]; + omp_allocator_handle_t a; + void *p[2]; + at[0].key = OMP_ATK_POOL_SIZE; + at[0].value = 2 * 1024 * 1024; + at[1].key = OMP_ATK_FALLBACK; + at[1].value = OMP_ATV_NULL_FB; + a = omp_init_allocator(omp_large_cap_mem_space, 2, at); + printf("allocator large created: %p\n", a); + #pragma omp parallel num_threads(2) + { + int i = omp_get_thread_num(); + #pragma omp barrier + p[i] = omp_alloc(1024 * 1024, a); + #pragma omp barrier + printf("th %d, ptr %p\n", i, p[i]); + omp_free(p[i], a); + } + // As an allocator has some small memory overhead + // exactly one of the two pointers should be NULL + // because of NULL fallback requested + if ((p[0] == NULL && p[1] != NULL) || (p[0] != NULL && p[1] == NULL)) { + printf("passed\n"); + return 0; + } else { + printf("failed: pointers %p %p\n", p[0], p[1]); + return 1; + } +} Index: runtime/tools/generate-def.pl =================================================================== --- runtime/tools/generate-def.pl +++ runtime/tools/generate-def.pl @@ -108,7 +108,8 @@ foreach my $entry ( keys( %$entries ) ) { if ( not $entries->{ $entry }->{ obsolete } ) { my $ordinal = $entries->{ $entry }->{ ordinal }; - if ( $entry =~ m{\A[ok]mp_} ) { + # omp_alloc and omp_free are C/C++ only functions, skip "1000+ordinal" for them + if ( $entry =~ m{\A[ok]mp_} and $entry ne "omp_alloc" and $entry ne "omp_free" ) { if ( not defined( $ordinal ) ) { runtime_error( "Bad entry \"$entry\": ordinal number is not specified."