Index: openmp/trunk/runtime/src/dllexports =================================================================== --- openmp/trunk/runtime/src/dllexports +++ openmp/trunk/runtime/src/dllexports @@ -539,6 +539,24 @@ kmp_set_disp_num_buffers 890 +%ifdef OMP_50 + omp_control_tool 891 + omp_set_default_allocator 892 + omp_get_default_allocator 893 + omp_alloc 894 + omp_free 895 + + OMP_NULL_ALLOCATOR DATA + omp_default_mem_alloc DATA + omp_large_cap_mem_alloc DATA + omp_const_mem_alloc DATA + omp_high_bw_mem_alloc DATA + omp_low_lat_mem_alloc DATA + omp_cgroup_mem_alloc DATA + omp_pteam_mem_alloc DATA + omp_thread_mem_alloc DATA +%endif # OMP_50 + %ifndef stub # Ordinals between 900 and 999 are reserved Index: openmp/trunk/runtime/src/exports_so.txt =================================================================== --- openmp/trunk/runtime/src/exports_so.txt +++ openmp/trunk/runtime/src/exports_so.txt @@ -21,6 +21,7 @@ # "Normal" symbols. # omp_*; # Standard OpenMP functions. + OMP_*; # Standard OpenMP symbols. # # OMPT API Index: openmp/trunk/runtime/src/i18n/en_US.txt =================================================================== --- openmp/trunk/runtime/src/i18n/en_US.txt +++ openmp/trunk/runtime/src/i18n/en_US.txt @@ -333,6 +333,7 @@ TopologyExtraNoTi "%1$s: %2$d packages x %3$d nodes/pkg x %4$d tiles/node x %5$d cores/tile x %6$d threads/core (%7$d total cores)" OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the default (loop) instead. " "This issue is fixed in an up-to-date compiler." +OmpNoAllocator "Allocator %1$s is not available, will use default allocator." 
# --- OpenMP errors detected at runtime --- # Index: openmp/trunk/runtime/src/include/50/omp.h.var =================================================================== --- openmp/trunk/runtime/src/include/50/omp.h.var +++ openmp/trunk/runtime/src/include/50/omp.h.var @@ -27,8 +27,14 @@ # if defined(_WIN32) # define __KAI_KMPC_CONVENTION __cdecl +# ifndef __KMP_IMP +# define __KMP_IMP __declspec(dllimport) +# endif # else # define __KAI_KMPC_CONVENTION +# ifndef __KMP_IMP +# define __KMP_IMP +# endif # endif /* schedule kind constants */ @@ -199,7 +205,30 @@ extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); + /* OpenMP 5.0 Memory Management */ + typedef void *omp_allocator_t; + extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR; + extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc; + extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc; + + extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *); + extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void); +#ifdef __cplusplus + extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); + extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR); +#else + extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator); + extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator); +#endif + # undef __KAI_KMPC_CONVENTION +# undef __KMP_IMP /* Warning: The following typedefs are 
not standard, deprecated and will be removed in a future release. Index: openmp/trunk/runtime/src/include/50/omp_lib.h.var =================================================================== --- openmp/trunk/runtime/src/include/50/omp_lib.h.var +++ openmp/trunk/runtime/src/include/50/omp_lib.h.var @@ -37,6 +37,8 @@ parameter(omp_control_tool_kind=omp_integer_kind) integer omp_control_tool_result_kind parameter(omp_control_tool_result_kind=omp_integer_kind) + integer omp_allocator_kind + parameter(omp_allocator_kind=int_ptr_kind()) integer(kind=omp_integer_kind)openmp_version parameter(openmp_version=@LIBOMP_OMP_YEAR_MONTH@) @@ -104,6 +106,16 @@ integer(kind=omp_control_tool_result_kind)omp_control_tool_ignored parameter(omp_control_tool_ignored=1) + integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 + integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 + integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 + integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 + integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 + integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 + integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 + integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 + integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 + interface ! *** @@ -381,6 +393,16 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority + subroutine omp_set_default_allocator(svar) bind(c) + import + integer (kind=omp_allocator_kind), value :: svar + end subroutine omp_set_default_allocator + + function omp_get_default_allocator() bind(c) + import + integer (kind=omp_allocator_kind) omp_get_default_allocator + end function omp_get_default_allocator + ! *** ! *** kmp_* entry points ! 
*** Index: openmp/trunk/runtime/src/include/50/omp_lib.f.var =================================================================== --- openmp/trunk/runtime/src/include/50/omp_lib.f.var +++ openmp/trunk/runtime/src/include/50/omp_lib.f.var @@ -34,6 +34,7 @@ integer, parameter :: omp_lock_hint_kind = omp_integer_kind integer, parameter :: omp_control_tool_kind = omp_integer_kind integer, parameter :: omp_control_tool_result_kind = omp_integer_kind + integer, parameter :: omp_allocator_kind = int_ptr_kind() end module omp_lib_kinds @@ -72,6 +73,16 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 + integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 + integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 + integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 + integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 + integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 + integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 + integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 + integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 + interface ! *** @@ -354,6 +365,16 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority + subroutine omp_set_default_allocator(svar) + use omp_lib_kinds + integer (kind=omp_allocator_kind) svar + end subroutine omp_set_default_allocator + + function omp_get_default_allocator() + use omp_lib_kinds + integer (kind=omp_allocator_kind) omp_get_default_allocator + end function omp_get_default_allocator + ! *** ! *** kmp_* entry points ! 
*** Index: openmp/trunk/runtime/src/include/50/omp_lib.f90.var =================================================================== --- openmp/trunk/runtime/src/include/50/omp_lib.f90.var +++ openmp/trunk/runtime/src/include/50/omp_lib.f90.var @@ -30,6 +30,7 @@ integer, parameter :: omp_lock_hint_kind = omp_integer_kind integer, parameter :: omp_control_tool_kind = omp_integer_kind integer, parameter :: omp_control_tool_result_kind = omp_integer_kind + integer, parameter :: omp_allocator_kind = c_intptr_t end module omp_lib_kinds @@ -80,6 +81,16 @@ integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 + integer (kind=omp_allocator_kind), parameter :: omp_null_allocator = 0 + integer (kind=omp_allocator_kind), parameter :: omp_default_mem_alloc = 1 + integer (kind=omp_allocator_kind), parameter :: omp_large_cap_mem_alloc = 2 + integer (kind=omp_allocator_kind), parameter :: omp_const_mem_alloc = 3 + integer (kind=omp_allocator_kind), parameter :: omp_high_bw_mem_alloc = 4 + integer (kind=omp_allocator_kind), parameter :: omp_low_lat_mem_alloc = 5 + integer (kind=omp_allocator_kind), parameter :: omp_cgroup_mem_alloc = 6 + integer (kind=omp_allocator_kind), parameter :: omp_pteam_mem_alloc = 7 + integer (kind=omp_allocator_kind), parameter :: omp_thread_mem_alloc = 8 + interface ! *** @@ -364,6 +375,16 @@ integer (kind=omp_integer_kind) omp_get_max_task_priority end function omp_get_max_task_priority + subroutine omp_set_default_allocator(svar) bind(c) + use omp_lib_kinds + integer (kind=omp_allocator_kind), value :: svar + end subroutine omp_set_default_allocator + + function omp_get_default_allocator() bind(c) + use omp_lib_kinds + integer (kind=omp_allocator_kind) omp_get_default_allocator + end function omp_get_default_allocator + ! *** ! *** kmp_* entry points ! 
*** Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -830,6 +830,31 @@ #define KMP_GTID_UNKNOWN (-5) /* Is not known */ #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ +#if OMP_50_ENABLED +/* OpenMP 5.0 Memory Management support */ +extern int __kmp_memkind_available; +extern int __kmp_hbw_mem_available; +typedef void *omp_allocator_t; +extern const omp_allocator_t *OMP_NULL_ALLOCATOR; +extern const omp_allocator_t *omp_default_mem_alloc; +extern const omp_allocator_t *omp_large_cap_mem_alloc; +extern const omp_allocator_t *omp_const_mem_alloc; +extern const omp_allocator_t *omp_high_bw_mem_alloc; +extern const omp_allocator_t *omp_low_lat_mem_alloc; +extern const omp_allocator_t *omp_cgroup_mem_alloc; +extern const omp_allocator_t *omp_pteam_mem_alloc; +extern const omp_allocator_t *omp_thread_mem_alloc; +extern const omp_allocator_t *__kmp_def_allocator; + +extern void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *al); +extern const omp_allocator_t *__kmpc_get_default_allocator(int gtid); +extern void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t *al); +extern void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *al); + +extern void __kmp_init_memkind(); +extern void __kmp_fini_memkind(); +#endif // OMP_50_ENABLED + /* ------------------------------------------------------------------------ */ #define KMP_UINT64_MAX \ @@ -2414,7 +2439,9 @@ #if KMP_AFFINITY_SUPPORTED kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ #endif - +#if OMP_50_ENABLED + void *const *th_def_allocator; /* per implicit task default allocator */ +#endif /* The data set by the master at reinit, then R/W by the worker */ KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ @@ -2628,6 +2655,9 @@ #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED int 
t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via // omp_set_num_threads() call +#if OMP_50_ENABLED + void *const *t_def_allocator; /* per implicit task default allocator */ +#endif // Read/write by workers as well #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) Index: openmp/trunk/runtime/src/kmp_alloc.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_alloc.cpp +++ openmp/trunk/runtime/src/kmp_alloc.cpp @@ -1221,6 +1221,159 @@ KE_TRACE(30, ("<- __kmp_thread_free()\n")); } +#if OMP_50_ENABLED +/* OMP 5.0 Memory Management support */ +static int (*p_hbw_check)(void); +static void *(*p_hbw_malloc)(size_t); +static void (*p_hbw_free)(void *); +static int (*p_hbw_set_policy)(int); +static const char *kmp_mk_lib_name; +static void *h_memkind; + +void __kmp_init_memkind() { +#if KMP_OS_UNIX && KMP_DYNAMIC_LIB + kmp_mk_lib_name = "libmemkind.so"; + h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); + if (h_memkind) { + p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available"); + p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc"); + p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free"); + p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy"); + if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) { + __kmp_memkind_available = 1; + if (p_hbw_check() == 0) { + p_hbw_set_policy(1); // return NULL if not enough memory + __kmp_hbw_mem_available = 1; // found HBW memory available + } + return; // success - all symbols resolved + } + dlclose(h_memkind); // failure + h_memkind = NULL; + } + p_hbw_check = NULL; + p_hbw_malloc = NULL; + p_hbw_free = NULL; + p_hbw_set_policy = NULL; +#else + kmp_mk_lib_name = ""; + h_memkind = NULL; + p_hbw_check = NULL; + p_hbw_malloc = NULL; + p_hbw_free = NULL; + p_hbw_set_policy = NULL; +#endif +} + +void __kmp_fini_memkind() { +#if KMP_OS_UNIX && KMP_DYNAMIC_LIB + if (h_memkind) { + dlclose(h_memkind); + h_memkind = NULL; + }
+ p_hbw_check = NULL; + p_hbw_malloc = NULL; + p_hbw_free = NULL; + p_hbw_set_policy = NULL; +#endif +} + +void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) { + if (allocator == OMP_NULL_ALLOCATOR) + allocator = omp_default_mem_alloc; + KMP_DEBUG_ASSERT( + allocator == omp_default_mem_alloc || + allocator == omp_large_cap_mem_alloc || + allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc || + allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc || + allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc); + __kmp_threads[gtid]->th.th_def_allocator = allocator; +} +const omp_allocator_t *__kmpc_get_default_allocator(int gtid) { + return __kmp_threads[gtid]->th.th_def_allocator; +} + +typedef struct kmp_mem_desc { // Memory block descriptor + void *ptr_alloc; // Pointer returned by allocator + size_t size_a; // Size of allocated memory block (initial+descriptor+align) + void *ptr_align; // Pointer to aligned memory, returned + const omp_allocator_t *allocator; // allocator +} kmp_mem_desc_t; +static int alignment = sizeof(void *); // let's align to pointer size + +void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (allocator == OMP_NULL_ALLOCATOR) + allocator = __kmp_threads[gtid]->th.th_def_allocator; + + int sz_desc = sizeof(kmp_mem_desc_t); + void *ptr = NULL; + kmp_mem_desc_t desc; + kmp_uintptr_t addr; // address returned by allocator + kmp_uintptr_t addr_align; // address to return to caller + kmp_uintptr_t addr_descr; // address of memory block descriptor + + KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); + + desc.size_a = size + sz_desc + alignment; + if (allocator == omp_default_mem_alloc) + ptr = __kmp_allocate(desc.size_a); + if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { + KMP_DEBUG_ASSERT(p_hbw_malloc != NULL); + ptr = p_hbw_malloc(desc.size_a); + } + + 
KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr, + (int)desc.size_a, __kmp_hbw_mem_available)); + if (ptr == NULL) + return NULL; + + addr = (kmp_uintptr_t)ptr; + addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1); + addr_descr = addr_align - sz_desc; + + desc.ptr_alloc = ptr; + desc.ptr_align = (void *)addr_align; + desc.allocator = allocator; + *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents + KMP_MB(); + + KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid)); + return desc.ptr_align; +} + +void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) { + KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator)); + if (ptr == NULL) + return; + + kmp_mem_desc_t desc; + kmp_uintptr_t addr_align; // aligned address passed in by caller + kmp_uintptr_t addr_descr; // address of memory block descriptor + + addr_align = (kmp_uintptr_t)ptr; + addr_descr = addr_align - sizeof(kmp_mem_desc_t); + desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor + + KMP_DEBUG_ASSERT(desc.ptr_align == ptr); + if (allocator) { + KMP_DEBUG_ASSERT(desc.allocator == allocator); + } else { + allocator = desc.allocator; + } + KMP_DEBUG_ASSERT(allocator); + + if (allocator == omp_default_mem_alloc) + __kmp_free(desc.ptr_alloc); + if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) { + KMP_DEBUG_ASSERT(p_hbw_free != NULL); + p_hbw_free(desc.ptr_alloc); + } + KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc, + allocator)); +} + +#endif + /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it may be useful for debugging memory corruptions, used freed pointers, etc.
*/ Index: openmp/trunk/runtime/src/kmp_barrier.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_barrier.cpp +++ openmp/trunk/runtime/src/kmp_barrier.cpp @@ -1984,6 +1984,10 @@ } } #endif +#if OMP_50_ENABLED + if (!KMP_MASTER_TID(tid)) + KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator); +#endif #if USE_ITT_BUILD && USE_ITT_NOTIFY if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { Index: openmp/trunk/runtime/src/kmp_csupport.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_csupport.cpp +++ openmp/trunk/runtime/src/kmp_csupport.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define __KMP_IMP #include "omp.h" /* extern "C" declarations of user-visible routines */ #include "kmp.h" #include "kmp_error.h" @@ -540,6 +541,9 @@ serial_team->t.t_dispatch->th_disp_buffer->next; __kmp_free(disp_buffer); } +#if OMP_50_ENABLED + this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore +#endif --serial_team->t.t_serialized; if (serial_team->t.t_serialized == 0) { Index: openmp/trunk/runtime/src/kmp_ftn_entry.h =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_entry.h +++ openmp/trunk/runtime/src/kmp_ftn_entry.h @@ -361,7 +361,35 @@ return ret; #endif } + +/* OpenMP 5.0 Memory Management support */ +void FTN_STDCALL FTN_SET_DEFAULT_ALLOCATOR(const omp_allocator_t *allocator) { +#ifndef KMP_STUB + __kmpc_set_default_allocator(__kmp_entry_gtid(), allocator); +#endif +} +const omp_allocator_t *FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) { +#ifdef KMP_STUB + return NULL; +#else + return __kmpc_get_default_allocator(__kmp_entry_gtid()); #endif +} +void *FTN_STDCALL FTN_ALLOC(size_t size, const omp_allocator_t *allocator) { +#ifdef KMP_STUB + return malloc(size); +#else + return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 
+#endif +} +void FTN_STDCALL FTN_FREE(void *ptr, const omp_allocator_t *allocator) { +#ifdef KMP_STUB + free(ptr); +#else + __kmpc_free(__kmp_entry_gtid(), ptr, allocator); +#endif +} +#endif /* OMP_50_ENABLED */ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { #ifdef KMP_STUB @@ -1135,7 +1163,7 @@ return kmpc_realloc(KMP_DEREF ptr, KMP_DEREF size); } -void FTN_STDCALL FTN_FREE(void *KMP_DEREF ptr) { +void FTN_STDCALL FTN_KFREE(void *KMP_DEREF ptr) { // does nothing if the library is not initialized kmpc_free(KMP_DEREF ptr); } Index: openmp/trunk/runtime/src/kmp_ftn_os.h =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_os.h +++ openmp/trunk/runtime/src/kmp_ftn_os.h @@ -47,7 +47,7 @@ #define FTN_ALIGNED_MALLOC kmp_aligned_malloc #define FTN_CALLOC kmp_calloc #define FTN_REALLOC kmp_realloc -#define FTN_FREE kmp_free +#define FTN_KFREE kmp_free #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads @@ -135,6 +135,10 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL omp_control_tool +#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator +#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator +#define FTN_ALLOC omp_alloc +#define FTN_FREE omp_free #endif #endif /* KMP_FTN_PLAIN */ @@ -169,7 +173,7 @@ #define FTN_ALIGNED_MALLOC kmp_aligned_malloc_ #define FTN_CALLOC kmp_calloc_ #define FTN_REALLOC kmp_realloc_ -#define FTN_FREE kmp_free_ +#define FTN_KFREE kmp_free_ #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ @@ -256,7 +260,11 @@ #endif #if OMP_50_ENABLED -#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#define FTN_CONTROL_TOOL omp_control_tool_ +#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_ +#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ +#define FTN_ALLOC omp_alloc_ +#define FTN_FREE omp_free_ #endif #endif /* KMP_FTN_APPEND */ @@ -291,7 +299,7 @@ #define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC #define FTN_CALLOC KMP_CALLOC #define FTN_REALLOC 
KMP_REALLOC -#define FTN_FREE KMP_FREE +#define FTN_KFREE KMP_FREE #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS @@ -379,6 +387,10 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR +#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR +#define FTN_ALLOC OMP_ALLOC +#define FTN_FREE OMP_FREE #endif #endif /* KMP_FTN_UPPER */ @@ -413,7 +425,7 @@ #define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC_ #define FTN_CALLOC KMP_CALLOC_ #define FTN_REALLOC KMP_REALLOC_ -#define FTN_FREE KMP_FREE_ +#define FTN_KFREE KMP_FREE_ #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ @@ -501,6 +513,10 @@ #if OMP_50_ENABLED #define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_ +#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR_ +#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ +#define FTN_ALLOC OMP_ALLOC_ +#define FTN_FREE OMP_FREE_ #endif #endif /* KMP_FTN_UAPPEND */ Index: openmp/trunk/runtime/src/kmp_global.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_global.cpp +++ openmp/trunk/runtime/src/kmp_global.cpp @@ -300,6 +300,21 @@ kmp_uint64 __kmp_taskloop_min_tasks = 0; #endif +#if OMP_50_ENABLED +int __kmp_memkind_available = 0; +int __kmp_hbw_mem_available = 0; +const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; +const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; +const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; +const omp_allocator_t *omp_const_mem_alloc = (const omp_allocator_t *)3; +const omp_allocator_t *omp_high_bw_mem_alloc = (const omp_allocator_t *)4; +const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; +const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; +const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; +const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; +void *const 
*__kmp_def_allocator = omp_default_mem_alloc; +#endif + /* This check ensures that the compiler is passing the correct data type for the flags formal parameter of the function kmpc_omp_task_alloc(). If the type is not a 4-byte type, then give an error message about a non-positive length Index: openmp/trunk/runtime/src/kmp_runtime.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.cpp +++ openmp/trunk/runtime/src/kmp_runtime.cpp @@ -539,8 +539,16 @@ team_id); } -static void __kmp_init_allocator() {} -static void __kmp_fini_allocator() {} +static void __kmp_init_allocator() { +#if OMP_50_ENABLED + __kmp_init_memkind(); +#endif +} +static void __kmp_fini_allocator() { +#if OMP_50_ENABLED + __kmp_fini_memkind(); +#endif +} /* ------------------------------------------------------------------------ */ @@ -1318,6 +1326,9 @@ serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; +#if OMP_50_ENABLED + serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save +#endif propagateFPControl(serial_team); @@ -1608,6 +1619,9 @@ KMP_ATOMIC_INC(&root->r.r_in_parallel); parent_team->t.t_active_level++; parent_team->t.t_level++; +#if OMP_50_ENABLED + parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save +#endif /* Change number of threads in the team if requested */ if (master_set_numthreads) { // The parallel has num_threads clause @@ -2073,6 +2087,9 @@ #if OMP_40_ENABLED KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); #endif +#if OMP_50_ENABLED + KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); +#endif // Update the floating point rounding in the team if required. 
propagateFPControl(team); @@ -2514,6 +2531,9 @@ master_th->th.th_first_place = team->t.t_first_place; master_th->th.th_last_place = team->t.t_last_place; #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED + master_th->th.th_def_allocator = team->t.t_def_allocator; +#endif updateHWFPControl(team); @@ -3791,11 +3811,13 @@ root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; #endif - if (TCR_4(__kmp_init_middle)) { __kmp_affinity_set_init_mask(gtid, TRUE); } #endif /* KMP_AFFINITY_SUPPORTED */ +#if OMP_50_ENABLED + root_thread->th.th_def_allocator = __kmp_def_allocator; +#endif __kmp_root_counter++; @@ -4334,6 +4356,9 @@ new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; #endif +#if OMP_50_ENABLED + new_thr->th.th_def_allocator = __kmp_def_allocator; +#endif TCW_4(new_thr->th.th_in_pool, FALSE); new_thr->th.th_active_in_pool = FALSE; Index: openmp/trunk/runtime/src/kmp_settings.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_settings.cpp +++ openmp/trunk/runtime/src/kmp_settings.cpp @@ -3251,6 +3251,149 @@ #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED + +// OMP_ALLOCATOR sets default allocator +static void __kmp_stg_parse_allocator(char const *name, char const *value, + void *data) { + /* + The value can be any predefined allocator: + omp_default_mem_alloc = 1; + omp_large_cap_mem_alloc = 2; + omp_const_mem_alloc = 3; + omp_high_bw_mem_alloc = 4; + omp_low_lat_mem_alloc = 5; + omp_cgroup_mem_alloc = 6; + omp_pteam_mem_alloc = 7; + omp_thread_mem_alloc = 8; + Acceptable value is either a digit or a string. 
+ */ + const char *buf = value; + const char *next; + int num; + SKIP_WS(buf); + if ((*buf > '0') && (*buf < '9')) { + next = buf; + SKIP_DIGITS(next); + num = __kmp_str_to_int(buf, *next); + KMP_ASSERT(num > 0); + switch (num) { + case 4: + if (__kmp_hbw_mem_available) { + __kmp_def_allocator = omp_high_bw_mem_alloc; + } else { + __kmp_msg(kmp_ms_warning, + KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } + break; + case 1: + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 2: + __kmp_msg(kmp_ms_warning, + KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 3: + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 5: + __kmp_msg(kmp_ms_warning, + KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 6: + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 7: + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + case 8: + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + break; + } + return; + } + next = buf; + if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) { + if (__kmp_hbw_mem_available) { + __kmp_def_allocator = omp_high_bw_mem_alloc; + } else { + __kmp_msg(kmp_ms_warning, + KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } + } else if (__kmp_match_str("omp_default_mem_alloc", buf, &next)) { + __kmp_def_allocator = omp_default_mem_alloc; + } else if 
(__kmp_match_str("omp_large_cap_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, + KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } else if (__kmp_match_str("omp_const_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } else if (__kmp_match_str("omp_low_lat_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } else if (__kmp_match_str("omp_cgroup_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } else if (__kmp_match_str("omp_pteam_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } else if (__kmp_match_str("omp_thread_mem_alloc", buf, &next)) { + __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"), + __kmp_msg_null); + __kmp_def_allocator = omp_default_mem_alloc; + } + buf = next; + SKIP_WS(buf); + if (*buf != '\0') { + KMP_WARNING(ParseExtraCharsWarn, name, buf); + } +} + +static void __kmp_stg_print_allocator(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_def_allocator == omp_default_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_default_mem_alloc"); + } else if (__kmp_def_allocator == omp_high_bw_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_high_bw_mem_alloc"); + } else if (__kmp_def_allocator == omp_large_cap_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_large_cap_mem_alloc"); + } else if (__kmp_def_allocator == omp_const_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_const_mem_alloc"); + } else if (__kmp_def_allocator == 
omp_low_lat_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_low_lat_mem_alloc"); + } else if (__kmp_def_allocator == omp_cgroup_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_cgroup_mem_alloc"); + } else if (__kmp_def_allocator == omp_pteam_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_pteam_mem_alloc"); + } else if (__kmp_def_allocator == omp_thread_mem_alloc) { + __kmp_stg_print_str(buffer, name, "omp_thread_mem_alloc"); + } +} + +#endif /* OMP_50_ENABLED */ + // ----------------------------------------------------------------------------- // OMP_DYNAMIC @@ -4707,7 +4850,6 @@ {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0}, #endif /* OMP_40_ENABLED */ - {"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0}, @@ -4791,6 +4933,11 @@ __kmp_stg_print_omp_cancellation, NULL, 0, 0}, #endif +#if OMP_50_ENABLED + {"OMP_ALLOCATOR", __kmp_stg_parse_allocator, __kmp_stg_print_allocator, + NULL, 0, 0}, +#endif + #if OMP_50_ENABLED && OMPT_SUPPORT {"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries, __kmp_stg_print_omp_tool_libraries, NULL, 0, 0}, Index: openmp/trunk/runtime/src/kmp_stub.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_stub.cpp +++ openmp/trunk/runtime/src/kmp_stub.cpp @@ -15,6 +15,7 @@ #include #include +#define __KMP_IMP #include "omp.h" // omp_* declarations, must be included before "kmp.h" #include "kmp.h" // KMP_DEFAULT_STKSIZE #include "kmp_stub.h" @@ -338,4 +339,17 @@ return wtick; } // __kmps_get_wtick +#if OMP_50_ENABLED +/* OpenMP 5.0 Memory Management */ +const omp_allocator_t *OMP_NULL_ALLOCATOR = NULL; +const omp_allocator_t *omp_default_mem_alloc = (const omp_allocator_t *)1; +const omp_allocator_t *omp_large_cap_mem_alloc = (const omp_allocator_t *)2; +const omp_allocator_t *omp_const_mem_alloc = (const omp_allocator_t *)3; +const omp_allocator_t *omp_high_bw_mem_alloc = (const 
omp_allocator_t *)4; +const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; +const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; +const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; +const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; +#endif /* OMP_50_ENABLED */ + // end of file // Index: openmp/trunk/runtime/test/api/omp_alloc.c =================================================================== --- openmp/trunk/runtime/test/api/omp_alloc.c +++ openmp/trunk/runtime/test/api/omp_alloc.c @@ -0,0 +1,79 @@ +// RUN: %libomp-compile-and-run +#include +#include +#include +#include "omp_testsuite.h" + +#define ARRAY_SIZE 10000 + +int test_omp_alloc() { + int err; + int i, j; + int *shared_array; + const omp_allocator_t *allocator; + const omp_allocator_t *test_allocator; + // Currently, only default memory allocator is implemented + const omp_allocator_t *allocators[] = { + omp_default_mem_alloc, + }; + + err = 0; + for (i = 0; i < sizeof(allocators) / sizeof(allocators[0]); ++i) { + allocator = allocators[i]; + printf("Using %p allocator\n", allocator); + omp_set_default_allocator(allocator); + test_allocator = omp_get_default_allocator(); + if (test_allocator != allocator) { + printf("error: omp_set|get_default_allocator() not working\n"); + return 0; + } + shared_array = (int *)omp_alloc(sizeof(int) * ARRAY_SIZE, test_allocator); + if (shared_array == NULL) { + printf("error: shared_array is NULL\n"); + return 0; + } + for (j = 0; j < ARRAY_SIZE; ++j) { + shared_array[j] = j; + } + #pragma omp parallel shared(shared_array) + { + int i; + int tid = omp_get_thread_num(); + int *private_array = + (int *)omp_alloc(sizeof(int) * ARRAY_SIZE, omp_default_mem_alloc); + if (private_array == NULL) { + printf("error: thread %d private_array is NULL\n", tid); + #pragma omp atomic + err++; + } else { /* only touch the array when the allocation succeeded */ + for (i = 0; i < ARRAY_SIZE; ++i) { + private_array[i] = shared_array[i] + tid; + } + for (i = 0;
i < ARRAY_SIZE; ++i) { + if (private_array[i] != i + tid) { + printf("error: thread %d element %d is %d instead of %d\n", tid, i, + private_array[i], i + tid); + #pragma omp atomic + err++; + } + } + } + omp_free(private_array, omp_default_mem_alloc); + } /* end of parallel */ + omp_free(shared_array, test_allocator); + } + + return !err; +} + +int main() { + int i; + int num_failed = 0; + + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_alloc()) { + num_failed++; + } + } + return num_failed; +} Index: openmp/trunk/runtime/tools/generate-def.pl =================================================================== --- openmp/trunk/runtime/tools/generate-def.pl +++ openmp/trunk/runtime/tools/generate-def.pl @@ -110,12 +110,14 @@ if ( not $entries->{ $entry }->{ obsolete } ) { my $ordinal = $entries->{ $entry }->{ ordinal }; if ( $entry =~ m{\A[ok]mp_} ) { - if ( not defined( $ordinal ) or $ordinal eq "DATA" ) { + if ( not defined( $ordinal ) ) { runtime_error( "Bad entry \"$entry\": ordinal number is not specified." ); }; # if - $entries->{ uc( $entry ) } = { ordinal => 1000 + $ordinal }; + if ( $ordinal ne "DATA" ) { + $entries->{ uc( $entry ) } = { ordinal => 1000 + $ordinal }; + } }; # if }; # if }; # foreach