Index: openmp/runtime/src/CMakeLists.txt
===================================================================
--- openmp/runtime/src/CMakeLists.txt
+++ openmp/runtime/src/CMakeLists.txt
@@ -45,6 +45,7 @@
   ${LIBOMP_SRC_DIR}/i18n
   ${LIBOMP_INC_DIR}
   ${LIBOMP_SRC_DIR}/thirdparty/ittnotify
+  ${LIBOMP_SRC_DIR}/thirdparty/memkind
 )
 if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
@@ -93,6 +94,7 @@
   libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file
 endif()
 libomp_append(LIBOMP_CXXFILES thirdparty/ittnotify/ittnotify_static.cpp LIBOMP_USE_ITT_NOTIFY)
+libomp_append(LIBOMP_CXXFILES thirdparty/memkind/kmp_memkind.cpp)
 libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER)
 libomp_append(LIBOMP_CXXFILES kmp_stats.cpp LIBOMP_STATS)
 libomp_append(LIBOMP_CXXFILES kmp_stats_timing.cpp LIBOMP_STATS)
Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -920,6 +920,19 @@
   omp_uintptr_t value;
 } omp_alloctrait_t;
 
+enum {
+  omp_ata_null = 0,
+  omp_ata_default = 1,
+  omp_ata_large_cap = 2,
+  omp_ata_const = 3,
+  omp_ata_high_bw = 4,
+  omp_ata_low_lat = 5,
+  omp_ata_cgroup = 6,
+  omp_ata_pteam = 7,
+  omp_ata_thread = 8,
+  omp_ata_last = 9,
+};
+
 typedef void *omp_allocator_handle_t;
 extern omp_allocator_handle_t const omp_null_allocator;
 extern omp_allocator_handle_t const omp_default_mem_alloc;
@@ -936,20 +949,29 @@
 // end of duplicate type definitions from omp.h
 #endif
 
-extern int __kmp_memkind_available;
-
 typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
 
 typedef struct kmp_allocator_t {
   omp_memspace_handle_t memspace;
-  void **memkind; // pointer to memkind
   int alignment;
   omp_alloctrait_value_t fb;
   kmp_allocator_t *fb_data;
   kmp_uint64 pool_size;
   kmp_uint64 pool_used;
+  int partition;
+
+  /* custom allocators */
+  void *(*alloc)(size_t size, kmp_allocator_t *, int gtid);
+  void (*free)(void *p, kmp_allocator_t 
*, int gtid); + void *aux; // pointer to auxiliary data that the custom allocators might need } kmp_allocator_t; +extern kmp_allocator_t kmp_standard_allocators[]; +extern void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid); +extern void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid); +extern int (*kmp_init_allocator_p)(kmp_allocator_t *); +extern void (*kmp_destroy_allocator_p)(kmp_allocator_t *); + extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t, int ntraits, Index: openmp/runtime/src/kmp_alloc.cpp =================================================================== --- openmp/runtime/src/kmp_alloc.cpp +++ openmp/runtime/src/kmp_alloc.cpp @@ -1221,119 +1221,68 @@ } /* OMP 5.0 Memory Management support */ -static const char *kmp_mk_lib_name; -static void *h_memkind; -/* memkind experimental API: */ -// memkind_alloc -static void *(*kmp_mk_alloc)(void *k, size_t sz); -// memkind_free -static void (*kmp_mk_free)(void *kind, void *ptr); -// memkind_check_available -static int (*kmp_mk_check)(void *kind); -// kinds we are going to use -static void **mk_default; -static void **mk_interleave; -static void **mk_hbw; -static void **mk_hbw_interleave; -static void **mk_hbw_preferred; -static void **mk_hugetlb; -static void **mk_hbw_hugetlb; -static void **mk_hbw_preferred_hugetlb; - -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB -static inline void chk_kind(void ***pkind) { - KMP_DEBUG_ASSERT(pkind); - if (*pkind) // symbol found - if (kmp_mk_check(**pkind)) // kind not available or error - *pkind = NULL; -} -#endif - -void __kmp_init_memkind() { -// as of 2018-07-31 memkind does not support Windows*, exclude it for now -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB - // use of statically linked memkind is problematic, as it depends on libnuma - kmp_mk_lib_name = "libmemkind.so"; - h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); - if (h_memkind) { - kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available"); - 
kmp_mk_alloc = - (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc"); - kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free"); - mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT"); - if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default && - !kmp_mk_check(*mk_default)) { - __kmp_memkind_available = 1; - mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE"); - chk_kind(&mk_interleave); - mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW"); - chk_kind(&mk_hbw); - mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE"); - chk_kind(&mk_hbw_interleave); - mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED"); - chk_kind(&mk_hbw_preferred); - mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB"); - chk_kind(&mk_hugetlb); - mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB"); - chk_kind(&mk_hbw_hugetlb); - mk_hbw_preferred_hugetlb = - (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB"); - chk_kind(&mk_hbw_preferred_hugetlb); - KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n")); - return; // success - } - dlclose(h_memkind); // failure - h_memkind = NULL; - } - kmp_mk_check = NULL; - kmp_mk_alloc = NULL; - kmp_mk_free = NULL; - mk_default = NULL; - mk_interleave = NULL; - mk_hbw = NULL; - mk_hbw_interleave = NULL; - mk_hbw_preferred = NULL; - mk_hugetlb = NULL; - mk_hbw_hugetlb = NULL; - mk_hbw_preferred_hugetlb = NULL; -#else - kmp_mk_lib_name = ""; - h_memkind = NULL; - kmp_mk_check = NULL; - kmp_mk_alloc = NULL; - kmp_mk_free = NULL; - mk_default = NULL; - mk_interleave = NULL; - mk_hbw = NULL; - mk_hbw_interleave = NULL; - mk_hbw_preferred = NULL; - mk_hugetlb = NULL; - mk_hbw_hugetlb = NULL; - mk_hbw_preferred_hugetlb = NULL; -#endif -} +void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid); +void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid); + +kmp_allocator_t kmp_standard_allocators[] = { + { /* omp_null_allocator 
*/ + .memspace = omp_default_mem_space, + .fb = omp_atv_abort_fb, + .alloc = NULL, + .free = NULL, + }, + { /* omp_default_allocator */ + .memspace = omp_default_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_large_cap_allocator */ + .memspace = omp_large_cap_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_const_allocator */ + .memspace = omp_const_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_high_bw_allocator */ + .memspace = omp_high_bw_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_low_lat_allocator */ + .memspace = omp_low_lat_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_cgroup_allocator */ + .memspace = omp_default_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_pteam_allocator */ + .memspace = omp_default_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, + { /* omp_thread_allocator */ + .memspace = omp_default_mem_space, + .fb = omp_atv_abort_fb, + .alloc = kmp_default_alloc, + .free = kmp_default_free, + }, +}; -void __kmp_fini_memkind() { -#if KMP_OS_UNIX && KMP_DYNAMIC_LIB - if (__kmp_memkind_available) - KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n")); - if (h_memkind) { - dlclose(h_memkind); - h_memkind = NULL; - } - kmp_mk_check = NULL; - kmp_mk_alloc = NULL; - kmp_mk_free = NULL; - mk_default = NULL; - mk_interleave = NULL; - mk_hbw = NULL; - mk_hbw_interleave = NULL; - mk_hbw_preferred = NULL; - mk_hugetlb = NULL; - mk_hbw_hugetlb = NULL; - mk_hbw_preferred_hugetlb = NULL; -#endif -} +// custom initialization function +int (*kmp_init_allocator_p)(kmp_allocator_t *); omp_allocator_handle_t 
__kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, int ntraits, @@ -1366,10 +1315,13 @@ al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb); break; case omp_atk_fb_data: - al->fb_data = RCAST(kmp_allocator_t *, traits[i].value); + if (traits[i].value < sizeof(kmp_standard_allocators) / sizeof(kmp_standard_allocators[0])) + al->fb_data = &kmp_standard_allocators[traits[i].value]; + else + al->fb_data = RCAST(kmp_allocator_t *, traits[i].value); break; case omp_atk_partition: - al->memkind = RCAST(void **, traits[i].value); + al->partition = traits[i].value; break; default: KMP_ASSERT2(0, "Unexpected allocator trait"); @@ -1378,47 +1330,31 @@ if (al->fb == 0) { // set default allocator al->fb = omp_atv_default_mem_fb; - al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; + al->fb_data = &kmp_standard_allocators[(uintptr_t) omp_default_mem_alloc]; } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al->fb_data != NULL); } else if (al->fb == omp_atv_default_mem_fb) { - al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; + al->fb_data = &kmp_standard_allocators[(uintptr_t) omp_default_mem_alloc]; } - if (__kmp_memkind_available) { - // Let's use memkind library if available - if (ms == omp_high_bw_mem_space) { - if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) { - al->memkind = mk_hbw_interleave; - } else if (mk_hbw_preferred) { - // AC: do not try to use MEMKIND_HBW for now, because memkind library - // cannot reliably detect exhaustion of HBW memory. - // It could be possible using hbw_verify_memory_region() but memkind - // manual says: "Using this function in production code may result in - // serious performance penalty". 
- al->memkind = mk_hbw_preferred; - } else { - // HBW is requested but not available --> return NULL allocator - __kmp_free(al); - return omp_null_allocator; - } - } else { - if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) { - al->memkind = mk_interleave; - } else { - al->memkind = mk_default; - } - } - } else { - if (ms == omp_high_bw_mem_space) { - // cannot detect HBW memory presence without memkind library + + al->alloc = kmp_default_alloc; + al->free = kmp_default_free; + if (kmp_init_allocator_p != NULL && kmp_init_allocator_p(al) != 0) { + // something went wrong, bail __kmp_free(al); return omp_null_allocator; } - } + return (omp_allocator_handle_t)al; } +// custom destruction function +void (*kmp_destroy_allocator_p)(kmp_allocator_t *); + void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) { + if (kmp_destroy_allocator_p) + kmp_destroy_allocator_p(RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator))); + if (allocator > kmp_max_mem_alloc) __kmp_free(allocator); } @@ -1441,15 +1377,28 @@ } kmp_mem_desc_t; static int alignment = sizeof(void *); // let's align to pointer size +void *kmp_default_alloc(size_t size, kmp_allocator_t *alm, int gtid) { + return __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), size); +} + +void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid) { + return __kmp_thread_free(__kmp_thread_from_gtid(gtid), ptr); +} + void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) { void *ptr = NULL; - kmp_allocator_t *al; + kmp_allocator_t *al = NULL; KMP_DEBUG_ASSERT(__kmp_init_serial); if (allocator == omp_null_allocator) allocator = __kmp_threads[gtid]->th.th_def_allocator; KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); - al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)); + if ((uintptr_t) allocator < sizeof(kmp_standard_allocators) / sizeof(kmp_standard_allocators[0])) + al = 
&kmp_standard_allocators[(uintptr_t) allocator]; + else if (allocator > kmp_max_mem_alloc) + al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)); + + KMP_ASSERT(al != NULL); int sz_desc = sizeof(kmp_mem_desc_t); kmp_mem_desc_t desc; @@ -1462,101 +1411,29 @@ } desc.size_a = size + sz_desc + align; - if (__kmp_memkind_available) { - if (allocator < kmp_max_mem_alloc) { - // pre-defined allocator - if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) { - ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a); - } else { - ptr = kmp_mk_alloc(*mk_default, desc.size_a); - } - } else if (al->pool_size > 0) { - // custom allocator with pool size requested - kmp_uint64 used = - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); + if (al->pool_size > 0) { + kmp_uint64 used = KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); if (used + desc.size_a > al->pool_size) { // not enough space, need to go fallback path KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; - ptr = kmp_mk_alloc(*mk_default, desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); - } // else ptr == NULL; - } else { - // pool has enough space - ptr = kmp_mk_alloc(*al->memkind, desc.size_a); - if (ptr == NULL) { - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; - ptr = kmp_mk_alloc(*mk_default, desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); - } - } - } - } else { - // custom allocator, pool size not 
requested - ptr = kmp_mk_alloc(*al->memkind, desc.size_a); - if (ptr == NULL) { - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; - ptr = kmp_mk_alloc(*mk_default, desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); - } - } + switch (al->fb) { + default: + return NULL; + + case omp_atv_abort_fb: + KMP_ASSERT(0); + abort(); + + case omp_atv_default_mem_fb: + [[clang::fallthrough]]; + case omp_atv_allocator_fb: + ptr = __kmpc_alloc(gtid, size, al->fb_data); + return ptr; } - } else if (allocator < kmp_max_mem_alloc) { - // pre-defined allocator - if (allocator == omp_high_bw_mem_alloc) { - // ptr = NULL; - } else { - ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - } - } else if (al->pool_size > 0) { - // custom allocator with pool size requested - kmp_uint64 used = - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); - if (used + desc.size_a > al->pool_size) { - // not enough space, need to go fallback path - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; - ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al); - } // else ptr == NULL; - } else { - // pool has enough space - ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - if (ptr == NULL && al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } // no sense to look for another fallback because of same internal alloc } - 
} else {
-    // custom allocator, pool size not requested
-    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
-      KMP_ASSERT(0); // abort fallback requested
-    } // no sense to look for another fallback because of same internal alloc
   }
+
+  ptr = (*al->alloc)(desc.size_a, al, gtid);
 
   KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
   if (ptr == NULL)
     return NULL;
@@ -1599,32 +1476,15 @@
   oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
 
   KMP_DEBUG_ASSERT(al);
-  if (__kmp_memkind_available) {
-    if (oal < kmp_max_mem_alloc) {
-      // pre-defined allocator
-      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
-        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
-      } else {
-        kmp_mk_free(*mk_default, desc.ptr_alloc);
-      }
-    } else {
+  KMP_ASSERT(al->free != NULL);
+  // Release the actual allocation start (desc.ptr_alloc), not the aligned
+  // user pointer: the two differ by the descriptor plus alignment padding,
+  // and the backend allocator only knows about desc.ptr_alloc.
+  (*al->free)(desc.ptr_alloc, al, gtid);
+  if (al->pool_size > 0) {
       // custom allocator with pool size requested
       kmp_uint64 used =
           KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
       (void)used; // to suppress compiler warning
       KMP_DEBUG_ASSERT(used >= desc.size_a);
     }
-    kmp_mk_free(*al->memkind, desc.ptr_alloc);
-  }
-  } else {
-    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
-      kmp_uint64 used =
-          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      (void)used; // to suppress compiler warning
-      KMP_DEBUG_ASSERT(used >= desc.size_a);
-    }
-    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
-  }
   KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
                 allocator));
 }
Index: openmp/runtime/src/kmp_global.cpp
===================================================================
--- openmp/runtime/src/kmp_global.cpp
+++ openmp/runtime/src/kmp_global.cpp
@@ -287,7 +287,6 @@
 kmp_int32 __kmp_max_task_priority = 0;
 kmp_uint64 __kmp_taskloop_min_tasks = 0;
 
-int __kmp_memkind_available = 0;
 omp_allocator_handle_t const omp_null_allocator = NULL;
 omp_allocator_handle_t const 
omp_default_mem_alloc = (omp_allocator_handle_t const)1;
Index: openmp/runtime/src/kmp_runtime.cpp
===================================================================
--- openmp/runtime/src/kmp_runtime.cpp
+++ openmp/runtime/src/kmp_runtime.cpp
@@ -526,8 +526,13 @@
             "%s_%d.t_disp_buffer", header, team_id);
 }
 
-static void __kmp_init_allocator() { __kmp_init_memkind(); }
-static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
+static void __kmp_init_allocator() {
+  __kmp_init_memkind();
+}
+
+static void __kmp_fini_allocator() {
+  __kmp_fini_memkind();
+}
 
 /* ------------------------------------------------------------------------ */
 
Index: openmp/runtime/src/kmp_settings.cpp
===================================================================
--- openmp/runtime/src/kmp_settings.cpp
+++ openmp/runtime/src/kmp_settings.cpp
@@ -3275,6 +3275,18 @@
   __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format);
 }
 
 // OMP_ALLOCATOR sets default allocator
+// Names of the predefined allocators; entry i corresponds to allocator
+// handle i + 1 (omp_default_mem_alloc == 1).
+static const char *__kmp_allocator_names[] = {
+  "omp_default_mem_alloc",
+  "omp_large_cap_mem_alloc",
+  "omp_const_mem_alloc",
+  "omp_high_bw_mem_alloc",
+  "omp_low_lat_mem_alloc",
+  "omp_cgroup_mem_alloc",
+  "omp_pteam_mem_alloc",
+  "omp_thread_mem_alloc",
+  NULL
+};
+
 static void __kmp_stg_parse_allocator(char const *name, char const *value,
                                       void *data) {
   /*
@@ -3297,94 +3309,27 @@
     next = buf;
     SKIP_DIGITS(next);
     num = __kmp_str_to_int(buf, *next);
-    KMP_ASSERT(num > 0);
-    switch (num) {
-    case 4:
-      if (__kmp_memkind_available) {
-        __kmp_def_allocator = omp_high_bw_mem_alloc;
-      } else {
-        __kmp_msg(kmp_ms_warning,
-                  KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
-                  __kmp_msg_null);
-        __kmp_def_allocator = omp_default_mem_alloc;
-      }
-      break;
-    case 1:
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 2:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 3:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 5:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 6:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 7:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 8:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    }
-    return;
+    KMP_ASSERT(num > 0 && num < 9);
+  } else {
+    num = -1;
+    next = buf;
+    // Match the allocator by name; entry i maps to handle i + 1.
+    for (int i = 0; __kmp_allocator_names[i] != NULL; i++) {
+      if (__kmp_match_str(__kmp_allocator_names[i], buf, &next)) {
+        num = i + 1;
+        break;
+      }
+    }
   }
-  next = buf;
-  if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) {
-    if (__kmp_memkind_available) {
-      __kmp_def_allocator = omp_high_bw_mem_alloc;
-    } else {
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-    }
-  } else if (__kmp_match_str("omp_default_mem_alloc", buf, &next)) {
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_large_cap_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning,
-              KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_const_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_low_lat_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_cgroup_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_pteam_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_thread_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
+
+  if (num > 0) {
+    // Warn and fall back to omp_default_mem_alloc (as the old code did) when
+    // the requested predefined allocator has no specialized backend, i.e. it
+    // still points at the plain kmp_default_alloc heap routine.
+    if (num != omp_ata_default &&
+        kmp_standard_allocators[num].alloc == kmp_default_alloc) {
+      __kmp_msg(kmp_ms_warning,
+                KMP_MSG(OmpNoAllocator, __kmp_allocator_names[num - 1]),
+                __kmp_msg_null);
+      num = omp_ata_default;
+    }
+
+    __kmp_def_allocator = (omp_allocator_handle_t)(uintptr_t)num;
   }
+
   buf = next;
   SKIP_WS(buf);
   if (*buf != '\0') {
Index: openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
===================================================================
--- /dev/null
+++ openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
@@ -0,0 +1,135 @@
+/*
+ * kmp_memkind.cpp -- support for memkind memory allocations
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_wrapper_malloc.h"
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+#include <dlfcn.h> // dlopen/dlsym/dlclose
+#endif
+
+static const char *kmp_mk_lib_name;
+static void *h_memkind;
+/* memkind experimental API: */
+// memkind_alloc
+static void *(*kmp_mk_alloc)(void *k, size_t sz);
+// memkind_free
+static void (*kmp_mk_free)(void *kind, void *ptr);
+// memkind_check_available
+static int (*kmp_mk_check)(void *kind);
+// kinds we are going to use
+static void **mk_default;
+static void **mk_interleave;
+static void **mk_hbw;
+static void **mk_hbw_interleave;
+static void **mk_hbw_preferred;
+static void **mk_hugetlb;
+static void **mk_hbw_hugetlb;
+static void **mk_hbw_preferred_hugetlb;
+
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int gtid);
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int gtid);
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+static inline void chk_kind(void ***pkind) {
+  KMP_DEBUG_ASSERT(pkind);
+  if (*pkind) // symbol found
+    if (kmp_mk_check(**pkind)) // kind not available or error
+      *pkind = NULL;
+}
+#endif
+
+void __kmp_init_memkind() {
+// as of 2018-07-31 memkind does not support Windows*, exclude it for now
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  // use of statically linked memkind is problematic, as it depends on libnuma
+  kmp_mk_lib_name = "libmemkind.so";
+  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
+  if (!h_memkind)
+    return;
+
+  kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
+  kmp_mk_alloc =
+      (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
+  kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
+  mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
+  if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
+      !kmp_mk_check(*mk_default)) {
+    mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
+    chk_kind(&mk_interleave);
+    mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
+    chk_kind(&mk_hbw);
+    mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
+    chk_kind(&mk_hbw_interleave);
+    mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
+    chk_kind(&mk_hbw_preferred);
+    mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
+    chk_kind(&mk_hugetlb);
+    mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
+    chk_kind(&mk_hbw_hugetlb);
+    mk_hbw_preferred_hugetlb =
+        (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
+    chk_kind(&mk_hbw_preferred_hugetlb);
+    KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
+
+    // Route every predefined allocator through the memkind backend, skipping
+    // omp_null_allocator (index 0), whose alloc/free must stay NULL.
+    // (Previous code indexed with constants 0/1 instead of the loop variable.)
+    for (int i = omp_ata_default; i < omp_ata_last; i++) {
+      kmp_standard_allocators[i].alloc = kmp_memkind_alloc;
+      kmp_standard_allocators[i].free = kmp_memkind_free;
+    }
+    return; // success
+  }
+  dlclose(h_memkind); // failure
+  h_memkind = NULL;
+#endif
+  // '}' stays outside the #if so the function is well-formed when the
+  // KMP_OS_UNIX && KMP_DYNAMIC_LIB block is compiled out.
+}
+
+
+void __kmp_fini_memkind() {
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  if (kmp_mk_check)
+    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
+  if (h_memkind) {
+    dlclose(h_memkind);
+    h_memkind = NULL;
+  }
+  kmp_mk_check = NULL;
+  kmp_mk_alloc = NULL;
+  kmp_mk_free = NULL;
+  mk_default = NULL;
+  mk_interleave = NULL;
+  mk_hbw = NULL;
+  mk_hbw_interleave = NULL;
+  mk_hbw_preferred = NULL;
+  mk_hugetlb = NULL;
+  mk_hbw_hugetlb = NULL;
+  mk_hbw_preferred_hugetlb = NULL;
+#endif
+}
+
+// Kind selection: honor the partition trait first, then the memory space.
+// chk_kind() NULLs any kind the library reports unavailable, so every
+// non-default kind pointer is guarded before it is dereferenced; when the
+// preferred kind is missing we fall back to MEMKIND_DEFAULT.
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved) {
+    if (al->memspace == omp_high_bw_mem_space && mk_hbw_interleave)
+      return kmp_mk_alloc(*mk_hbw_interleave, size);
+    if (mk_interleave)
+      return kmp_mk_alloc(*mk_interleave, size);
+  }
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_alloc(*mk_hbw_preferred, size);
+
+  return kmp_mk_alloc(*mk_default, size);
+}
+
+// Must mirror kmp_memkind_alloc()'s kind selection exactly so memory is
+// returned to the kind it was allocated from.
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved) {
+    if (al->memspace == omp_high_bw_mem_space && mk_hbw_interleave)
+      return kmp_mk_free(*mk_hbw_interleave, ptr);
+    if (mk_interleave)
+      return kmp_mk_free(*mk_interleave, ptr);
+  }
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_free(*mk_hbw_preferred, ptr);
+
+  return kmp_mk_free(*mk_default, ptr);
+}
Index: openmp/runtime/test/api/omp_alloc_null_fb.c
===================================================================
--- openmp/runtime/test/api/omp_alloc_null_fb.c
+++ openmp/runtime/test/api/omp_alloc_null_fb.c
@@ -11,8 +11,8 @@
   at[0].value = 2 * 1024 * 1024;
   at[1].key = omp_atk_fallback;
   at[1].value = omp_atv_null_fb;
-  a = omp_init_allocator(omp_large_cap_mem_space, 2, at);
-  printf("allocator large created: %p\n", a);
+  a = omp_init_allocator(omp_default_mem_space, 2, at);
+  printf("allocator default created: %p\n", a);
 #pragma omp parallel num_threads(2)
   {
     int i = omp_get_thread_num();