diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -187,6 +187,12 @@
 set(LIBOMP_USE_ITT_NOTIFY TRUE CACHE BOOL
     "Enable ITT notify?")
 
+# SICM support
+set(LIBOMP_USE_SICM TRUE CACHE BOOL
+    "Enable SICM support?")
+
+set(LIBOMP_SICM_PATH "/usr" CACHE PATH "SICM path")
+
 # normal, profile, stubs library.
 set(NORMAL_LIBRARY FALSE)
 set(STUBS_LIBRARY FALSE)
@@ -376,6 +382,7 @@
   libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")
   libomp_say("Use TSAN-support -- ${LIBOMP_TSAN_SUPPORT}")
   libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}")
+  libomp_say("Use SICM library -- ${LIBOMP_USE_SICM}")
 endif()
 
 add_subdirectory(src)
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -45,11 +45,17 @@
   ${LIBOMP_SRC_DIR}/i18n
   ${LIBOMP_INC_DIR}
   ${LIBOMP_SRC_DIR}/thirdparty/ittnotify
+  ${LIBOMP_SRC_DIR}/thirdparty/sicm
+  ${LIBOMP_SRC_DIR}/thirdparty/memkind
 )
 if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
+if(${LIBOMP_USE_SICM})
+  include_directories(${LIBOMP_SICM_PATH}/include)
+endif()
+
 # Getting correct source files to build library
 set(LIBOMP_CXXFILES)
 set(LIBOMP_ASMFILES)
@@ -93,6 +99,12 @@
   libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file
 endif()
 libomp_append(LIBOMP_CXXFILES thirdparty/ittnotify/ittnotify_static.cpp LIBOMP_USE_ITT_NOTIFY)
+if(${LIBOMP_USE_SICM})
+  libomp_append(LIBOMP_CXXFILES thirdparty/sicm/kmp_sicm.cpp)
+else()
+  libomp_append(LIBOMP_CXXFILES thirdparty/sicm/kmp_no_sicm.cpp)
+endif()
+libomp_append(LIBOMP_CXXFILES thirdparty/memkind/kmp_memkind.cpp)
 libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER)
 libomp_append(LIBOMP_CXXFILES kmp_stats.cpp LIBOMP_STATS)
 libomp_append(LIBOMP_CXXFILES kmp_stats_timing.cpp LIBOMP_STATS)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -916,6 +916,19 @@
   omp_uintptr_t value;
 } omp_alloctrait_t;
 
+enum {
+  omp_ata_null = 0,
+  omp_ata_default = 1,
+  omp_ata_large_cap = 2,
+  omp_ata_const = 3,
+  omp_ata_high_bw = 4,
+  omp_ata_low_lat = 5,
+  omp_ata_cgroup = 6,
+  omp_ata_pteam = 7,
+  omp_ata_thread = 8,
+  omp_ata_last = 9,
+};
+
 typedef void *omp_allocator_handle_t;
 extern omp_allocator_handle_t const omp_null_allocator;
 extern omp_allocator_handle_t const omp_default_mem_alloc;
@@ -932,20 +945,29 @@
 // end of duplicate type definitions from omp.h
 #endif
 
-extern int __kmp_memkind_available;
-
 typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
 
 typedef struct kmp_allocator_t {
   omp_memspace_handle_t memspace;
-  void **memkind; // pointer to memkind
   int alignment;
   omp_alloctrait_value_t fb;
   kmp_allocator_t *fb_data;
   kmp_uint64 pool_size;
   kmp_uint64 pool_used;
+  int partition;
+
+  /* custom allocators */
+  void *(*alloc)(size_t size, kmp_allocator_t *, int gtid);
+  void (*free)(void *p, kmp_allocator_t *, int gtid);
+  void *aux;
 } kmp_allocator_t;
 
+extern kmp_allocator_t kmp_standard_allocators[];
+extern void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid);
+extern void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid);
+extern int (*kmp_init_allocator_p)(kmp_allocator_t *);
+extern void (*kmp_destroy_allocator_p)(kmp_allocator_t *);
+
 extern omp_allocator_handle_t
 __kmpc_init_allocator(int gtid, omp_memspace_handle_t, int ntraits,
@@ -959,6 +981,9 @@
 extern void __kmp_init_memkind();
 extern void __kmp_fini_memkind();
 
+extern void __kmp_init_sicm();
+extern void __kmp_fini_sicm();
+
 /* ------------------------------------------------------------------------ */
 
 #define KMP_UINT64_MAX \
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1221,119 +1221,68 @@
 }
 
 /* OMP 5.0 Memory Management support */
-static const char *kmp_mk_lib_name;
-static void *h_memkind;
-/* memkind experimental API: */
-// memkind_alloc
-static void *(*kmp_mk_alloc)(void *k, size_t sz);
-// memkind_free
-static void (*kmp_mk_free)(void *kind, void *ptr);
-// memkind_check_available
-static int (*kmp_mk_check)(void *kind);
-// kinds we are going to use
-static void **mk_default;
-static void **mk_interleave;
-static void **mk_hbw;
-static void **mk_hbw_interleave;
-static void **mk_hbw_preferred;
-static void **mk_hugetlb;
-static void **mk_hbw_hugetlb;
-static void **mk_hbw_preferred_hugetlb;
-
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-static inline void chk_kind(void ***pkind) {
-  KMP_DEBUG_ASSERT(pkind);
-  if (*pkind) // symbol found
-    if (kmp_mk_check(**pkind)) // kind not available or error
-      *pkind = NULL;
-}
-#endif
-
-void __kmp_init_memkind() {
-// as of 2018-07-31 memkind does not support Windows*, exclude it for now
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-  // use of statically linked memkind is problematic, as it depends on libnuma
-  kmp_mk_lib_name = "libmemkind.so";
-  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
-  if (h_memkind) {
-    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
-    kmp_mk_alloc =
-        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
-    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
-    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
-    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
-        !kmp_mk_check(*mk_default)) {
-      __kmp_memkind_available = 1;
-      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
-      chk_kind(&mk_interleave);
-      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
-      chk_kind(&mk_hbw);
-      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
-      chk_kind(&mk_hbw_interleave);
-      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
-      chk_kind(&mk_hbw_preferred);
-      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
-      chk_kind(&mk_hugetlb);
-      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
-      chk_kind(&mk_hbw_hugetlb);
-      mk_hbw_preferred_hugetlb =
-          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
-      chk_kind(&mk_hbw_preferred_hugetlb);
-      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
-      return; // success
-    }
-    dlclose(h_memkind); // failure
-    h_memkind = NULL;
-  }
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#else
-  kmp_mk_lib_name = "";
-  h_memkind = NULL;
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#endif
-}
+void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid);
+void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid);
+
+kmp_allocator_t kmp_standard_allocators[] = {
+    { /* omp_null_allocator */
+        .memspace = omp_default_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = NULL,
+        .free = NULL,
+    },
+    { /* omp_default_allocator */
+        .memspace = omp_default_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_large_cap_allocator */
+        .memspace = omp_large_cap_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_const_allocator */
+        .memspace = omp_const_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_high_bw_allocator */
+        .memspace = omp_high_bw_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_low_lat_allocator */
+        .memspace = omp_low_lat_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_cgroup_allocator */
+        .memspace = omp_default_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_pteam_allocator */
+        .memspace = omp_default_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+    { /* omp_thread_allocator */
+        .memspace = omp_default_mem_space,
+        .fb = omp_atv_abort_fb,
+        .alloc = kmp_default_alloc,
+        .free = kmp_default_free,
+    },
+};
 
-void __kmp_fini_memkind() {
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-  if (__kmp_memkind_available)
-    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
-  if (h_memkind) {
-    dlclose(h_memkind);
-    h_memkind = NULL;
-  }
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#endif
-}
+// custom initialization function
+int (*kmp_init_allocator_p)(kmp_allocator_t *);
 
 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                              int ntraits,
@@ -1366,10 +1315,13 @@
           al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
       break;
     case omp_atk_fb_data:
-      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
+      if (traits[i].value < sizeof(kmp_standard_allocators) / sizeof(kmp_standard_allocators[0])) {
+        al->fb_data = &kmp_standard_allocators[traits[i].value];
+      } else
+        al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
       break;
     case omp_atk_partition:
-      al->memkind = RCAST(void **, traits[i].value);
+      al->partition = traits[i].value;
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
@@ -1378,47 +1330,31 @@
   if (al->fb == 0) {
     // set default allocator
     al->fb = omp_atv_default_mem_fb;
-    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+    al->fb_data = &kmp_standard_allocators[(uintptr_t)omp_default_mem_alloc];
   } else if (al->fb == omp_atv_allocator_fb) {
     KMP_ASSERT(al->fb_data != NULL);
   } else if (al->fb == omp_atv_default_mem_fb) {
-    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+    al->fb_data = &kmp_standard_allocators[(uintptr_t)omp_default_mem_alloc];
   }
-  if (__kmp_memkind_available) {
-    // Let's use memkind library if available
-    if (ms == omp_high_bw_mem_space) {
-      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
-        al->memkind = mk_hbw_interleave;
-      } else if (mk_hbw_preferred) {
-        // AC: do not try to use MEMKIND_HBW for now, because memkind library
-        // cannot reliably detect exhaustion of HBW memory.
-        // It could be possible using hbw_verify_memory_region() but memkind
-        // manual says: "Using this function in production code may result in
-        // serious performance penalty".
-        al->memkind = mk_hbw_preferred;
-      } else {
-        // HBW is requested but not available --> return NULL allocator
-        __kmp_free(al);
-        return omp_null_allocator;
-      }
-    } else {
-      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
-        al->memkind = mk_interleave;
-      } else {
-        al->memkind = mk_default;
-      }
-    }
-  } else {
-    if (ms == omp_high_bw_mem_space) {
-      // cannot detect HBW memory presence without memkind library
+
+  al->alloc = kmp_default_alloc; // kmp_standard_allocators[(uintptr_t)ms].alloc;
+  al->free = kmp_default_free; // kmp_standard_allocators[(uintptr_t)ms].free;
+  if (kmp_init_allocator_p != NULL && kmp_init_allocator_p(al) != 0) {
+    // something went wrong, bail
     __kmp_free(al);
     return omp_null_allocator;
   }
-  }
+
   return (omp_allocator_handle_t)al;
 }
 
+// custom destruction function
+void (*kmp_destroy_allocator_p)(kmp_allocator_t *);
+
 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
-  if (allocator > kmp_max_mem_alloc)
-    __kmp_free(allocator);
+  // only user-defined allocators carry a kmp_allocator_t behind the handle
+  if (allocator > kmp_max_mem_alloc) {
+    if (kmp_destroy_allocator_p)
+      kmp_destroy_allocator_p(
+          RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)));
+    __kmp_free(allocator);
+  }
 }
@@ -1441,15 +1377,28 @@
 } kmp_mem_desc_t;
 static int alignment = sizeof(void *); // let's align to pointer size
 
+void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid) {
+  return __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), size);
+}
+
+void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid) {
+  return __kmp_thread_free(__kmp_thread_from_gtid(gtid), ptr);
+}
+
 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
   void *ptr = NULL;
-  kmp_allocator_t *al;
+  kmp_allocator_t *al = NULL;
   KMP_DEBUG_ASSERT(__kmp_init_serial);
   if (allocator == omp_null_allocator)
     allocator = __kmp_threads[gtid]->th.th_def_allocator;
   KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
-  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+  if ((uintptr_t)allocator < sizeof(kmp_standard_allocators) / sizeof(kmp_standard_allocators[0]))
+    al = &kmp_standard_allocators[(uintptr_t)allocator];
+  else if (allocator > kmp_max_mem_alloc)
+    al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+
+  KMP_ASSERT(al != NULL);
 
   int sz_desc = sizeof(kmp_mem_desc_t);
   kmp_mem_desc_t desc;
@@ -1462,101 +1411,29 @@
   }
   desc.size_a = size + sz_desc + align;
 
-  if (__kmp_memkind_available) {
-    if (allocator < kmp_max_mem_alloc) {
-      // pre-defined allocator
-      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
-        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
-      } else {
-        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-      }
-    } else if (al->pool_size > 0) {
-      // custom allocator with pool size requested
-      kmp_uint64 used =
-          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+  if (al->pool_size > 0) {
+    kmp_uint64 used = KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
     if (used + desc.size_a > al->pool_size) {
       // not enough space, need to go fallback path
       KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      if (al->fb == omp_atv_default_mem_fb) {
-        al = (kmp_allocator_t *)omp_default_mem_alloc;
-        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-      } else if (al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } else if (al->fb == omp_atv_allocator_fb) {
-        KMP_ASSERT(al != al->fb_data);
-        al = al->fb_data;
-        return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-      } // else ptr == NULL;
-    } else {
-      // pool has enough space
-      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
-      if (ptr == NULL) {
-        if (al->fb == omp_atv_default_mem_fb) {
-          al = (kmp_allocator_t *)omp_default_mem_alloc;
-          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-        } else if (al->fb == omp_atv_abort_fb) {
-          KMP_ASSERT(0); // abort fallback requested
-        } else if (al->fb == omp_atv_allocator_fb) {
-          KMP_ASSERT(al != al->fb_data);
-          al = al->fb_data;
-          return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-        }
-      }
-    }
-  } else {
-    // custom allocator, pool size not requested
-    ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
-    if (ptr == NULL) {
-      if (al->fb == omp_atv_default_mem_fb) {
-        al = (kmp_allocator_t *)omp_default_mem_alloc;
-        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-      } else if (al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } else if (al->fb == omp_atv_allocator_fb) {
-        KMP_ASSERT(al != al->fb_data);
-        al = al->fb_data;
-        return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-      }
-    }
+      switch (al->fb) {
+      default:
+        return NULL;
+
+      case omp_atv_abort_fb:
+        KMP_ASSERT(0);
+        abort();
+
+      case omp_atv_default_mem_fb:
+        [[clang::fallthrough]];
+      case omp_atv_allocator_fb:
+        ptr = __kmpc_alloc(gtid, size, al->fb_data);
+        return ptr;
+      }
     }
-  } else if (allocator < kmp_max_mem_alloc) {
-    // pre-defined allocator
-    if (allocator == omp_high_bw_mem_alloc) {
-      // ptr = NULL;
-    } else {
-      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-    }
-  } else if (al->pool_size > 0) {
-    // custom allocator with pool size requested
-    kmp_uint64 used =
-        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
-    if (used + desc.size_a > al->pool_size) {
-      // not enough space, need to go fallback path
-      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      if (al->fb == omp_atv_default_mem_fb) {
-        al = (kmp_allocator_t *)omp_default_mem_alloc;
-        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-      } else if (al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } else if (al->fb == omp_atv_allocator_fb) {
-        KMP_ASSERT(al != al->fb_data);
-        al = al->fb_data;
-        return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-      } // else ptr == NULL;
-    } else {
-      // pool has enough space
-      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } // no sense to look for another fallback because of same internal alloc
-    }
-  } else {
-    // custom allocator, pool size not requested
-    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
-      KMP_ASSERT(0); // abort fallback requested
-    } // no sense to look for another fallback because of same internal alloc
   }
+
+  ptr = (*al->alloc)(desc.size_a, al, gtid);
   KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
   if (ptr == NULL)
     return NULL;
@@ -1599,32 +1476,15 @@
   oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
   KMP_DEBUG_ASSERT(al);
 
-  if (__kmp_memkind_available) {
-    if (oal < kmp_max_mem_alloc) {
-      // pre-defined allocator
-      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
-        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
-      } else {
-        kmp_mk_free(*mk_default, desc.ptr_alloc);
-      }
-    } else {
+  KMP_ASSERT(al->free != NULL);
+  (*al->free)(desc.ptr_alloc, al, gtid);
+  if (al->pool_size > 0) {
     // custom allocator with pool size requested
     kmp_uint64 used =
         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
     (void)used; // to suppress compiler warning
     KMP_DEBUG_ASSERT(used >= desc.size_a);
   }
-      kmp_mk_free(*al->memkind, desc.ptr_alloc);
-    }
-  } else {
-    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
-      kmp_uint64 used =
-          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      (void)used; // to suppress compiler warning
-      KMP_DEBUG_ASSERT(used >= desc.size_a);
-    }
-    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
-  }
   KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
                 allocator));
 }
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -287,7 +287,6 @@
 kmp_int32 __kmp_max_task_priority = 0;
 kmp_uint64 __kmp_taskloop_min_tasks = 0;
 
-int __kmp_memkind_available = 0;
 omp_allocator_handle_t const omp_null_allocator = NULL;
 omp_allocator_handle_t const omp_default_mem_alloc =
     (omp_allocator_handle_t const)1;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -526,8 +526,15 @@
                 "%s_%d.t_disp_buffer", header, team_id);
 }
 
-static void __kmp_init_allocator() { __kmp_init_memkind(); }
-static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
+static void __kmp_init_allocator() {
+  __kmp_init_memkind();
+  __kmp_init_sicm();
+}
+
+static void __kmp_fini_allocator() {
+  __kmp_fini_memkind();
+  __kmp_fini_sicm();
+}
 
 /* ------------------------------------------------------------------------ */
 
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3275,6 +3275,18 @@
   __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format);
 }
 // OMP_ALLOCATOR sets default allocator
+static const char *__kmp_allocator_names[] = {
+    "omp_default_mem_alloc",
+    "omp_large_cap_mem_alloc",
+    "omp_const_mem_alloc",
+    "omp_high_bw_mem_alloc",
+    "omp_low_lat_mem_alloc",
+    "omp_cgroup_mem_alloc",
+    "omp_pteam_mem_alloc",
+    "omp_thread_mem_alloc",
+    NULL
+};
+
 static void __kmp_stg_parse_allocator(char const *name, char const *value,
                                       void *data) {
   /*
@@ -3297,94 +3309,27 @@
     next = buf;
     SKIP_DIGITS(next);
     num = __kmp_str_to_int(buf, *next);
-    KMP_ASSERT(num > 0);
+    KMP_ASSERT(num > 0 && num < 9);
+  } else {
+    num = -1;
+    next = buf;
+    for (int i = 0; __kmp_allocator_names[i] != NULL; i++) {
+      if (__kmp_match_str(__kmp_allocator_names[i], buf, &next)) {
+        num = i + 1;
+        break;
+      }
+    }
-    switch (num) {
-    case 4:
-      if (__kmp_memkind_available) {
-        __kmp_def_allocator = omp_high_bw_mem_alloc;
-      } else {
-        __kmp_msg(kmp_ms_warning,
-                  KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
-                  __kmp_msg_null);
-        __kmp_def_allocator = omp_default_mem_alloc;
-      }
-      break;
-    case 1:
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 2:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 3:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 5:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 6:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 7:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 8:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    }
-    return;
   }
-  next = buf;
-  if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) {
-    if (__kmp_memkind_available) {
-      __kmp_def_allocator = omp_high_bw_mem_alloc;
-    } else {
+
+  if (num > 0) {
+    // warn if the requested predefined allocator has no special backing memory
+    if (num > 1 && kmp_standard_allocators[num].alloc == kmp_default_alloc)
       __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
+                KMP_MSG(OmpNoAllocator, __kmp_allocator_names[num - 1]),
                 __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-    }
-  } else if (__kmp_match_str("omp_default_mem_alloc", buf, &next)) {
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_large_cap_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning,
-              KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_const_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_low_lat_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_cgroup_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_pteam_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_thread_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
+
+    __kmp_def_allocator = (omp_allocator_handle_t)(uintptr_t)num;
   }
+
   buf = next;
   SKIP_WS(buf);
   if (*buf != '\0') {
diff --git a/openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp b/openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
@@ -0,0 +1,135 @@
+/*
+ * kmp_memkind.cpp -- support for memkind memory allocations
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_wrapper_malloc.h"
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+#include <dlfcn.h>
+#endif
+
+static const char *kmp_mk_lib_name;
+static void *h_memkind;
+/* memkind experimental API: */
+// memkind_alloc
+static void *(*kmp_mk_alloc)(void *k, size_t sz);
+// memkind_free
+static void (*kmp_mk_free)(void *kind, void *ptr);
+// memkind_check_available
+static int (*kmp_mk_check)(void *kind);
+// kinds we are going to use
+static void **mk_default;
+static void **mk_interleave;
+static void **mk_hbw;
+static void **mk_hbw_interleave;
+static void **mk_hbw_preferred;
+static void **mk_hugetlb;
+static void **mk_hbw_hugetlb;
+static void **mk_hbw_preferred_hugetlb;
+
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int gtid);
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int gtid);
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+static inline void chk_kind(void ***pkind) {
+  KMP_DEBUG_ASSERT(pkind);
+  if (*pkind) // symbol found
+    if (kmp_mk_check(**pkind)) // kind not available or error
+      *pkind = NULL;
+}
+#endif
+
+void __kmp_init_memkind() {
+// as of 2018-07-31 memkind does not support Windows*, exclude it for now
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  // use of statically linked memkind is problematic, as it depends on libnuma
+  kmp_mk_lib_name = "libmemkind.so";
+  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
+  if (!h_memkind)
+    return;
+
+  kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
+  kmp_mk_alloc =
+      (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
+  kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
+  mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
+  if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
+      !kmp_mk_check(*mk_default)) {
+    mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
+    chk_kind(&mk_interleave);
+    mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
+    chk_kind(&mk_hbw);
+    mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
+    chk_kind(&mk_hbw_interleave);
+    mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
+    chk_kind(&mk_hbw_preferred);
+    mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
+    chk_kind(&mk_hugetlb);
+    mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
+    chk_kind(&mk_hbw_hugetlb);
+    mk_hbw_preferred_hugetlb =
+        (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
+    chk_kind(&mk_hbw_preferred_hugetlb);
+    KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
+
+    // route all standard allocators through memkind
+    for (int i = 0; i < 9; i++) {
+      kmp_standard_allocators[i].alloc = kmp_memkind_alloc;
+      kmp_standard_allocators[i].free = kmp_memkind_free;
+    }
+    return; // success
+  }
+  dlclose(h_memkind); // failure
+  h_memkind = NULL;
+#endif
+}
+
+void __kmp_fini_memkind() {
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  if (kmp_mk_check)
+    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
+  if (h_memkind) {
+    dlclose(h_memkind);
+    h_memkind = NULL;
+  }
+  kmp_mk_check = NULL;
+  kmp_mk_alloc = NULL;
+  kmp_mk_free = NULL;
+  mk_default = NULL;
+  mk_interleave = NULL;
+  mk_hbw = NULL;
+  mk_hbw_interleave = NULL;
+  mk_hbw_preferred = NULL;
+  mk_hugetlb = NULL;
+  mk_hbw_hugetlb = NULL;
+  mk_hbw_preferred_hugetlb = NULL;
+#endif
+}
+
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved && mk_interleave)
+    return kmp_mk_alloc(*mk_interleave, size);
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_alloc(*mk_hbw_preferred, size);
+
+  return kmp_mk_alloc(*mk_default, size);
+}
+
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved && mk_interleave)
+    return kmp_mk_free(*mk_interleave, ptr);
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_free(*mk_hbw_preferred, ptr);
+
+  return kmp_mk_free(*mk_default, ptr);
+}
diff --git a/openmp/runtime/src/thirdparty/sicm/kmp_no_sicm.cpp b/openmp/runtime/src/thirdparty/sicm/kmp_no_sicm.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/src/thirdparty/sicm/kmp_no_sicm.cpp
@@ -0,0 +1,5 @@
+void __kmp_init_sicm() {
+}
+
+void __kmp_fini_sicm() {
+}
diff --git a/openmp/runtime/src/thirdparty/sicm/kmp_sicm.cpp b/openmp/runtime/src/thirdparty/sicm/kmp_sicm.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/src/thirdparty/sicm/kmp_sicm.cpp
@@ -0,0 +1,179 @@
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_wrapper_malloc.h"
+
+#include <sicm_low.h>
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+#include <dlfcn.h>
+#endif
+
+static void *h_sicm;
+
+static sicm_device_list (*p_sicm_init)(void);
+static sicm_arena (*p_sicm_arena_create)(size_t, int, sicm_device_list *);
+static void (*p_sicm_arena_destroy)(sicm_arena arena);
+static sicm_device *(*p_sicm_arena_get_devices)(sicm_arena sa);
+static int (*p_sicm_arena_set_devices)(sicm_arena sa, sicm_device *dev);
+static void *(*p_sicm_arena_alloc)(sicm_arena sa, size_t sz);
+static void (*p_sicm_free)(void *ptr);
+static int (*p_sicm_device_page_size)(sicm_device *);
+
+static int kmp_sicm_init_allocator(kmp_allocator_t *al);
+static void *kmp_sicm_alloc(size_t size, kmp_allocator_t *al, int gtid);
+static void kmp_sicm_free(void *ptr, kmp_allocator_t *al, int gtid);
+static void kmp_sicm_destroy_allocator(kmp_allocator_t *al);
+
+static sicm_device_list kmp_sicm_devs; // all
+static sicm_device_list kmp_sicm_default_devs;
+static sicm_device_list kmp_sicm_large_cap_devs;
+static sicm_device_list kmp_sicm_const_devs;
+static sicm_device_list kmp_sicm_high_bw_devs;
+static sicm_device_list kmp_sicm_low_lat_devs;
+
+static void kmp_sicm_init_device_list(sicm_device_list *devs, int tag) {
+  int n;
+
+  n = 0;
+  for (unsigned int i = 0; i < kmp_sicm_devs.count; i++) {
+    sicm_device *dev = kmp_sicm_devs.devices[i];
+    if (dev->tag == tag && p_sicm_device_page_size(dev) == 4)
+      n++;
+  }
+
+  devs->count = n;
+  devs->devices = (sicm_device **)malloc(n * sizeof(sicm_device *));
+  n = 0;
+  for (unsigned int i = 0; i < kmp_sicm_devs.count; i++) {
+    sicm_device *dev = kmp_sicm_devs.devices[i];
+    if (dev->tag == tag && p_sicm_device_page_size(dev) == 4) {
+      devs->devices[n] = dev;
+      n++;
+    }
+  }
+}
+
+void __kmp_init_sicm() {
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  h_sicm = dlopen("libsicm.so", RTLD_LAZY);
+  if (!h_sicm) {
+    KE_TRACE(25, ("can't load libsicm.so: %s\n", dlerror()));
+    goto error;
+  }
+
+  p_sicm_init = (sicm_device_list (*)(void))dlsym(h_sicm, "sicm_init");
+  p_sicm_arena_create = (sicm_arena (*)(size_t, int, sicm_device_list *))dlsym(
+      h_sicm, "sicm_arena_create");
+  p_sicm_arena_destroy =
+      (void (*)(sicm_arena))dlsym(h_sicm, "sicm_arena_destroy");
+  p_sicm_arena_get_devices =
+      (sicm_device *(*)(sicm_arena sa))dlsym(h_sicm, "sicm_arena_get_devices");
+  p_sicm_arena_set_devices = (int (*)(sicm_arena sa, sicm_device *dev))dlsym(
+      h_sicm, "sicm_arena_set_devices");
+  p_sicm_arena_alloc =
+      (void *(*)(sicm_arena sa, size_t sz))dlsym(h_sicm, "sicm_arena_alloc");
+  p_sicm_free = (void (*)(void *ptr))dlsym(h_sicm, "sicm_free");
+  p_sicm_device_page_size =
+      (int (*)(sicm_device *))dlsym(h_sicm, "sicm_device_page_size");
+
+  if (!p_sicm_init || !p_sicm_arena_create || !p_sicm_arena_destroy ||
+      !p_sicm_arena_get_devices || !p_sicm_arena_set_devices ||
+      !p_sicm_arena_alloc || !p_sicm_free || !p_sicm_device_page_size) {
+    KE_TRACE(25, ("can't initialize SICM library\n"));
+    goto error;
+  }
+
+  KE_TRACE(25, ("__kmp_init_sicm: Initializing SICM support\n"));
+  kmp_sicm_devs = p_sicm_init();
+  kmp_init_allocator_p = kmp_sicm_init_allocator;
+  kmp_destroy_allocator_p = kmp_sicm_destroy_allocator;
+  kmp_sicm_init_device_list(&kmp_sicm_default_devs, SICM_DRAM);
+  KE_TRACE(25, ("__kmp_init_sicm: Default memspace: %d devices\n",
+                kmp_sicm_default_devs.count));
+  kmp_sicm_init_device_list(&kmp_sicm_large_cap_devs, SICM_OPTANE);
+  KE_TRACE(25, ("__kmp_init_sicm: Large-capacity memspace: %d devices\n",
+                kmp_sicm_large_cap_devs.count));
+  kmp_sicm_init_device_list(&kmp_sicm_const_devs, -1);
+  KE_TRACE(25, ("__kmp_init_sicm: Constant memspace: %d devices\n",
+                kmp_sicm_const_devs.count));
+  kmp_sicm_init_device_list(&kmp_sicm_high_bw_devs, SICM_KNL_HBM);
+  KE_TRACE(25, ("__kmp_init_sicm: High-bandwidth memspace: %d devices\n",
+                kmp_sicm_high_bw_devs.count));
+  kmp_sicm_init_device_list(&kmp_sicm_low_lat_devs, -1);
+  KE_TRACE(25, ("__kmp_init_sicm: Low-latency memspace: %d devices\n",
+                kmp_sicm_low_lat_devs.count));
+
+  for (int i = 0; i < 9; i++)
+    kmp_sicm_init_allocator(&kmp_standard_allocators[i]);
+
+  KE_TRACE(25, ("__kmp_init_sicm: SICM library initialized\n"));
+  return;
+
+error:
+#endif
+
+  p_sicm_init = NULL;
+  p_sicm_arena_create = NULL;
+  p_sicm_arena_get_devices = NULL;
+  p_sicm_arena_set_devices = NULL;
+  p_sicm_arena_alloc = NULL;
+  p_sicm_free = NULL;
+  if (h_sicm)
+    dlclose(h_sicm);
+  h_sicm = NULL;
+
+  return;
+}
+
+void __kmp_fini_sicm() {
+  if (h_sicm)
+    dlclose(h_sicm);
+
+  p_sicm_init = NULL;
+  p_sicm_arena_create = NULL;
+  p_sicm_arena_get_devices = NULL;
+  p_sicm_arena_set_devices = NULL;
+  p_sicm_arena_alloc = NULL;
+  p_sicm_free = NULL;
+  h_sicm = NULL;
+}
+
+static int kmp_sicm_init_allocator(kmp_allocator_t *al) {
+  sicm_arena sa;
+  sicm_device_list *devs;
+
+  KMP_ASSERT(p_sicm_arena_create != NULL);
+
+  al->aux = NULL;
+  devs = NULL;
+  if (al->memspace == omp_default_mem_space) {
+    devs = &kmp_sicm_default_devs;
+  } else if (al->memspace == omp_const_mem_space) {
+    devs = &kmp_sicm_const_devs;
+  } else if (al->memspace == omp_large_cap_mem_space) {
+    devs = &kmp_sicm_large_cap_devs;
+  } else if (al->memspace == omp_high_bw_mem_space) {
+    devs = &kmp_sicm_high_bw_devs;
+  } else if (al->memspace == omp_low_lat_mem_space) {
+    devs = &kmp_sicm_low_lat_devs;
+  }
+
+  if (devs == NULL)
+    return -1;
+
+  if (devs->count == 0)
+    return -1;
+
+  sa = p_sicm_arena_create(al->pool_size, 0, devs);
+  if (sa == NULL) {
+    return -1;
+  }
+
+  al->alloc = kmp_sicm_alloc;
+  al->free = kmp_sicm_free;
+
+  al->aux = sa;
+  return 0;
+}
+
+static void *kmp_sicm_alloc(size_t size, kmp_allocator_t *al, int) {
+  sicm_arena sa;
+
+  sa = (sicm_arena)al->aux;
+  KMP_ASSERT(p_sicm_arena_alloc != NULL);
+  return p_sicm_arena_alloc(sa, size);
+}
+
+static void kmp_sicm_free(void *ptr, kmp_allocator_t *al, int) {
+  KMP_ASSERT(p_sicm_free != NULL);
+  p_sicm_free(ptr);
+}
+
+static void kmp_sicm_destroy_allocator(kmp_allocator_t *al) {
+  sicm_arena sa;
+
+  sa = (sicm_arena)al->aux;
+  if (sa != NULL)
+    p_sicm_arena_destroy(sa);
+}
diff --git a/openmp/runtime/test/api/omp_alloc_null_fb.c b/openmp/runtime/test/api/omp_alloc_null_fb.c
--- a/openmp/runtime/test/api/omp_alloc_null_fb.c
+++ b/openmp/runtime/test/api/omp_alloc_null_fb.c
@@ -11,8 +11,8 @@
   at[0].value = 2 * 1024 * 1024;
   at[1].key = omp_atk_fallback;
   at[1].value = omp_atv_null_fb;
-  a = omp_init_allocator(omp_large_cap_mem_space, 2, at);
-  printf("allocator large created: %p\n", a);
+  a = omp_init_allocator(omp_default_mem_space, 2, at);
+  printf("allocator default created: %p\n", a);
 #pragma omp parallel num_threads(2)
   {
     int i = omp_get_thread_num();