diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1033,6 +1033,7 @@ kmp_allocator_t *fb_data; kmp_uint64 pool_size; kmp_uint64 pool_used; + bool pinned; } kmp_allocator_t; extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -1245,6 +1245,8 @@ static void *(*kmp_target_alloc_host)(size_t size, int device); static void *(*kmp_target_alloc_shared)(size_t size, int device); static void *(*kmp_target_alloc_device)(size_t size, int device); +static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device); +static void *(*kmp_target_unlock_mem)(void *ptr, int device); static void *(*kmp_target_free_host)(void *ptr, int device); static void *(*kmp_target_free_shared)(void *ptr, int device); static void *(*kmp_target_free_device)(void *ptr, int device); @@ -1386,7 +1388,9 @@ switch (traits[i].key) { case omp_atk_sync_hint: case omp_atk_access: + break; case omp_atk_pinned: + al->pinned = true; break; case omp_atk_alignment: __kmp_type_convert(traits[i].value, &(al->alignment)); @@ -1545,6 +1549,8 @@ return NULL; if (allocator == omp_null_allocator) allocator = __kmp_threads[gtid]->th.th_def_allocator; + kmp_int32 default_device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; al = RCAST(kmp_allocator_t *, allocator); @@ -1560,6 +1566,9 @@ align = algn; // max of allocator trait, parameter and sizeof(void*) desc.size_orig = size; desc.size_a = size + sz_desc + align; + bool is_pinned = false; + if (allocator > kmp_max_mem_alloc) + is_pinned = al->pinned; // Use default allocator if libmemkind is not available int use_default_allocator = (__kmp_memkind_available) ? false : true; @@ -1589,7 +1598,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1603,7 +1615,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1619,7 +1634,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1689,7 +1707,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1709,6 +1730,9 @@ if (ptr == NULL) return NULL; + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, desc.size_a, default_device); + addr = (kmp_uintptr_t)ptr; addr_align = (addr + sz_desc + align - 1) & ~(align - 1); addr_descr = addr_align - sz_desc; @@ -1825,6 +1849,12 @@ oal = (omp_allocator_handle_t)al; // cast to void* for comparisons KMP_DEBUG_ASSERT(al); + if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) { + kmp_int32 device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; + kmp_target_unlock_mem(desc.ptr_alloc, device); + } + if (__kmp_memkind_available) { if (oal < kmp_max_mem_alloc) { // pre-defined allocator