Index: openmp/runtime/src/kmp.h =================================================================== --- openmp/runtime/src/kmp.h +++ openmp/runtime/src/kmp.h @@ -1033,6 +1033,7 @@ kmp_allocator_t *fb_data; kmp_uint64 pool_size; kmp_uint64 pool_used; + bool pinned; } kmp_allocator_t; extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, Index: openmp/runtime/src/kmp_alloc.cpp =================================================================== --- openmp/runtime/src/kmp_alloc.cpp +++ openmp/runtime/src/kmp_alloc.cpp @@ -1245,6 +1245,8 @@ static void *(*kmp_target_alloc_host)(size_t size, int device); static void *(*kmp_target_alloc_shared)(size_t size, int device); static void *(*kmp_target_alloc_device)(size_t size, int device); +static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device); +static void *(*kmp_target_unlock_mem)(void *ptr, int device); static void *(*kmp_target_free_host)(void *ptr, int device); static void *(*kmp_target_free_shared)(void *ptr, int device); static void *(*kmp_target_free_device)(void *ptr, int device); @@ -1386,7 +1388,9 @@ switch (traits[i].key) { case omp_atk_sync_hint: case omp_atk_access: + break; case omp_atk_pinned: + al->pinned = true; break; case omp_atk_alignment: __kmp_type_convert(traits[i].value, &(al->alignment)); @@ -1545,6 +1549,8 @@ return NULL; if (allocator == omp_null_allocator) allocator = __kmp_threads[gtid]->th.th_def_allocator; + kmp_int32 default_device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; al = RCAST(kmp_allocator_t *, allocator); @@ -1560,6 +1566,9 @@ align = algn; // max of allocator trait, parameter and sizeof(void*) desc.size_orig = size; desc.size_a = size + sz_desc + align; + bool is_pinned = false; + if (allocator > kmp_max_mem_alloc) + is_pinned = al->pinned; if (__kmp_memkind_available) { if (allocator < kmp_max_mem_alloc) { @@ -1586,7 +1595,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1600,7 +1612,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1616,7 +1631,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1633,6 +1651,8 @@ ptr = kmp_target_alloc_shared(size, device); else // allocator == llvm_omp_target_device_mem_alloc ptr = kmp_target_alloc_device(size, device); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, device); } return ptr; } @@ -1655,6 +1675,8 @@ ptr = kmp_target_alloc_shared(size, device); else // al->memspace == llvm_omp_target_device_mem_space ptr = kmp_target_alloc_device(size, device); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, device); } return ptr; } else if (al->pool_size > 0) { @@ -1672,7 +1694,10 @@ } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1692,6 +1717,9 @@ if (ptr == NULL) return NULL; + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, desc.size_a, default_device); + addr = (kmp_uintptr_t)ptr; addr_align = (addr + sz_desc + align - 1) & ~(align - 1); addr_descr = addr_align - sz_desc; @@ -1993,7 +2021,10 @@ memset(descr.ptr_allocated, 0xEF, descr.size_allocated); // Fill memory block with 0xEF, it helps catch using freed memory. #endif - + if (kmp_target_unlock_mem) { + kmp_int32 default_device = 0; + kmp_target_unlock_mem(descr.ptr_allocated, default_device); + } #ifndef LEAK_MEMORY KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated)); #ifdef KMP_DEBUG Index: openmp/runtime/test/api/omp_pinned.c =================================================================== --- /dev/null +++ openmp/runtime/test/api/omp_pinned.c @@ -0,0 +1,17 @@ +// RUN: %libomp-compile-and-run + +#include + +int main() { + omp_alloctrait_t pinned_trait[1] = {{omp_atk_pinned, omp_atv_true}}; + omp_allocator_handle_t pinned_alloc = omp_init_allocator(omp_default_mem_space, 1, pinned_trait); + double *a = (double *)omp_alloc(10*sizeof(double), pinned_alloc); + + #pragma omp parallel for + for(int i = 0; i < 10; i++) + a[i] = 0; + + omp_free(a, pinned_alloc); + + return 0; +}