diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -555,12 +555,20 @@ omp_cgroup_mem_alloc DATA omp_pteam_mem_alloc DATA omp_thread_mem_alloc DATA + # Preview of target memory support + llvm_omp_target_host_mem_alloc DATA + llvm_omp_target_shared_mem_alloc DATA + llvm_omp_target_device_mem_alloc DATA omp_default_mem_space DATA omp_large_cap_mem_space DATA omp_const_mem_space DATA omp_high_bw_mem_space DATA omp_low_lat_mem_space DATA + # Preview of target memory support + llvm_omp_target_host_mem_space DATA + llvm_omp_target_shared_mem_space DATA + llvm_omp_target_device_mem_space DATA %ifndef stub # Ordinals between 900 and 999 are reserved diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -357,12 +357,21 @@ extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc; + /* Preview of target memory support */ + extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc; + extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; + typedef omp_uintptr_t omp_memspace_handle_t; extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space; + /* Preview of target memory support */ + extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space; + extern __KMP_IMP omp_memspace_handle_t const 
llvm_omp_target_shared_mem_space; + extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space; # else # if __cplusplus >= 201103 typedef enum omp_allocator_handle_t : omp_uintptr_t @@ -379,6 +388,10 @@ omp_cgroup_mem_alloc = 6, omp_pteam_mem_alloc = 7, omp_thread_mem_alloc = 8, + /* Preview of target memory support */ + llvm_omp_target_host_mem_alloc = 100, + llvm_omp_target_shared_mem_alloc = 101, + llvm_omp_target_device_mem_alloc = 102, KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX } omp_allocator_handle_t; # if __cplusplus >= 201103 @@ -392,6 +405,10 @@ omp_const_mem_space = 2, omp_high_bw_mem_space = 3, omp_low_lat_mem_space = 4, + /* Preview of target memory support */ + llvm_omp_target_host_mem_space = 100, + llvm_omp_target_shared_mem_space = 101, + llvm_omp_target_device_mem_space = 102, KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX } omp_memspace_handle_t; # endif diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -214,6 +214,13 @@ parameter(omp_pteam_mem_alloc=7) integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc parameter(omp_thread_mem_alloc=8) + ! Preview of target memory support + integer(kind=omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc + parameter(llvm_omp_target_host_mem_alloc=100) + integer(kind=omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc + parameter(llvm_omp_target_shared_mem_alloc=101) + integer(kind=omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc + parameter(llvm_omp_target_device_mem_alloc=102) integer(kind=omp_memspace_handle_kind)omp_default_mem_space parameter(omp_default_mem_space=0) @@ -225,6 +232,13 @@ parameter(omp_high_bw_mem_space=3) integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space parameter(omp_low_lat_mem_space=4) + ! 
Preview of target memory support + integer(kind=omp_memspace_handle_kind)llvm_omp_target_host_mem_space + parameter(llvm_omp_target_host_mem_space=100) + integer(kind=omp_memspace_handle_kind)llvm_omp_target_shared_mem_space + parameter(llvm_omp_target_shared_mem_space=101) + integer(kind=omp_memspace_handle_kind)llvm_omp_target_device_mem_space + parameter(llvm_omp_target_device_mem_space=102) integer(kind=omp_pause_resource_kind)omp_pause_resume parameter(omp_pause_resume=0) diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var --- a/openmp/runtime/src/include/omp_lib.f90.var +++ b/openmp/runtime/src/include/omp_lib.f90.var @@ -137,12 +137,20 @@ integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6 integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7 integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8 + ! Preview of target memory support + integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_host_mem_alloc = 100 + integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_shared_mem_alloc = 101 + integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_device_mem_alloc = 102 integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0 integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1 integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2 integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3 integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4 + ! 
Preview of target memory support + integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_host_mem_space = 100 + integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_shared_mem_space = 101 + integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_device_mem_space = 102 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0 integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1 diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -958,6 +958,10 @@ extern omp_memspace_handle_t const omp_const_mem_space; extern omp_memspace_handle_t const omp_high_bw_mem_space; extern omp_memspace_handle_t const omp_low_lat_mem_space; +// Preview of target memory support +extern omp_memspace_handle_t const llvm_omp_target_host_mem_space; +extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space; +extern omp_memspace_handle_t const llvm_omp_target_device_mem_space; typedef struct { omp_alloctrait_key_t key; @@ -974,6 +978,10 @@ extern omp_allocator_handle_t const omp_cgroup_mem_alloc; extern omp_allocator_handle_t const omp_pteam_mem_alloc; extern omp_allocator_handle_t const omp_thread_mem_alloc; +// Preview of target memory support +extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc; +extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc; +extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; extern omp_allocator_handle_t const kmp_max_mem_alloc; extern omp_allocator_handle_t __kmp_def_allocator; @@ -1011,6 +1019,7 @@ extern void __kmp_init_memkind(); extern void __kmp_fini_memkind(); +extern void __kmp_init_target_mem(); /* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -1242,6 
+1242,20 @@ static void **mk_dax_kmem; static void **mk_dax_kmem_all; static void **mk_dax_kmem_preferred; +// Preview of target memory support: entry points resolved via KMP_DLSYM +static void *(*kmp_target_alloc_host)(size_t size, int device); +static void *(*kmp_target_alloc_shared)(size_t size, int device); +static void *(*kmp_target_alloc_device)(size_t size, int device); +static void (*kmp_target_free)(void *ptr, int device); +static bool __kmp_target_mem_available; +#define KMP_IS_TARGET_MEM_SPACE(MS) \ + ((MS) == llvm_omp_target_host_mem_space || \ + (MS) == llvm_omp_target_shared_mem_space || \ + (MS) == llvm_omp_target_device_mem_space) +#define KMP_IS_TARGET_MEM_ALLOC(MA) \ + ((MA) == llvm_omp_target_host_mem_alloc || \ + (MA) == llvm_omp_target_shared_mem_alloc || \ + (MA) == llvm_omp_target_device_mem_alloc) #if KMP_OS_UNIX && KMP_DYNAMIC_LIB static inline void chk_kind(void ***pkind) { @@ -1338,6 +1352,18 @@ mk_dax_kmem_preferred = NULL; #endif } +// Resolve target memory entry points; support requires all four symbols. +void __kmp_init_target_mem() { + *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host"); + *(void **)(&kmp_target_alloc_shared) = + KMP_DLSYM("llvm_omp_target_alloc_shared"); + *(void **)(&kmp_target_alloc_device) = + KMP_DLSYM("llvm_omp_target_alloc_device"); + *(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free"); + __kmp_target_mem_available = kmp_target_alloc_host && + kmp_target_alloc_shared && + kmp_target_alloc_device && kmp_target_free; +} omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, int ntraits, @@ -1345,7 +1371,7 @@ // OpenMP 5.0 only allows predefined memspaces KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space || ms == omp_large_cap_mem_space || ms == omp_const_mem_space || - ms == omp_high_bw_mem_space); + ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms)); kmp_allocator_t *al; int i; al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed @@ -1423,6 +1449,9 @@ al->memkind = mk_default; } } 
+ } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) { + __kmp_free(al); + return omp_null_allocator; } else { if (ms == omp_high_bw_mem_space) { // cannot detect HBW memory presence without memkind library @@ -1543,6 +1572,22 @@ } } } else if (allocator < kmp_max_mem_alloc) { + if (KMP_IS_TARGET_MEM_ALLOC(allocator)) { + // Use size input directly as the memory may not be accessible on host. + // Use default device for now. + if (__kmp_target_mem_available) { + kmp_int32 device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; + if (allocator == llvm_omp_target_host_mem_alloc) + ptr = kmp_target_alloc_host(size, device); + else if (allocator == llvm_omp_target_shared_mem_alloc) + ptr = kmp_target_alloc_shared(size, device); + else // allocator == llvm_omp_target_device_mem_alloc + ptr = kmp_target_alloc_device(size, device); + } + return ptr; + } + // pre-defined allocator if (allocator == omp_high_bw_mem_alloc) { // ptr = NULL; @@ -1551,6 +1596,18 @@ } else { ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); } + } else if (KMP_IS_TARGET_MEM_SPACE(al->memspace)) { + if (__kmp_target_mem_available) { + kmp_int32 device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; + if (al->memspace == llvm_omp_target_host_mem_space) + ptr = kmp_target_alloc_host(size, device); + else if (al->memspace == llvm_omp_target_shared_mem_space) + ptr = kmp_target_alloc_shared(size, device); + else // al->memspace == llvm_omp_target_device_mem_space + ptr = kmp_target_alloc_device(size, device); + } + return ptr; } else if (al->pool_size > 0) { // custom allocator with pool size requested kmp_uint64 used = @@ -1685,6 +1742,15 @@ kmp_mem_desc_t desc; kmp_uintptr_t addr_align; // address to return to caller kmp_uintptr_t addr_descr; // address of memory block descriptor + if (KMP_IS_TARGET_MEM_ALLOC(allocator) || + (allocator > kmp_max_mem_alloc && + KMP_IS_TARGET_MEM_SPACE(al->memspace))) { + 
KMP_DEBUG_ASSERT(kmp_target_free); + kmp_int32 device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; + kmp_target_free(ptr, device); + return; + } addr_align = (kmp_uintptr_t)ptr; addr_descr = addr_align - sizeof(kmp_mem_desc_t); diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -319,6 +319,13 @@ (omp_allocator_handle_t const)7; omp_allocator_handle_t const omp_thread_mem_alloc = (omp_allocator_handle_t const)8; +// Preview of target memory support +omp_allocator_handle_t const llvm_omp_target_host_mem_alloc = + (omp_allocator_handle_t const)100; +omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc = + (omp_allocator_handle_t const)101; +omp_allocator_handle_t const llvm_omp_target_device_mem_alloc = + (omp_allocator_handle_t const)102; omp_allocator_handle_t const kmp_max_mem_alloc = (omp_allocator_handle_t const)1024; omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc; @@ -333,6 +340,13 @@ (omp_memspace_handle_t const)3; omp_memspace_handle_t const omp_low_lat_mem_space = (omp_memspace_handle_t const)4; +// Preview of target memory support +omp_memspace_handle_t const llvm_omp_target_host_mem_space = + (omp_memspace_handle_t const)100; +omp_memspace_handle_t const llvm_omp_target_shared_mem_space = + (omp_memspace_handle_t const)101; +omp_memspace_handle_t const llvm_omp_target_device_mem_space = + (omp_memspace_handle_t const)102; /* This check ensures that the compiler is passing the correct data type for the flags formal parameter of the function kmpc_omp_task_alloc(). 
If the type is diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -540,7 +540,10 @@ "%s_%d.t_disp_buffer", header, team_id); } -static void __kmp_init_allocator() { __kmp_init_memkind(); } +static void __kmp_init_allocator() { + __kmp_init_memkind(); + __kmp_init_target_mem(); +} static void __kmp_fini_allocator() { __kmp_fini_memkind(); } /* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp --- a/openmp/runtime/src/kmp_stub.cpp +++ b/openmp/runtime/src/kmp_stub.cpp @@ -350,6 +350,13 @@ (omp_allocator_handle_t const)7; omp_allocator_handle_t const omp_thread_mem_alloc = (omp_allocator_handle_t const)8; +// Preview of target memory support +omp_allocator_handle_t const llvm_omp_target_host_mem_alloc = + (omp_allocator_handle_t const)100; +omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc = + (omp_allocator_handle_t const)101; +omp_allocator_handle_t const llvm_omp_target_device_mem_alloc = + (omp_allocator_handle_t const)102; omp_memspace_handle_t const omp_default_mem_space = (omp_memspace_handle_t const)0; @@ -361,6 +368,13 @@ (omp_memspace_handle_t const)3; omp_memspace_handle_t const omp_low_lat_mem_space = (omp_memspace_handle_t const)4; +// Preview of target memory support +omp_memspace_handle_t const llvm_omp_target_host_mem_space = + (omp_memspace_handle_t const)100; +omp_memspace_handle_t const llvm_omp_target_shared_mem_space = + (omp_memspace_handle_t const)101; +omp_memspace_handle_t const llvm_omp_target_device_mem_space = + (omp_memspace_handle_t const)102; #endif /* KMP_OS_WINDOWS */ void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) { i;