diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -86,6 +86,13 @@ OMP_REQ_DYNAMIC_ALLOCATORS = 0x010 }; +enum TargetAllocTy : int32_t { + TARGET_ALLOC_DEVICE = 0, + TARGET_ALLOC_HOST, + TARGET_ALLOC_SHARED, + TARGET_ALLOC_DEFAULT +}; + /// This struct is a record of an entry point or global. For a function /// entry point the size is expected to be zero struct __tgt_offload_entry { @@ -190,6 +197,12 @@ size_t device_offset, int device_num); int omp_target_disassociate_ptr(void *host_ptr, int device_num); +/// Explicit target memory allocators +/// Using the llvm_ prefix until they become part of the OpenMP standard. +void *llvm_omp_target_alloc_device(size_t size, int device_num); +void *llvm_omp_target_alloc_host(size_t size, int device_num); +void *llvm_omp_target_alloc_shared(size_t size, int device_num); + /// add the clauses of the requires directives in a given file void __tgt_register_requires(int64_t flags); diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -65,8 +65,10 @@ // initialize the target data mapping structures. These addresses are // used to generate a table of target variables to pass to // __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in -// case an error occurred on the target device. -void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr); +// case an error occurred on the target device. Kind dictates what allocator +// to use (e.g. shared, host, device). +void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr, + int32_t Kind); // Pass the data content to the target device using the target address. In case // of success, return zero. Otherwise, return an error code. diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1488,9 +1488,16 @@ return DeviceInfo.getOffloadEntriesTable(device_id); } -void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *) { +void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) { void *ptr = NULL; assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); + + if (kind != TARGET_ALLOC_DEFAULT) { + REPORT("Invalid target data allocation kind or requested allocator not " + "implemented yet\n"); + return NULL; + } + atmi_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id)); DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size, (long long unsigned)(Elf64_Addr)ptr); diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -1095,9 +1095,16 @@ return DeviceRTL.loadBinary(device_id, image); } -void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *) { +void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *, + int32_t kind) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + if (kind != TARGET_ALLOC_DEFAULT) { + REPORT("Invalid target data allocation kind or requested allocator not " + "implemented yet\n"); + return NULL; + } + return DeviceRTL.dataAlloc(device_id, size); } diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp @@ -250,8 +250,23 @@ return DeviceInfo.getOffloadEntriesTable(device_id); } -void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) { - void *ptr = malloc(size); +// Sample implementation of explicit memory allocator. For this plugin all kinds +// are equivalent to each other. +void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr, + int32_t kind) { + void *ptr = NULL; + + switch (kind) { + case TARGET_ALLOC_DEVICE: + case TARGET_ALLOC_HOST: + case TARGET_ALLOC_SHARED: + case TARGET_ALLOC_DEFAULT: + ptr = malloc(size); + break; + default: + REPORT("Invalid target data allocation kind"); + } + return ptr; } diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp --- a/openmp/libomptarget/plugins/remote/src/rtl.cpp +++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp @@ -84,7 +84,14 @@ return Manager->isDataExchangeable(SrcDevId, DstDevId); } -void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr) { +void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr, + int32_t kind) { + if (kind != TARGET_ALLOC_DEFAULT) { + REPORT("Invalid target data allocation kind or requested allocator not " + "implemented yet\n"); + return NULL; + } + return Manager->dataAlloc(DeviceId, Size, HstPtr); } diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp --- a/openmp/libomptarget/plugins/ve/src/rtl.cpp +++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp @@ -330,10 +330,17 @@ // used to generate a table of target variables to pass to // __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in // case an error occurred on the target device. -void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr) { +void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr, + int32_t kind) { int ret; uint64_t addr; + if (kind != TARGET_ALLOC_DEFAULT) { + REPORT("Invalid target data allocation kind or requested allocator not " + "implemented yet\n"); + return NULL; + } + if (DeviceInfo.ProcHandles[ID] == NULL) { struct veo_proc_handle *proc_handle; proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -38,31 +38,19 @@ } EXTERN void *omp_target_alloc(size_t size, int device_num) { - TIMESCOPE(); - DP("Call to omp_target_alloc for device %d requesting %zu bytes\n", - device_num, size); - - if (size <= 0) { - DP("Call to omp_target_alloc with non-positive length\n"); - return NULL; - } - - void *rc = NULL; + return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEFAULT, __func__); +} - if (device_num == omp_get_initial_device()) { - rc = malloc(size); - DP("omp_target_alloc returns host ptr " DPxMOD "\n", DPxPTR(rc)); - return rc; - } +EXTERN void *llvm_omp_target_alloc_device(size_t size, int device_num) { + return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEVICE, __func__); +} - if (!device_is_ready(device_num)) { - DP("omp_target_alloc returns NULL ptr\n"); - return NULL; - } +EXTERN void *llvm_omp_target_alloc_host(size_t size, int device_num) { + return targetAllocExplicit(size, device_num, TARGET_ALLOC_HOST, __func__); +} - rc = PM->Devices[device_num].allocData(size); - DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc)); - return rc; +EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) { + return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__); } EXTERN void omp_target_free(void *device_ptr, int device_num) { diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -185,13 +185,16 @@ __tgt_target_table *load_binary(void *Img); // device memory allocation/deallocation routines - /// Allocates \p Size bytes on the device and returns the address/nullptr when + /// Allocates \p Size bytes on the device, host or shared memory space + /// (depending on \p Kind) and returns the address/nullptr when /// succeeds/fails. \p HstPtr is an address of the host data which the /// allocated target data will be associated with. If it is unknown, the /// default value of \p HstPtr is nullptr. Note: this function doesn't do /// pointer association. Actually, all the __tgt_rtl_data_alloc - /// implementations ignore \p HstPtr. - void *allocData(int64_t Size, void *HstPtr = nullptr); + /// implementations ignore \p HstPtr. \p Kind dictates what allocator should + /// be used (host, shared, device). + void *allocData(int64_t Size, void *HstPtr = nullptr, + int32_t Kind = TARGET_ALLOC_DEFAULT); /// Deallocates memory which \p TgtPtrBegin points at and returns /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. int32_t deleteData(void *TgtPtrBegin); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -405,8 +405,8 @@ return rc; } -void *DeviceTy::allocData(int64_t Size, void *HstPtr) { - return RTL->data_alloc(RTLDeviceID, Size, HstPtr); +void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { + return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } int32_t DeviceTy::deleteData(void *TgtPtrBegin) { diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -36,6 +36,9 @@ omp_target_memcpy_rect; omp_target_associate_ptr; omp_target_disassociate_ptr; + llvm_omp_target_alloc_host; + llvm_omp_target_alloc_shared; + llvm_omp_target_alloc_device; local: *; }; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -328,6 +328,35 @@ return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; } +void *targetAllocExplicit(size_t size, int device_num, int kind, + const char *name) { + TIMESCOPE(); + DP("Call to %s for device %d requesting %zu bytes\n", name, device_num, size); + + if (size <= 0) { + DP("Call to %s with non-positive length\n", name); + return NULL; + } + + void *rc = NULL; + + if (device_num == omp_get_initial_device()) { + rc = malloc(size); + DP("%s returns host ptr " DPxMOD "\n", name, DPxPTR(rc)); + return rc; + } + + if (!device_is_ready(device_num)) { + DP("%s returns NULL ptr\n", name); + return NULL; + } + + DeviceTy &Device = PM->Devices[device_num]; + rc = Device.allocData(size, nullptr, kind); + DP("%s returns device ptr " DPxMOD "\n", name, DPxPTR(rc)); + return rc; +} + /// Call the user-defined mapper function followed by the appropriate // targetData* function (targetData{Begin,End,Update}). int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg, diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -46,6 +46,8 @@ extern void handleTargetOutcome(bool Success, ident_t *Loc); extern int checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc); +extern void *targetAllocExplicit(size_t size, int device_num, int kind, + const char *name); // This structure stores information of a mapped memory region. struct MapComponentInfoTy { diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -30,7 +30,7 @@ typedef int32_t(number_of_devices_ty)(); typedef int32_t(init_device_ty)(int32_t); typedef __tgt_target_table *(load_binary_ty)(int32_t, void *); - typedef void *(data_alloc_ty)(int32_t, int64_t, void *); + typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t); typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t); typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t, __tgt_async_info *);