diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -108,6 +108,7 @@
   ${include_directory}/Debug.h
   ${include_directory}/Interface.h
   ${include_directory}/Mapping.h
+  ${include_directory}/Memory.h
   ${include_directory}/State.h
   ${include_directory}/Synchronization.h
   ${include_directory}/Types.h
@@ -119,6 +120,7 @@
   ${source_directory}/Debug.cpp
   ${source_directory}/Kernel.cpp
   ${source_directory}/Mapping.cpp
+  ${source_directory}/Memory.cpp
   ${source_directory}/Misc.cpp
   ${source_directory}/Parallelism.cpp
   ${source_directory}/Reduction.cpp
diff --git a/openmp/libomptarget/DeviceRTL/include/Memory.h b/openmp/libomptarget/DeviceRTL/include/Memory.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/include/Memory.h
@@ -0,0 +1,28 @@
+//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations of the malloc/free entry points of the device runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_MEMORY_H
+#define OMPTARGET_MEMORY_H
+
+#include "Types.h"
+
+using size_t = uint64_t;
+
+extern "C" {
+/// Allocate \p Size bytes and return a pointer to the allocation.
+void *malloc(size_t Size);
+
+/// Release an allocation previously returned by malloc.
+void free(void *);
+}
+
+#endif // OMPTARGET_MEMORY_H
diff --git a/openmp/libomptarget/DeviceRTL/src/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/src/CMakeLists.txt
--- a/openmp/libomptarget/DeviceRTL/src/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/src/CMakeLists.txt
@@ -3,6 +3,7 @@
   Debug.cpp
   Kernel.cpp
   Mapping.cpp
+  Memory.cpp
   Misc.cpp
   Parallelism.cpp
   Reduction.cpp
diff --git a/openmp/libomptarget/DeviceRTL/src/Memory.cpp b/openmp/libomptarget/DeviceRTL/src/Memory.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/src/Memory.cpp
@@ -0,0 +1,34 @@
+//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Device-side malloc/free, implemented on AMDGCN via __ockl_dm_(de)alloc.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma omp begin declare target device_type(nohost)
+
+#include "Memory.h"
+
+/// AMDGCN Implementation
+///
+///{
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+extern "C" {
+unsigned long long __ockl_dm_alloc(unsigned long long __size);
+void __ockl_dm_dealloc(unsigned long long __addr);
+
+void *malloc(size_t Size) { return (void *)__ockl_dm_alloc(Size); }
+
+void free(void *P) { __ockl_dm_dealloc((unsigned long long)P); }
+}
+
+#pragma omp end declare variant
+///}
+
+#pragma omp end declare target
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -964,6 +964,93 @@
       }
     }
 
+    // Initialize the device heap. An image opts in by defining the two
+    // globals named below; the buffer is allocated here and its address and
+    // size are written into them. Images without the symbols are skipped.
+    // NOTE(review): nothing in this hunk frees the buffer on the success
+    // path - confirm it is released when the image/device is torn down.
+    {
+      const char *BufferVarName = "omptarget_device_heap_buffer";
+      const char *SizeVarName = "omptarget_device_heap_size";
+      CUdeviceptr BufferVarPtr;
+      CUdeviceptr SizeVarPtr;
+      size_t BufferVarSize;
+      size_t SizeVarSize;
+
+      Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
+                              BufferVarName);
+      if (Err == CUDA_SUCCESS) {
+        if (BufferVarSize != sizeof(uint64_t)) {
+          // Err is CUDA_SUCCESS here, so there is no CUDA error to print.
+          REPORT("Global heap buffer pointer '%s' - size mismatch (%zu "
+                 "!= %zu)\n",
+                 BufferVarName, BufferVarSize, sizeof(uint64_t));
+          return nullptr;
+        }
+
+        Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
+        if (Err == CUDA_SUCCESS) {
+          if (SizeVarSize != sizeof(uint64_t)) {
+            // Err is CUDA_SUCCESS here, so there is no CUDA error to print.
+            REPORT("Global heap size variable '%s' - size mismatch (%zu "
+                   "!= %zu)\n",
+                   SizeVarName, SizeVarSize, sizeof(uint64_t));
+            return nullptr;
+          }
+
+          CUdeviceptr BufferPtr;
+          // 2 GiB, held in a uint64_t so the host object has exactly the
+          // width that was checked against the device variable above.
+          const uint64_t HeapSize = uint64_t(2) * 1024 * 1024 * 1024;
+
+          Err = cuMemAlloc(&BufferPtr, HeapSize);
+          if (Err != CUDA_SUCCESS) {
+            REPORT("Error when allocating heap buffer, size = %zu\n",
+                   static_cast<size_t>(HeapSize));
+            CUDA_ERR_STRING(Err);
+            return nullptr;
+          }
+
+          Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
+          if (Err != CUDA_SUCCESS) {
+            REPORT("Error when copying data from host to device. Pointers: "
+                   "host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
+                   DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
+            CUDA_ERR_STRING(Err);
+            // Do not leak the heap buffer when bailing out.
+            cuMemFree(BufferPtr);
+            return nullptr;
+          }
+
+          Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
+          if (Err != CUDA_SUCCESS) {
+            REPORT("Error when copying data from host to device. Pointers: "
+                   "host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
+                   DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
+            CUDA_ERR_STRING(Err);
+            // Do not leak the heap buffer when bailing out.
+            cuMemFree(BufferPtr);
+            return nullptr;
+          }
+
+          DP("Successfully set heap buffer. omptarget_device_heap_buffer "
+             "= " DPxMOD ", omptarget_device_heap_size = %zu\n",
+             DPxPTR(BufferPtr), static_cast<size_t>(HeapSize));
+        } else {
+          DP("Finding global heap size variable '%s' - symbol missing.\n",
+             SizeVarName);
+          DP("Continue, considering this is an image does not require heap "
+             "allocation.\n");
+        }
+
+      } else {
+        DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
+           BufferVarName);
+        DP("Continue, considering this is an image does not require heap "
+           "allocation.\n");
+      }
+    }
+
     return getOffloadEntriesTable(DeviceId);
   }
 