diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h @@ -48,6 +48,18 @@ CU_CTX_SCHED_MASK = 0x07, } CUctx_flags; +#define cuMemFree cuMemFree_v2 +#define cuMemAlloc cuMemAlloc_v2 +#define cuMemcpyDtoH cuMemcpyDtoH_v2 +#define cuMemcpyHtoD cuMemcpyHtoD_v2 +#define cuStreamDestroy cuStreamDestroy_v2 +#define cuModuleGetGlobal cuModuleGetGlobal_v2 +#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2 +#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 +#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2 +#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2 +#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2 + CUresult cuCtxGetDevice(CUdevice *); CUresult cuDeviceGet(CUdevice *, int); CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); @@ -60,26 +72,26 @@ unsigned, unsigned, unsigned, CUstream, void **, void **); -CUresult cuMemAlloc_v2(CUdeviceptr *, size_t); -CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr, CUdeviceptr, size_t, CUstream); +CUresult cuMemAlloc(CUdeviceptr *, size_t); +CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); -CUresult cuMemcpyDtoH_v2(void *, CUdeviceptr, size_t); -CUresult cuMemcpyDtoHAsync_v2(void *, CUdeviceptr, size_t, CUstream); -CUresult cuMemcpyHtoD_v2(CUdeviceptr, const void *, size_t); -CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr, const void *, size_t, CUstream); +CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); +CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); +CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); +CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); -CUresult cuMemFree_v2(CUdeviceptr); +CUresult cuMemFree(CUdeviceptr); CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); -CUresult cuModuleGetGlobal_v2(CUdeviceptr *, size_t *, CUmodule, const char *); +CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); CUresult cuModuleUnload(CUmodule); CUresult cuStreamCreate(CUstream *, unsigned); -CUresult cuStreamDestroy_v2(CUstream); +CUresult cuStreamDestroy(CUstream); CUresult cuStreamSynchronize(CUstream); CUresult cuCtxSetCurrent(CUcontext); -CUresult cuDevicePrimaryCtxRelease_v2(CUdevice); +CUresult cuDevicePrimaryCtxRelease(CUdevice); CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *); -CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice, unsigned); +CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned); CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice); CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *, void **); diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp @@ -28,26 +28,26 @@ DLWRAP(cuGetErrorString, 2); DLWRAP(cuLaunchKernel, 11); -DLWRAP(cuMemAlloc_v2, 2); -DLWRAP(cuMemcpyDtoDAsync_v2, 4); +DLWRAP(cuMemAlloc, 2); +DLWRAP(cuMemcpyDtoDAsync, 4); -DLWRAP(cuMemcpyDtoH_v2, 3); -DLWRAP(cuMemcpyDtoHAsync_v2, 4); -DLWRAP(cuMemcpyHtoD_v2, 3); -DLWRAP(cuMemcpyHtoDAsync_v2, 4); +DLWRAP(cuMemcpyDtoH, 3); +DLWRAP(cuMemcpyDtoHAsync, 4); +DLWRAP(cuMemcpyHtoD, 3); +DLWRAP(cuMemcpyHtoDAsync, 4); -DLWRAP(cuMemFree_v2, 1); +DLWRAP(cuMemFree, 1); DLWRAP(cuModuleGetFunction, 3); -DLWRAP(cuModuleGetGlobal_v2, 4); +DLWRAP(cuModuleGetGlobal, 4); DLWRAP(cuModuleUnload, 1); DLWRAP(cuStreamCreate, 2); -DLWRAP(cuStreamDestroy_v2, 1); +DLWRAP(cuStreamDestroy, 1); DLWRAP(cuStreamSynchronize, 1); DLWRAP(cuCtxSetCurrent, 1); -DLWRAP(cuDevicePrimaryCtxRelease_v2, 1); +DLWRAP(cuDevicePrimaryCtxRelease, 1); DLWRAP(cuDevicePrimaryCtxGetState, 3); -DLWRAP(cuDevicePrimaryCtxSetFlags_v2, 2); +DLWRAP(cuDevicePrimaryCtxSetFlags, 2); DLWRAP(cuDevicePrimaryCtxRetain, 2); DLWRAP(cuModuleLoadDataEx, 5); diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -110,8 +110,8 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size, CUstream Stream) { - CUresult Err = cuMemcpyDtoDAsync_v2((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, - Size, Stream); + CUresult Err = + cuMemcpyDtoDAsync((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to device. Pointers: src " @@ -207,8 +207,8 @@ for (CUstream &S : StreamPool[I]) { if (S) - checkResult(cuStreamDestroy_v2(S), - "Error returned from cuStreamDestroy_v2\n"); + checkResult(cuStreamDestroy(S), + "Error returned from cuStreamDestroy\n"); } } } @@ -311,8 +311,8 @@ return nullptr; CUdeviceptr DevicePtr; - Err = cuMemAlloc_v2(&DevicePtr, Size); - if (!checkResult(Err, "Error returned from cuMemAlloc_v2\n")) + Err = cuMemAlloc(&DevicePtr, Size); + if (!checkResult(Err, "Error returned from cuMemAlloc\n")) return nullptr; return (void *)DevicePtr; @@ -323,8 +323,8 @@ if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) return OFFLOAD_FAIL; - Err = cuMemFree_v2((CUdeviceptr)TgtPtr); - if (!checkResult(Err, "Error returned from cuMemFree_v2\n")) + Err = cuMemFree((CUdeviceptr)TgtPtr); + if (!checkResult(Err, "Error returned from cuMemFree\n")) return OFFLOAD_FAIL; return OFFLOAD_SUCCESS; @@ -466,8 +466,8 @@ CUdevice Device; checkResult(cuCtxGetDevice(&Device), "Error returned from cuCtxGetDevice\n"); - checkResult(cuDevicePrimaryCtxRelease_v2(Device), - "Error returned from cuDevicePrimaryCtxRelease_v2\n"); + checkResult(cuDevicePrimaryCtxRelease(Device), + "Error returned from cuDevicePrimaryCtxRelease\n"); } } } @@ -506,9 +506,8 @@ } else { DP("The primary context is inactive, set its flags to " "CU_CTX_SCHED_BLOCKING_SYNC\n"); - Err = cuDevicePrimaryCtxSetFlags_v2(Device, CU_CTX_SCHED_BLOCKING_SYNC); - if (!checkResult(Err, - "Error returned from cuDevicePrimaryCtxSetFlags_v2\n")) + Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC); + if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxSetFlags\n")) return OFFLOAD_FAIL; } @@ -657,7 +656,7 @@ __tgt_offload_entry Entry = *E; CUdeviceptr CUPtr; size_t CUSize; - Err = cuModuleGetGlobal_v2(&CUPtr, &CUSize, Module, E->name); + Err = cuModuleGetGlobal(&CUPtr, &CUSize, Module, E->name); // We keep this style here because we need the name if (Err != CUDA_SUCCESS) { REPORT("Loading global '%s' Failed\n", E->name); @@ -689,7 +688,7 @@ // If unified memory is present any target link or to variables // can access host addresses directly. There is no longer a // need for device copies. - cuMemcpyHtoD_v2(CUPtr, E->addr, sizeof(void *)); + cuMemcpyHtoD(CUPtr, E->addr, sizeof(void *)); DP("Copy linked variable host address (" DPxMOD ") to device address (" DPxMOD ")\n", DPxPTR(*((void **)E->addr)), DPxPTR(CUPtr)); @@ -720,7 +719,7 @@ CUdeviceptr ExecModePtr; size_t CUSize; - Err = cuModuleGetGlobal_v2(&ExecModePtr, &CUSize, Module, ExecModeName); + Err = cuModuleGetGlobal(&ExecModePtr, &CUSize, Module, ExecModeName); if (Err == CUDA_SUCCESS) { if (CUSize != sizeof(int8_t)) { DP("Loading global exec_mode '%s' - size mismatch (%zd != %zd)\n", @@ -728,7 +727,7 @@ return nullptr; } - Err = cuMemcpyDtoH_v2(&ExecModeVal, ExecModePtr, CUSize); + Err = cuMemcpyDtoH(&ExecModeVal, ExecModePtr, CUSize); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to host. Pointers: " "host = " DPxMOD ", device = " DPxMOD ", size = %zd\n", @@ -769,7 +768,7 @@ CUdeviceptr DeviceEnvPtr; size_t CUSize; - Err = cuModuleGetGlobal_v2(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName); + Err = cuModuleGetGlobal(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName); if (Err == CUDA_SUCCESS) { if (CUSize != sizeof(DeviceEnv)) { REPORT( @@ -779,7 +778,7 @@ return nullptr; } - Err = cuMemcpyHtoD_v2(DeviceEnvPtr, &DeviceEnv, CUSize); + Err = cuMemcpyHtoD(DeviceEnvPtr, &DeviceEnv, CUSize); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from host to device. Pointers: " "host = " DPxMOD ", device = " DPxMOD ", size = %zu\n", @@ -817,7 +816,7 @@ CUstream Stream = getStream(DeviceId, AsyncInfoPtr); - Err = cuMemcpyHtoDAsync_v2((CUdeviceptr)TgtPtr, HstPtr, Size, Stream); + Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from host to device. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n", @@ -839,7 +838,7 @@ CUstream Stream = getStream(DeviceId, AsyncInfoPtr); - Err = cuMemcpyDtoHAsync_v2(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream); + Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to host. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",