diff --git a/openmp/libomptarget/include/Debug.h b/openmp/libomptarget/include/Debug.h --- a/openmp/libomptarget/include/Debug.h +++ b/openmp/libomptarget/include/Debug.h @@ -37,24 +37,38 @@ #ifndef _OMPTARGET_DEBUG_H #define _OMPTARGET_DEBUG_H -static inline int getInfoLevel() { - static int InfoLevel = -1; - if (InfoLevel >= 0) - return InfoLevel; - - if (char *EnvStr = getenv("LIBOMPTARGET_INFO")) - InfoLevel = std::stoi(EnvStr); +#include <mutex> + +/// 32-Bit field data attributes controlling information presented to the user. +enum OpenMPInfoType : uint32_t { + // Print data arguments and attributes upon entering an OpenMP device kernel. + OMP_INFOTYPE_KERNEL_ARGS = 0x0001, + // Indicate when an address already exists in the device mapping table. + OMP_INFOTYPE_MAPPING_EXISTS = 0x0002, + // Dump the contents of the device pointer map at kernel exit or failure. + OMP_INFOTYPE_DUMP_TABLE = 0x0004, + // Print kernel information from target device plugins + OMP_INFOTYPE_PLUGIN_KERNEL = 0x0010, +}; + +static inline uint32_t getInfoLevel() { + static uint32_t InfoLevel = 0; + static std::once_flag Flag{}; + std::call_once(Flag, []() { + if (char *EnvStr = getenv("LIBOMPTARGET_INFO")) + InfoLevel = std::stoi(EnvStr); + }); return InfoLevel; } -static inline int getDebugLevel() { - static int DebugLevel = -1; - if (DebugLevel >= 0) - return DebugLevel; - - if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG")) - DebugLevel = std::stoi(EnvStr); +static inline uint32_t getDebugLevel() { + static uint32_t DebugLevel = 0; + static std::once_flag Flag{}; + std::call_once(Flag, []() { + if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG")) + DebugLevel = std::stoi(EnvStr); + }); return DebugLevel; } @@ -107,7 +121,7 @@ /// Print a generic information string used if LIBOMPTARGET_INFO=1 #define INFO_MESSAGE(_num, ...) 
\ do { \ - fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \ + fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", (int)_num); \ fprintf(stderr, __VA_ARGS__); \ } while (0) diff --git a/openmp/libomptarget/include/SourceInfo.h b/openmp/libomptarget/include/SourceInfo.h --- a/openmp/libomptarget/include/SourceInfo.h +++ b/openmp/libomptarget/include/SourceInfo.h @@ -54,6 +54,13 @@ return std::string(reinterpret_cast<const char *>(name)); } + std::string initStr(const ident_t *loc) { + if (!loc) + return ";unknown;unknown;0;0;;"; + else + return std::string(reinterpret_cast<const char *>(loc->psource)); + } + /// Get n-th substring in an expression separated by ;. std::string getSubstring(const int n) const { std::size_t begin = sourceStr.find(';'); @@ -73,7 +80,7 @@ public: SourceInfo(const ident_t *loc) - : sourceStr(initStr(loc->psource)), name(getSubstring(1)), + : sourceStr(initStr(loc)), name(getSubstring(1)), filename(removePath(getSubstring(0))), line(std::stoi(getSubstring(2))), column(std::stoi(getSubstring(3))) {} diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -501,11 +501,12 @@ DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit; } - INFO(DeviceId, - "Device supports up to %d CUDA blocks and %d threads with a " - "warp size of %d\n", - DeviceData[DeviceId].BlocksPerGrid, - DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); + if (getDebugLevel() || (getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL)) + INFO(DeviceId, + "Device supports up to %d CUDA blocks and %d threads with a " + "warp size of %d\n", + DeviceData[DeviceId].BlocksPerGrid, + DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); // Set default number of teams if (EnvNumTeams > 0) { @@ -937,14 +938,15 @@ CudaBlocksPerGrid = TeamNum; } - INFO(DeviceId, - "Launching kernel %s with %d blocks and %d threads in %s 
" - "mode\n", - (getOffloadEntry(DeviceId, TgtEntryPtr)) - ? getOffloadEntry(DeviceId, TgtEntryPtr)->name - : "(null)", - CudaBlocksPerGrid, CudaThreadsPerBlock, - (KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic"); + if (getDebugLevel() || (getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL)) + INFO(DeviceId, + "Launching kernel %s with %d blocks and %d threads in %s " + "mode\n", + (getOffloadEntry(DeviceId, TgtEntryPtr)) + ? getOffloadEntry(DeviceId, TgtEntryPtr)->name + : "(null)", + CudaBlocksPerGrid, CudaThreadsPerBlock, + (KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic"); CUstream Stream = getStream(DeviceId, AsyncInfo); Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1, diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -49,10 +49,11 @@ MemoryManager(nullptr) {} DeviceTy::~DeviceTy() { - if (DeviceID == -1 || getInfoLevel() < 1) + if (DeviceID == -1 || !getInfoLevel()) return; - dumpTargetPointerMappings(*this); + ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"}; + dumpTargetPointerMappings(&loc, *this); } int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { @@ -217,14 +218,16 @@ HT.incRefCount(); uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); - INFO(DeviceID, - "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD - ", " - "Size=%" PRId64 ",%s RefCount=%s, Name=%s\n", - (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp), - Size, (UpdateRefCount ? " updated" : ""), - HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str(), - (HstPtrName) ? 
getNameFromMapping(HstPtrName).c_str() : "(null)"); + if (getDebugLevel() || getInfoLevel() & OMP_INFOTYPE_MAPPING_EXISTS) + INFO(DeviceID, + "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD + ", " + "Size=%" PRId64 ",%s RefCount=%s, Name=%s\n", + (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp), + Size, (UpdateRefCount ? " updated" : ""), + HT.isRefCountInf() ? "INF" + : std::to_string(HT.getRefCount()).c_str(), + (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "(null)"); rc = (void *)tp; } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) { // Explicit extension of mapped data - not allowed. diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -57,22 +57,27 @@ break; case tgt_mandatory: if (!success) { - if (getInfoLevel() > 1) + if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) for (const auto &Device : PM->Devices) - dumpTargetPointerMappings(Device); + dumpTargetPointerMappings(loc, Device); else - FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target " - "pointer maps\n"); + FAILURE_MESSAGE("Run with LIBOMPTARGET_INFO=%d to dump host-target " + "pointer mappings.\n", + OMP_INFOTYPE_DUMP_TABLE); SourceInfo info(loc); if (info.isAvailible()) fprintf(stderr, "%s:%d:%d: ", info.getFilename(), info.getLine(), info.getColumn()); else - FAILURE_MESSAGE( - "Build with debug information to provide more information"); + FAILURE_MESSAGE("Source location information not present. 
Compile with " + "-g or -gline-tables-only.\n"); FATAL_MESSAGE0( 1, "failure of target construct while offloading is mandatory"); + } else { + if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) + for (const auto &Device : PM->Devices) + dumpTargetPointerMappings(loc, Device); } break; } @@ -147,6 +152,9 @@ DeviceTy &Device = PM->Devices[device_id]; + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) + printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, + arg_names, "Entering OpenMP data region"); #ifdef OMPTARGET_DEBUG for (int i = 0; i < arg_num; ++i) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 @@ -227,6 +235,9 @@ return; } + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) + printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, + arg_names, "Exiting OpenMP data region"); #ifdef OMPTARGET_DEBUG for (int i=0; iDevices[device_id]; int rc = targetDataUpdate(Device, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers); @@ -351,6 +366,9 @@ return OFFLOAD_FAIL; } + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) + printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, + arg_names, "Entering OpenMP kernel"); #ifdef OMPTARGET_DEBUG for (int i=0; i #include +#include +#include #include @@ -90,20 +91,59 @@ //////////////////////////////////////////////////////////////////////////////// /// dump a table of all the host-target pointer pairs on failure -static inline void dumpTargetPointerMappings(const DeviceTy &Device) { +static inline void dumpTargetPointerMappings(const ident_t *loc, + const DeviceTy &Device) { if (Device.HostDataToTargetMap.empty()) return; - fprintf(stderr, "Device %d Host-Device Pointer Mappings:\n", Device.DeviceID); - fprintf(stderr, "%-18s %-18s %s %s\n", "Host Ptr", "Target Ptr", "Size (B)", - "Declaration"); + SourceInfo kernel(loc); + INFO(Device.DeviceID, "Host-Device Pointer Mappings at %s:%d:%d:\n", + kernel.getFilename(), kernel.getLine(), kernel.getColumn()); + 
INFO(Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr", "Target Ptr", + "Size (B)", "RefCount", "Declaration"); for (const auto &HostTargetMap : Device.HostDataToTargetMap) { SourceInfo info(HostTargetMap.HstPtrName); - fprintf(stderr, DPxMOD " " DPxMOD " %-8lu %s at %s:%d:%d\n", - DPxPTR(HostTargetMap.HstPtrBegin), - DPxPTR(HostTargetMap.TgtPtrBegin), - HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin, info.getName(), - info.getFilename(), info.getLine(), info.getColumn()); + INFO(Device.DeviceID, DPxMOD " " DPxMOD " %-8lu %-8ld %s at %s:%d:%d\n", + DPxPTR(HostTargetMap.HstPtrBegin), DPxPTR(HostTargetMap.TgtPtrBegin), + (long unsigned)(HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin), + HostTargetMap.getRefCount(), info.getName(), info.getFilename(), + info.getLine(), info.getColumn()); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// Print out the names and properties of the arguments to each kernel +static inline void +printKernelArguments(const ident_t *loc, const int64_t DeviceId, + const int32_t argNum, const int64_t *argSizes, + const int64_t *argTypes, const map_var_info_t *argNames, + const char *regionType) { + SourceInfo info(loc); + INFO(DeviceId, "%s at %s:%d:%d with %d arguments:\n", regionType, + info.getFilename(), info.getLine(), info.getColumn(), argNum); + + for (int32_t i = 0; i < argNum; ++i) { + const map_var_info_t varName = (argNames) ? argNames[i] : nullptr; + const char *type = nullptr; + const char *implicit = + (argTypes[i] & OMP_TGT_MAPTYPE_IMPLICIT) ? 
"(implicit)" : ""; + if (argTypes[i] & OMP_TGT_MAPTYPE_TO && argTypes[i] & OMP_TGT_MAPTYPE_FROM) + type = "tofrom"; + else if (argTypes[i] & OMP_TGT_MAPTYPE_TO) + type = "to"; + else if (argTypes[i] & OMP_TGT_MAPTYPE_FROM) + type = "from"; + else if (argTypes[i] & OMP_TGT_MAPTYPE_PRIVATE) + type = "private"; + else if (argTypes[i] & OMP_TGT_MAPTYPE_LITERAL) + type = "firstprivate"; + else if (argTypes[i] & OMP_TGT_MAPTYPE_TARGET_PARAM && argSizes[i] != 0) + type = "alloc"; + else + type = "use_address"; + + INFO(DeviceId, "%s(%s)[%ld] %s\n", type, + getNameFromMapping(varName).c_str(), argSizes[i], implicit); } } diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c --- a/openmp/libomptarget/test/offloading/info.c +++ b/openmp/libomptarget/test/offloading/info.c @@ -1,15 +1,38 @@ -// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO +// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -gline-tables-only && env LIBOMPTARGET_INFO=23 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO #include #include +#define N 64 + int main() { - int ptr = 1; + int A[N]; + int B[N]; + int C[N]; + int val = 1; -// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}} -// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode -#pragma omp target map(tofrom:ptr) - {ptr = 1;} +// INFO: CUDA device 0 info: Device supports up to {{.*}} CUDA blocks and {{.*}} threads with a warp size of {{.*}} +// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:33:1 with 3 arguments: +// INFO: Libomptarget device 0 info: alloc(A[0:64])[256] +// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256] +// INFO: 
Libomptarget device 0 info: to(C[0:64])[256] +// INFO: Libomptarget device 0 info: Host-Device Pointer Mappings at info.c:33:1: +// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration +// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:11:7 +// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:10:7 +// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:9:7 +// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:34:1 with 1 arguments: +// INFO: Libomptarget device 0 info: firstprivate(val)[4] +// INFO: CUDA device 0 info: Launching kernel {{.*}} with {{.*}} and {{.*}} threads in {{.*}} mode +// INFO: Libomptarget device 0 info: Host-Device Pointer Mappings at info.c:34:1: +// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration +// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 C[0:64] at info.c:11:7 +// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 B[0:64] at info.c:10:7 +// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 A[0:64] at info.c:9:7 +// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:33:1 +#pragma omp target data map(alloc:A[0:N]) map(tofrom:B[0:N]) map(to:C[0:N]) +#pragma omp target firstprivate(val) + { val = 1; } return 0; }