diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -451,6 +451,17 @@ : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete), HasCloseModifier(HasCloseModifier) {} }; + +/// Synchronize \p Device; report and return OFFLOAD_FAIL on failure. +static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo"); + if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to synchronize device.\n"); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -631,11 +642,9 @@ // AsyncInfo->Queue will not be nullptr, so again, we don't need to // synchronize. if (AsyncInfo && AsyncInfo->Queue) { - Ret = Device.synchronize(AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Failed to synchronize device.\n"); + Ret = syncDevice(Device, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } } // Deallocate target pointer @@ -1307,6 +1316,11 @@ REPORT("Failed to process data after launching the kernel.\n"); return OFFLOAD_FAIL; } + } else if (AsyncInfo.Queue) { + // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't + // have any argument, and the device supports async operations, so we need a + // sync at this point. + return syncDevice(Device, &AsyncInfo); } return OFFLOAD_SUCCESS; diff --git a/openmp/libomptarget/test/offloading/assert.cpp b/openmp/libomptarget/test/offloading/assert.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/offloading/assert.cpp @@ -0,0 +1,8 @@ +// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda && %libomptarget-run-fail-nvptx64-nvidia-cuda + +int main(int argc, char *argv[]) { +#pragma omp target + { __builtin_trap(); } + + return 0; +}