diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -451,6 +451,17 @@
       : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
         HasCloseModifier(HasCloseModifier) {}
 };
+
+/// Run Device.synchronize on \p AsyncInfo; OFFLOAD_FAIL (reported) on error.
+static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) {
+  assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo");
+  if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) {
+    REPORT("Failed to synchronize device.\n");
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
 } // namespace
 
 /// Internal function to undo the mapping and retrieve the data from the device.
@@ -631,11 +642,9 @@
   // AsyncInfo->Queue will not be nullptr, so again, we don't need to
   // synchronize.
   if (AsyncInfo && AsyncInfo->Queue) {
-    Ret = Device.synchronize(AsyncInfo);
-    if (Ret != OFFLOAD_SUCCESS) {
-      REPORT("Failed to synchronize device.\n");
+    Ret = syncDevice(Device, AsyncInfo);
+    if (Ret != OFFLOAD_SUCCESS)
       return OFFLOAD_FAIL;
-    }
   }
 
   // Deallocate target pointer
@@ -1307,6 +1316,11 @@
       REPORT("Failed to process data after launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
+  } else if (AsyncInfo.Queue) {
+    // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
+    // have any argument, and the device supports async operations, so we need a
+    // sync at this point.
+    return syncDevice(Device, &AsyncInfo);
   }
 
   return OFFLOAD_SUCCESS;
diff --git a/openmp/libomptarget/test/offloading/assert.cpp b/openmp/libomptarget/test/offloading/assert.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/assert.cpp
@@ -0,0 +1,8 @@
+// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda && %libomptarget-run-fail-nvptx64-nvidia-cuda
+
+int main(int argc, char *argv[]) {
+#pragma omp target
+  { __builtin_trap(); } // Trap in the target region; RUN expects failure.
+
+  return 0;
+}