diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -296,12 +296,13 @@
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
-  if (ThreadLimitClause[0] > 0 && isGenericMode())
+
+  if (static_cast<int32_t>(ThreadLimitClause[0]) <= 0)
+    ThreadLimitClause[0] = PreferredNumThreads;
+  else if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
-  return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
-                                     ? ThreadLimitClause[0]
-                                     : PreferredNumThreads);
+  return std::min(MaxNumThreads, ThreadLimitClause[0]);
 }
 
 uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
diff --git a/openmp/libomptarget/test/offloading/negative_thread_limit.cpp b/openmp/libomptarget/test/offloading/negative_thread_limit.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/negative_thread_limit.cpp
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compilexx-run-and-check-amdgcn-amd-amdhsa \
+// RUN:   -check-prefixes=INFO,AMDGPU
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda \
+// RUN:   -check-prefixes=INFO,NVIDIAGPU
+
+#include <omp.h>
+#include <stdio.h>
+
+void createThreads(int threadLimit) {
+  int threadCount = 0;
+#pragma omp target teams distribute parallel for map(from : threadCount)      \
+    thread_limit(threadLimit)
+  for (int i = 0; i < 1; ++i)
+    threadCount = omp_get_num_threads();
+
+  printf("threadCount=%d\n", threadCount);
+}
+
+int main(int argc, char *argv[]) {
+  int isHost = -1;
+
+#pragma omp target map(from : isHost)
+  { isHost = omp_is_initial_device(); }
+
+  // Make sure we run on the device
+  printf("Target region executed on the %s\n", isHost ? "host" : "device");
+
+  // Set the thread limit to a large negative number, such that the
+  // addition of GV_Warp_Size cannot push it back into the positive range.
+  // thread_limit: value < -GV_Warp_Size < 0 -- expected: GV_Default_WG_Size
+  createThreads((int)0xF0000000);
+
+  // thread_limit: value > GV_Max_WG_Size > 0 -- expected: GV_Max_WG_Size
+  // Note: on NVIDIA this could also be CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
+  createThreads((int)0x40000000);
+
+  // thread_limit: value == 0 -- expected: GV_Default_WG_Size
+  createThreads(0);
+
+  return isHost;
+}
+
+// INFO: Target region executed on the device
+
+// AMDGPU: threadCount=256
+// AMDGPU: threadCount=1024
+// AMDGPU: threadCount=256
+
+// NVIDIAGPU: threadCount=128
+// NVIDIAGPU: threadCount={{1024|384}}
+// NVIDIAGPU: threadCount=128
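
For reference, a minimal standalone sketch (not part of the patch) of the clamping behaviour the PluginInterface.cpp hunk implements: a thread_limit that is negative or zero when reinterpreted as signed falls back to the preferred default, a positive one is bumped by the warp size in generic mode, and the result is clamped to the device maximum. MaxNumThreads, PreferredNumThreads, and WarpSize below are hypothetical stand-ins for the values the plugin queries from the device.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-ins for plugin-provided device limits.
    constexpr uint32_t MaxNumThreads = 1024;      // e.g. GV_Max_WG_Size
    constexpr uint32_t PreferredNumThreads = 256; // e.g. GV_Default_WG_Size
    constexpr uint32_t WarpSize = 64;

    // Mirrors the patched getNumThreads(): negative clause values arrive
    // as large uint32_t, so the signed cast detects them.
    uint32_t getNumThreads(uint32_t ThreadLimit, bool IsGenericMode) {
      if (static_cast<int32_t>(ThreadLimit) <= 0)
        ThreadLimit = PreferredNumThreads;
      else if (IsGenericMode)
        ThreadLimit += WarpSize;
      return std::min(MaxNumThreads, ThreadLimit);
    }

    int main() {
      printf("%u\n", (unsigned)getNumThreads(0xF0000000u, true)); // negative -> 256
      printf("%u\n", (unsigned)getNumThreads(0x40000000u, true)); // huge positive -> 1024
      printf("%u\n", (unsigned)getNumThreads(0u, true));          // zero -> 256
    }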