diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/CallingConv.h"
@@ -37,6 +38,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -4121,6 +4123,19 @@
   Builder.CreateCall(Fn, {});
 }
 
+static const omp::GV &getGridValue(Function *Kernel) {
+  if (Kernel->getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+    StringRef Features =
+        Kernel->getFnAttribute("target-features").getValueAsString();
+    if (Features.count("+wavefrontsize64"))
+      return omp::getAMDGPUGridValues<64>();
+    return omp::getAMDGPUGridValues<32>();
+  }
+  if (Triple(Kernel->getParent()->getTargetTriple()).isNVPTX())
+    return omp::NVPTXGridValues;
+  llvm_unreachable("No grid value available for this architecture!");
+}
+
 void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
     Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) {
   if (Config.isTargetDevice()) {
@@ -4135,6 +4150,9 @@
   if (NumTeams > 0)
     OutlinedFn->addFnAttr("omp_target_num_teams", std::to_string(NumTeams));
 
+  if (NumThreads == -1 && Config.isGPU())
+    NumThreads = getGridValue(OutlinedFn).GV_Default_WG_Size;
+
   if (NumThreads > 0) {
     if (OutlinedFn->getCallingConv() == CallingConv::AMDGPU_KERNEL) {
       OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
diff --git a/openmp/libomptarget/test/offloading/default_thread_limit.c b/openmp/libomptarget/test/offloading/default_thread_limit.c
new file
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/default_thread_limit.c
@@ -0,0 +1,103 @@
+// clang-format off
+// RUN: %libomptarget-compile-generic
+// RUN: env LIBOMPTARGET_INFO=16 \
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=DEFAULT
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+__attribute__((optnone)) int optnone() { return 1; }
+
+int main() {
+  int N = optnone() * 4098 * 32;
+
+// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
+#pragma omp target teams distribute parallel for simd
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
+#pragma omp target teams distribute parallel for simd
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
+#pragma omp target teams distribute parallel for simd
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 42 (MaxFlatWorkGroupSize: 1024
+#pragma omp target thread_limit(optnone() * 42)
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
+#pragma omp target thread_limit(optnone() * 42) ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// FIXME: Use the attribute value to imply a thread_limit
+// DEFAULT: {{(128|256)}} (MaxFlatWorkGroupSize: 42
+#pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: MaxFlatWorkGroupSize: 1024
+#pragma omp target
+#pragma omp teams distribute parallel for num_threads(optnone() * 42)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: MaxFlatWorkGroupSize: 1024
+#pragma omp target teams distribute parallel for thread_limit(optnone() * 42)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: MaxFlatWorkGroupSize: 1024
+#pragma omp target teams distribute parallel for num_threads(optnone() * 42)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
+#pragma omp target
+#pragma omp teams distribute parallel for num_threads(9)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
+#pragma omp target thread_limit(4)
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
+#pragma omp target
+#pragma omp teams distribute parallel for thread_limit(4)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
+#pragma omp target teams distribute parallel for num_threads(9)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
+#pragma omp target teams distribute parallel for simd thread_limit(4)
+  for (int i = 0; i < N; ++i) {
+    optnone();
+  }
+}
+