diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -399,6 +399,7 @@
   // OpenMP Environment properties
   int EnvNumTeams;
   int EnvTeamLimit;
+  int EnvTeamThreadLimit; // From OMP_TEAMS_THREAD_LIMIT; -1 when unset.
   int EnvMaxTeamsDefault;
 
   // OpenMP Requires Flags
@@ -631,6 +632,13 @@
     } else {
       EnvMaxTeamsDefault = -1;
     }
+    envStr = getenv("OMP_TEAMS_THREAD_LIMIT");
+    if (envStr) {
+      EnvTeamThreadLimit = std::stoi(envStr); // std::stoi throws on bad input
+      DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
+    } else {
+      EnvTeamThreadLimit = -1; // Unset: the `> 0` check at use skips the cap.
+    }
 
     // Default state.
     RequiresFlags = OMP_REQ_UNDEFINED;
@@ -936,6 +944,14 @@
        DeviceInfo.GroupsPerDevice[device_id]);
   }
 
+  // Adjust threads to the env variables
+  if (DeviceInfo.EnvTeamThreadLimit > 0 &&
+      (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+                           DeviceInfo.EnvTeamThreadLimit))) {
+    DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
+       DeviceInfo.EnvTeamThreadLimit);
+  }
+
   // Set default number of threads
   DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
   DP("Default number of threads set according to library's default %d\n",
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -281,6 +281,7 @@
   // OpenMP environment properties
   int EnvNumTeams;
   int EnvTeamLimit;
+  int EnvTeamThreadLimit; // From OMP_TEAMS_THREAD_LIMIT; -1 when unset.
   // OpenMP requires flags
   int64_t RequiresFlags;
 
@@ -436,7 +437,7 @@
 
   DeviceRTLTy()
       : NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1),
-        RequiresFlags(OMP_REQ_UNDEFINED) {
+        EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED) {
 
     DP("Start initializing CUDA\n");
 
@@ -467,6 +468,11 @@
       EnvTeamLimit = std::stoi(EnvStr);
       DP("Parsed OMP_TEAM_LIMIT=%d\n", EnvTeamLimit);
     }
+    if (const char *EnvStr = getenv("OMP_TEAMS_THREAD_LIMIT")) {
+      // OMP_TEAMS_THREAD_LIMIT has been set; replace the -1 "unset" default
+      EnvTeamThreadLimit = std::stoi(EnvStr);
+      DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
+    }
     if (const char *EnvStr = getenv("OMP_NUM_TEAMS")) {
       // OMP_NUM_TEAMS has been set
       EnvNumTeams = std::stoi(EnvStr);
@@ -596,14 +602,31 @@
       DP("Error getting max block dimension, use default value %d\n",
          DeviceRTLTy::DefaultNumThreads);
       DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads;
-    } else if (MaxBlockDimX <= DeviceRTLTy::HardThreadLimit) {
-      DP("Using %d CUDA threads per block\n", MaxBlockDimX);
-      DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX;
     } else {
-      DP("Max CUDA threads per block %d exceeds the hard thread limit %d, "
-         "capping at the hard limit\n",
-         MaxBlockDimX, DeviceRTLTy::HardThreadLimit);
-      DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit;
+      // Effective cap: the hard limit, lowered further when a positive
+      // OMP_TEAMS_THREAD_LIMIT asks for fewer threads. Non-positive values
+      // are ignored so the block size is never set to zero, and the env
+      // limit still applies when the device maximum exceeds the hard limit.
+      int ThreadLimit = DeviceRTLTy::HardThreadLimit;
+      const bool EnvLimitApplies =
+          EnvTeamThreadLimit > 0 && EnvTeamThreadLimit < ThreadLimit;
+      if (EnvLimitApplies)
+        ThreadLimit = EnvTeamThreadLimit;
+
+      if (MaxBlockDimX <= ThreadLimit) {
+        DP("Using %d CUDA threads per block\n", MaxBlockDimX);
+        DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX;
+      } else if (EnvLimitApplies) {
+        DP("Max CUDA threads per block %d exceeds the thread limit %d set by "
+           "OMP_TEAMS_THREAD_LIMIT, capping at the limit\n",
+           MaxBlockDimX, EnvTeamThreadLimit);
+        DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit;
+      } else {
+        DP("Max CUDA threads per block %d exceeds the hard thread limit %d, "
+           "capping at the hard limit\n",
+           MaxBlockDimX, DeviceRTLTy::HardThreadLimit);
+        DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit;
+      }
     }
 
     // Get and set warp size