diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -320,6 +320,9 @@
   bool IsInit;
   std::once_flag InitFlag;
   bool HasPendingGlobals;
+  /// The physical number of team processors. For cuda, this is number of SMs,
+  /// for AMD, this is number of CUs. Field used by ompx_get_device_num_units(devid).
+  int32_t DeviceUnits;
 
   /// Host data to device map type with a wrapper key indirection that allows
   /// concurrent modification of the entries without invalidating the underlying
@@ -462,6 +465,10 @@
 
   /// Destroy the event.
   int32_t destroyEvent(void *Event);
+
+  void setDeviceUnits(int32_t num_device_units) { DeviceUnits = num_device_units; }
+  int32_t getDeviceUnits() { return DeviceUnits; }
+
   /// }
 
 private:
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -222,6 +222,7 @@
 extern "C" {
 #endif
 
+int ompx_get_device_num_units(int device_num);
 int omp_get_num_devices(void);
 int omp_get_device_num(void);
 int omp_get_initial_device(void);
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -192,6 +192,10 @@
 int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
                                    const char **ErrStr);
 
+// Number of available physical processors to execute teams. AMD calls these
+// CUs. Nvidia calls them SMs. For CPUs modeling teams, they could be sockets.
+int32_t __tgt_rtl_number_of_device_units(int32_t device_num);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -75,6 +75,7 @@
   typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
   typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
                                        const char **);
+  typedef int32_t(number_of_device_units_ty)(int32_t);
 
   int32_t Idx = -1;             // RTL index, index is the number of devices
                                 // of other RTLs that were registered before,
@@ -124,6 +125,7 @@
   destroy_event_ty *destroy_event = nullptr;
   init_async_info_ty *init_async_info = nullptr;
   init_device_into_ty *init_device_info = nullptr;
+  number_of_device_units_ty *number_of_device_units = nullptr;
   release_async_info_ty *release_async_info = nullptr;
 
   // Are there images associated with this RTL.
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1945,6 +1945,10 @@
   return elfMachineIdIsAmdgcn(Image);
 }
 
+int __tgt_rtl_number_of_device_units(int DeviceId) {
+  return DeviceInfo().ComputeUnits[DeviceId];
+}
+
 int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
                                        __tgt_image_info *info) {
   if (!__tgt_rtl_is_valid_binary(image))
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -164,6 +164,7 @@
   // OpenMP properties
   unsigned int NumTeams = 0;
   unsigned int NumThreads = 0;
+  unsigned int NumberOfDeviceUnits = 0;
 };
 
 /// Resource allocator where \p T is the resource type.
@@ -585,6 +586,10 @@
 
   int getNumOfDevices() const { return NumberOfDevices; }
 
+  int getNumOfDeviceUnits(int devid) const {
+    return DeviceData[devid].NumberOfDeviceUnits;
+  }
+
   void setRequiresFlag(const int64_t Flags) { this->RequiresFlags = Flags; }
 
   int initDevice(const int DeviceId) {
@@ -653,6 +658,18 @@
       DeviceData[DeviceId].BlocksPerGrid = MaxGridDimX;
     }
 
+    // Query attributes to for number of SMs for ompx_get_device_num_units(devid)
+    int TmpDeviceUnits;
+    Err = cuDeviceGetAttribute(
+        &TmpDeviceUnits, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, Device);
+    if (Err != CUDA_SUCCESS) {
+      DP("Error: on CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, using %d\n", 16);
+      DeviceData[DeviceId].NumberOfDeviceUnits = 16;
+    } else {
+      DP("Device %d has %d procs for team execution\n", DeviceId, TmpDeviceUnits);
+      DeviceData[DeviceId].NumberOfDeviceUnits = TmpDeviceUnits;
+    }
+
     // We are only exploiting threads along the x axis.
     int MaxBlockDimX;
     Err = cuDeviceGetAttribute(&MaxBlockDimX,
@@ -1574,6 +1591,10 @@
 
 int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); }
 
+int32_t __tgt_rtl_number_of_device_units(int devid) {
+  return DeviceRTL.getNumOfDeviceUnits(devid);
+}
+
 int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
   DP("Init requires flags to %" PRId64 "\n", RequiresFlags);
   DeviceRTL.setRequiresFlag(RequiresFlags);
diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@@ -41,6 +41,7 @@
 #include "elf_common.h"
 
 #define NUMBER_OF_DEVICES 4
+#define NUMBER_OF_DEVICE_UNITS 1
 #define OFFLOAD_SECTION_NAME "omp_offloading_entries"
 
 /// Array of Dynamic libraries loaded for this target.
@@ -128,6 +129,12 @@
 
 int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
 
+// __tgt_rtl_number_of_device_units supports ompx_get_device_num_units(devid).
+// Imples that support multiple host teams may want to change this.
+int32_t __tgt_rtl_number_of_device_units(int32_t device_id) {
+  return NUMBER_OF_DEVICE_UNITS;
+}
+
 int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; }
 
 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp
--- a/openmp/libomptarget/plugins/ve/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp
@@ -179,6 +179,10 @@
 // target RTL.
 int32_t __tgt_rtl_number_of_devices(void) { return DeviceInfo.NodeIds.size(); }
 
+int32_t __tgt_rtl_number_of_device_units(int device_id) {
+  return DeviceInfo.ProcHandles[device_id].size();
+}
+
 // Return an integer different from zero if the provided device image can be
 // supported by the runtime. The functionality is similar to comparing the
 // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -30,6 +30,20 @@
   return DevicesSize;
 }
 
+EXTERN int ompx_get_device_num_units(int device_num) {
+  if (!deviceIsReady(device_num)) {
+    DP("Device %d did not initialize\n", device_num);
+    // return 1 team proc for initial/host device
+    return 1;
+  }
+  TIMESCOPE();
+  PM->RTLsMtx.lock();
+  int DeviceUnits = PM->Devices[device_num]->getDeviceUnits();
+  PM->RTLsMtx.unlock();
+  DP("Call to ompx_get_device_num_units returning %d\n", DeviceUnits);
+  return DeviceUnits;
+}
+
 EXTERN int omp_get_device_num(void) {
   TIMESCOPE();
   int HostDevice = omp_get_initial_device();
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -496,6 +496,7 @@
   int32_t Ret = RTL->init_device(RTLDeviceID);
   if (Ret != OFFLOAD_SUCCESS)
     return;
+  setDeviceUnits(RTL->number_of_device_units(RTLDeviceID));
 
   IsInit = true;
 }
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -30,6 +30,7 @@
     __tgt_push_mapper_component;
     __kmpc_push_target_tripcount;
     __kmpc_push_target_tripcount_mapper;
+    ompx_get_device_num_units;
     omp_get_num_devices;
     omp_get_device_num;
     omp_get_initial_device;
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -127,6 +127,9 @@
     if (!(*((void **)&R.number_of_devices) =
               DynLibrary->getAddressOfSymbol("__tgt_rtl_number_of_devices")))
       ValidPlugin = false;
+    if (!(*((void **)&R.number_of_device_units) =
+              DynLibrary->getAddressOfSymbol("__tgt_rtl_number_of_device_units")))
+      ValidPlugin = false;
     if (!(*((void **)&R.init_device) =
               DynLibrary->getAddressOfSymbol("__tgt_rtl_init_device")))
       ValidPlugin = false;
diff --git a/openmp/libomptarget/test/api/ompx_get_device_num_units.c b/openmp/libomptarget/test/api/ompx_get_device_num_units.c
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/api/ompx_get_device_num_units.c
@@ -0,0 +1,34 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <omp.h>
+#include <stdio.h>
+
+int test_omp_get_device_num_units(int devid) {
+  /* checks that ompx_get_device_num_units() > 0 */
+  int device_units = ompx_get_device_num_units(devid);
+  printf("device_units(%d) = %d\n", devid, device_units);
+
+#pragma omp target
+  {}
+
+  return (device_units > 0);
+}
+
+int main() {
+  int i;
+  int failed = 0;
+
+  if (!test_omp_get_device_num_units(omp_get_initial_device())) {
+    failed++;
+  }
+  if (!test_omp_get_device_num_units(omp_get_default_device())) {
+    failed++;
+  }
+  if (failed)
+    printf("FAIL\n");
+  else
+    printf("PASS\n");
+  return failed;
+}
+
+// CHECK: PASS
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -498,6 +498,7 @@
 
     /* LLVM Extensions */
     extern void *llvm_omp_target_dynamic_shared_alloc();
+    extern int __KAI_KMPC_CONVENTION ompx_get_device_num_units(int);
 
 #   undef __KAI_KMPC_CONVENTION
 #   undef __KMP_IMP