diff --git a/openmp/libomptarget/deviceRTLs/common/include/ICVs.h b/openmp/libomptarget/deviceRTLs/common/include/ICVs.h
--- a/openmp/libomptarget/deviceRTLs/common/include/ICVs.h
+++ b/openmp/libomptarget/deviceRTLs/common/include/ICVs.h
@@ -8,9 +8,13 @@
 //
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef OMPTARGET_ICVS_H
 #define OMPTARGET_ICVS_H
 
+#include "interface.h"
+#include <stdint.h>
+
 struct ICVStateTy {
   int nthreads_var;
 
@@ -22,11 +26,32 @@
   /// active-levels-var is 1, if active_level is not 0, otherweise it is 0.
   int active_level;
 
+  /// run-sched-var ICV, kept as a raw 64-bit value. Convert to and from
+  /// RunSchedVarEncodingTy with memcpy only; do not use UB type punning!
+  ///{
+  struct RunSchedVarEncodingTy {
+    omp_sched_t ScheduleKind; // Kind handled by omp_{get,set}_schedule.
+    int ChunkSize;            // Chunk size handled by omp_{get,set}_schedule.
+  };
+
+  uint64_t run_sched_var;
+
+  static_assert(
+      sizeof(run_sched_var) == sizeof(RunSchedVarEncodingTy),
+      "Schedule encoding is supposed to cover the entire run-sched-var ICV!");
+  ///}
+
   static bool ensureICVStateForThread(unsigned TId);
 
   static int &getICVForThread(int ICVStateTy::*Var);
   static int incICVForThread(int ICVStateTy::*Var, int UpdateVal);
   static int setICVForThread(int ICVStateTy::*Var, int UpdateVal);
+
+  static uint64_t &getICVForThread(uint64_t ICVStateTy::*Var);
+  static uint64_t incICVForThread(uint64_t ICVStateTy::*Var,
+                                  uint64_t UpdateVal);
+  static uint64_t setICVForThread(uint64_t ICVStateTy::*Var,
+                                  uint64_t UpdateVal);
 };
 
 #ifdef __cplusplus
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -102,20 +102,7 @@
 
 class omptarget_nvptx_TaskDescr {
 public:
-  // methods for flags
-  INLINE omp_sched_t GetRuntimeSched() const;
-  INLINE void SetRuntimeSched(omp_sched_t sched);
-  INLINE int InParallelRegion() const { return items.flags & TaskDescr_InPar; }
-  INLINE int InL2OrHigherParallelRegion() const {
-    return items.flags & TaskDescr_InParL2P;
-  }
-  INLINE int IsParallelConstruct() const {
-    return items.flags & TaskDescr_IsParConstr;
-  }
-  INLINE int IsTaskConstruct() const { return !IsParallelConstruct(); }
   // methods for other fields
-  INLINE uint16_t &ThreadId() { return items.threadId; }
-  INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
   INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() const { return prev; }
   INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) {
     prev = taskDescr;
@@ -133,23 +120,6 @@
                                    uint16_t tid, uint16_t tnum);
 
 private:
-  // bits for flags: (6 used, 2 free)
-  //   3 bits (SchedMask) for runtime schedule
-  //   1 bit (InPar) if this thread has encountered one or more parallel region
-  //   1 bit (IsParConstr) if ICV for a parallel region (false = explicit task)
-  //   1 bit (InParL2+) if this thread has encountered L2 or higher parallel
-  //   region
-  static const uint8_t TaskDescr_SchedMask = (0x1 | 0x2 | 0x4);
-  static const uint8_t TaskDescr_InPar = 0x10;
-  static const uint8_t TaskDescr_IsParConstr = 0x20;
-  static const uint8_t TaskDescr_InParL2P = 0x40;
-
-  struct TaskDescr_items {
-    uint8_t flags; // 6 bit used (see flag above)
-    uint8_t unused;
-    uint16_t threadId;         // thread id
-    uint64_t runtimeChunkSize; // runtime chunk size
-  } items;
   omptarget_nvptx_TaskDescr *prev;
 };
 
diff --git a/openmp/libomptarget/deviceRTLs/common/omptargeti.h b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
--- a/openmp/libomptarget/deviceRTLs/common/omptargeti.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
@@ -15,53 +15,18 @@
 // Task Descriptor
 ////////////////////////////////////////////////////////////////////////////////
 
-INLINE omp_sched_t omptarget_nvptx_TaskDescr::GetRuntimeSched() const {
-  // sched starts from 1..4; encode it as 0..3; so add 1 here
-  uint8_t rc = (items.flags & TaskDescr_SchedMask) + 1;
-  return (omp_sched_t)rc;
-}
-
-INLINE void omptarget_nvptx_TaskDescr::SetRuntimeSched(omp_sched_t sched) {
-  // sched starts from 1..4; encode it as 0..3; so sub 1 here
-  uint8_t val = ((uint8_t)sched) - 1;
-  // clear current sched
-  items.flags &= ~TaskDescr_SchedMask;
-  // set new sched
-  items.flags |= val;
-}
-
 INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
-  // slow method
-  // flag:
-  //   default sched is static,
-  //   dyn is off (unused now anyway, but may need to sample from host ?)
-  //   not in parallel
-
-  items.flags = 0;
-  items.threadId = 0;         // is master
-  items.runtimeChunkSize = 1; // preferred chunking statik with chunk 1
 }
 
 // This is called when all threads are started together in SPMD mode.
 // OMP directives include target parallel, target distribute parallel for, etc.
 INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr(
     omptarget_nvptx_TaskDescr *parentTaskDescr) {
-  // slow method
-  // flag:
-  //   default sched is static,
-  //   dyn is off (unused now anyway, but may need to sample from host ?)
-  //   in L1 parallel
-
-  items.flags = TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
-  items.threadId =
-      GetThreadIdInBlock();   // get ids from cuda (only called for 1st level)
-  items.runtimeChunkSize = 1; // preferred chunking statik with chunk 1
   prev = parentTaskDescr;
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyData(
     omptarget_nvptx_TaskDescr *sourceTaskDescr) {
-  items = sourceTaskDescr->items;
 }
 
 INLINE void
@@ -79,41 +44,21 @@
 INLINE void omptarget_nvptx_TaskDescr::CopyForExplicitTask(
     omptarget_nvptx_TaskDescr *parentTaskDescr) {
   CopyParent(parentTaskDescr);
-  items.flags = items.flags & ~TaskDescr_IsParConstr;
-  ASSERT0(LT_FUSSY, IsTaskConstruct(), "expected task");
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr(
     omptarget_nvptx_TaskDescr *masterTaskDescr) {
   CopyParent(masterTaskDescr);
-  // overwrite specific items;
-  items.flags |=
-      TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
     omptarget_nvptx_TaskDescr *workTaskDescr) {
   Copy(workTaskDescr);
-  //
-  // overwrite specific items;
-  //
-  // The threadID should be GetThreadIdInBlock() % GetMasterThreadID().
-  // This is so that the serial master (first lane in the master warp)
-  // gets a threadId of 0.
-  // However, we know that this function is always called in a parallel
-  // region where only workers are active.  The serial master thread
-  // never enters this region.  When a parallel region is executed serially,
-  // the threadId is set to 0 elsewhere and the kmpc_serialized_* functions
-  // are called, which never activate this region.
-  items.threadId =
-      GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyConvergentParent(
     omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) {
   CopyParent(parentTaskDescr);
-  items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
-  items.threadId = tid;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/ICVs.cpp b/openmp/libomptarget/deviceRTLs/common/src/ICVs.cpp
--- a/openmp/libomptarget/deviceRTLs/common/src/ICVs.cpp
+++ b/openmp/libomptarget/deviceRTLs/common/src/ICVs.cpp
@@ -18,6 +18,7 @@
 #include "omptarget.h"
 #include "support.h"
 #include "target_interface.h"
+#include <stdint.h>
 #include <string.h>
 
 #define ICV_DEBUG(...)
@@ -119,6 +120,24 @@
   return !!ICVStateTy::getICVForThread(&ICVStateTy::active_level);
 }
 
+void omp_get_schedule(omp_sched_t *ScheduleKind, int *ChunkSize) {
+  // Bit-copy the 64-bit run-sched-var ICV into its (kind, chunk) encoding.
+  ICVStateTy::RunSchedVarEncodingTy Decoded;
+  memcpy(&Decoded, &ICVStateTy::getICVForThread(&ICVStateTy::run_sched_var),
+         sizeof(Decoded));
+  *ScheduleKind = Decoded.ScheduleKind;
+  *ChunkSize = Decoded.ChunkSize;
+}
+
+void omp_set_schedule(omp_sched_t ScheduleKind, int ChunkSize) {
+  // Pack (kind, chunk) and bit-copy it into the 64-bit run-sched-var ICV;
+  // memcpy is used instead of a pointer cast to stay clear of UB.
+  ICVStateTy::RunSchedVarEncodingTy Encoded = {ScheduleKind, ChunkSize};
+  uint64_t Packed;
+  memcpy(&Packed, &Encoded, sizeof(Encoded));
+  ICVStateTy::setICVForThread(&ICVStateTy::run_sched_var, Packed);
+}
+
 static int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
                                     int OutOfBoundsVal = -1) {
   if (Level == 0)
@@ -197,9 +216,9 @@
   ThreadStates[TId] = nullptr;
 }
 
-DEVICE TeamStateTy SHARED(omp::TeamState);
+TeamStateTy SHARED(omp::TeamState);
 
-[[clang::loader_uninitialized]] DEVICE ThreadStateTy
+[[clang::loader_uninitialized]] ThreadStateTy
     *omp::ThreadStates[MAX_THREADS_PER_TEAM];
 #pragma omp allocate(omp::ThreadStates) allocator(omp_pteam_mem_alloc)
 
diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
@@ -74,41 +74,6 @@
   return rc;
 }
 
-EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier) {
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
-            "Expected SPMD mode only with uninitialized runtime.");
-    *kind = omp_sched_static;
-    *modifier = 1;
-  } else {
-    omptarget_nvptx_TaskDescr *currTaskDescr =
-        getMyTopTaskDescriptor(isSPMDMode());
-    *kind = currTaskDescr->GetRuntimeSched();
-    *modifier = currTaskDescr->RuntimeChunkSize();
-  }
-  PRINT(LD_IO, "call omp_get_schedule returns sched %d and modif %d\n",
-        (int)*kind, *modifier);
-}
-
-EXTERN void omp_set_schedule(omp_sched_t kind, int modifier) {
-  PRINT(LD_IO, "call omp_set_schedule(sched %d, modif %d)\n", (int)kind,
-        modifier);
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
-            "Expected SPMD mode only with uninitialized runtime.");
-    return;
-  }
-  if (kind >= omp_sched_static && kind < omp_sched_auto) {
-    omptarget_nvptx_TaskDescr *currTaskDescr =
-        getMyTopTaskDescriptor(isSPMDMode());
-    currTaskDescr->SetRuntimeSched(kind);
-    currTaskDescr->RuntimeChunkSize() = modifier;
-    PRINT(LD_IOD, "omp_set_schedule did set sched %d & modif %" PRIu64 "\n",
-          (int)currTaskDescr->GetRuntimeSched(),
-          currTaskDescr->RuntimeChunkSize());
-  }
-}
-
 EXTERN omp_proc_bind_t omp_get_proc_bind(void) {
   PRINT0(LD_IO, "call omp_get_proc_bin() is true, regardless on state\n");
   return omp_proc_bind_true;
diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -248,8 +248,10 @@
       chunk = tripCount; // one thread gets the whole loop
     } else if (schedule == kmp_sched_runtime) {
       // process runtime
-      omp_sched_t rtSched = currTaskDescr->GetRuntimeSched();
-      chunk = currTaskDescr->RuntimeChunkSize();
+      omp_sched_t rtSched;
+      int ChunkInt;
+      omp_get_schedule(&rtSched, &ChunkInt);
+      chunk = ChunkInt;
       switch (rtSched) {
       case omp_sched_static: {
         if (chunk > 0)
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -144,7 +144,7 @@
   PRINT(LD_PAR,
         "thread will execute parallel region with id %d in a team of "
         "%d threads\n",
-        (int)newTaskDescr->ThreadId(), (int)ThreadLimit);
+        (int)threadId, (int)ThreadLimit);
 }
 
 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
diff --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
@@ -81,12 +81,6 @@
   omptarget_nvptx_TaskDescr *currTaskDescr =
       omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
   ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr");
-  ASSERT0(LT_FUSSY, !currTaskDescr->InParallelRegion(),
-          "cannot be called in a parallel region.");
-  if (currTaskDescr->InParallelRegion()) {
-    PRINT0(LD_PAR, "already in parallel: go seq\n");
-    return;
-  }
 
   uint16_t NumThreads = determineNumberOfThreads();
   TeamState.ParallelTeamSize = NumThreads;
@@ -152,7 +146,7 @@
     PRINT(LD_PAR,
           "thread will execute parallel region with id %d in a team of "
           "%d threads\n",
-          (int)newTaskDescr->ThreadId(), (int)nThreads);
+          (int)threadId, (int)nThreads);
   }
 
   return ThreadIsActive;
@@ -211,11 +205,6 @@
                                               "new seq parallel task");
   newTaskDescr->CopyParent(currTaskDescr);
 
-  // tweak values for serialized parallel case:
-  // - each thread becomes ID 0 in its serialized parallel, and
-  // - there is only one thread per team
-  newTaskDescr->ThreadId() = 0;
-
   // set new task descriptor as top
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                              newTaskDescr);