Index: cmake/OpenMPTesting.cmake
===================================================================
--- cmake/OpenMPTesting.cmake
+++ cmake/OpenMPTesting.cmake
@@ -147,7 +147,7 @@
     return()
   endif()
 
-  cmake_parse_arguments(ARG "" "" "DEPENDS" ${ARGN})
+  cmake_parse_arguments(ARG "" "" "DEPENDS;ARGS" ${ARGN})
   # EXCLUDE_FROM_ALL excludes the test ${target} out of check-openmp.
   if (NOT EXCLUDE_FROM_ALL)
     # Register the testsuites and depends for the check-openmp rule.
@@ -156,8 +156,9 @@
   endif()
 
   if (${OPENMP_STANDALONE_BUILD})
+    set(LIT_ARGS ${OPENMP_LIT_ARGS} ${ARG_ARGS})
     add_custom_target(${target}
-      COMMAND ${PYTHON_EXECUTABLE} ${OPENMP_LLVM_LIT_EXECUTABLE} ${OPENMP_LIT_ARGS} ${ARG_UNPARSED_ARGUMENTS}
+      COMMAND ${PYTHON_EXECUTABLE} ${OPENMP_LLVM_LIT_EXECUTABLE} ${LIT_ARGS} ${ARG_UNPARSED_ARGUMENTS}
       COMMENT ${comment}
       DEPENDS ${ARG_DEPENDS}
       ${cmake_3_2_USES_TERMINAL}
@@ -167,6 +168,7 @@
       ${comment}
       ${ARG_UNPARSED_ARGUMENTS}
       DEPENDS clang clang-headers FileCheck ${ARG_DEPENDS}
+      ARGS ${ARG_ARGS}
     )
   endif()
 endfunction()
Index: libomptarget/CMakeLists.txt
===================================================================
--- libomptarget/CMakeLists.txt
+++ libomptarget/CMakeLists.txt
@@ -20,6 +20,7 @@
 
 if(OPENMP_STANDALONE_BUILD)
   # Build all libraries into a common place so that tests can find them.
+  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
 
@@ -40,10 +41,6 @@
 # the list of supported targets in the current system.
 set (LIBOMPTARGET_SYSTEM_TARGETS "")
 
-# Set base directories - required for lit to locate the tests.
-set(LIBOMPTARGET_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-set(LIBOMPTARGET_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
-
 # If building this library in debug mode, we define a macro to enable
 # dumping progress messages at runtime.
 string( TOLOWER "${CMAKE_BUILD_TYPE}" LIBOMPTARGET_CMAKE_BUILD_TYPE)
@@ -65,6 +62,17 @@
   set(LIBOMPTARGET_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 endif()
 
+# Test paths (omp.h, host runtime); set here so the libomptarget-nvptx tests can reuse them.
+if(OPENMP_STANDALONE_BUILD)
+  set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/../runtime/src" CACHE STRING
+    "Path to folder containing omp.h")
+  set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/../runtime/src" CACHE STRING
+    "Path to folder containing libomp.so")
+else()
+  set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/../runtime/src")
+endif()
+
+
 # Build offloading plugins and device RTLs if they are available.
 add_subdirectory(plugins)
 add_subdirectory(deviceRTLs)
Index: libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===================================================================
--- libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -132,6 +132,9 @@
       set(bc_flags ${bc_flags} -Dnv_weak=weak)
     endif()
 
+    # Create target to build all Bitcode libraries.
+    add_custom_target(omptarget-nvptx-bc)
+
     # Generate a Bitcode library for all the compute capabilities the user requested.
     foreach(sm ${nvptx_sm_list})
       set(cuda_arch --cuda-gpu-arch=sm_${sm})
@@ -165,6 +168,7 @@
       set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
 
       add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
+      add_dependencies(omptarget-nvptx-bc omptarget-nvptx-${sm}-bc)
 
       # Copy library to destination.
       add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
@@ -176,6 +180,7 @@
     endforeach()
   endif()
 
+  add_subdirectory(test)
 else()
   libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.")
 endif()
Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -61,8 +61,8 @@
   omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
   int rc = 1; // default is 1 thread avail
   if (!currTaskDescr->InParallelRegion()) {
-    // not currently in a parallel region... all are available
-    rc = GetNumberOfProcsInTeam();
+    // Not currently in a parallel region, return what was set.
+    rc = currTaskDescr->NThreads();
     ASSERT0(LT_FUSSY, rc >= 0, "bad number of threads");
   }
   PRINT(LD_IO, "call omp_get_max_threads() return %d\n", rc);
Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -193,25 +193,49 @@
 // support for parallel that goes parallel (1 static level only)
 ////////////////////////////////////////////////////////////////////////////////
 
-// return number of cuda threads that participate to parallel
-// calculation has to consider simd implementation in nvptx
-// i.e. (num omp threads * num lanes)
-//
-// cudathreads =
-//    if(num_threads != 0) {
-//      if(thread_limit > 0) {
-//        min (num_threads*numLanes ; thread_limit*numLanes);
-//      } else {
-//        min (num_threads*numLanes; blockDim.x)
-//      }
-//    } else {
-//      if (thread_limit != 0) {
-//        min (thread_limit*numLanes; blockDim.x)
-//      } else { // no thread_limit, no num_threads, use all cuda threads
-//        blockDim.x;
-//      }
-//    }
-//
+// Determine how many OpenMP threads take part in the next parallel region.
+//
+// NumThreadsClause: value of the num_threads clause; 0 means none was given.
+// NThreadsICV:      requested thread count used when there is no clause;
+//                   0 means no request.
+// ThreadLimit:      upper bound on the result; 0 means no limit.
+//
+// The result never exceeds GetNumberOfWorkersInTeam().
+static INLINE uint16_t determineNumberOfThreads(uint16_t NumThreadsClause,
+                                                uint16_t NThreadsICV,
+                                                uint16_t ThreadLimit) {
+  // A num_threads clause takes precedence over the ICV.
+  uint16_t ThreadsRequested = NThreadsICV;
+  if (NumThreadsClause != 0) {
+    ThreadsRequested = NumThreadsClause;
+  }
+
+  // Upper bound: workers in the team, further capped by a thread limit.
+  uint16_t ThreadsAvailable = GetNumberOfWorkersInTeam();
+  if (ThreadLimit != 0 && ThreadLimit < ThreadsAvailable) {
+    ThreadsAvailable = ThreadLimit;
+  }
+
+  // A request of zero means "no preference": use everything available.
+  uint16_t NumThreads = ThreadsAvailable;
+  if (ThreadsRequested != 0 && ThreadsRequested < NumThreads) {
+    NumThreads = ThreadsRequested;
+  }
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
+  // On Volta and newer architectures we require that all lanes in
+  // a warp participate in the parallel region.  Round down to a
+  // multiple of WARPSIZE since it is legal to do so in OpenMP.
+  if (NumThreads < WARPSIZE) {
+    NumThreads = 1;
+  } else {
+    NumThreads = (NumThreads & ~((uint16_t)WARPSIZE - 1));
+  }
+#endif
+
+  return NumThreads;
+}
+
 // This routine is always called by the team master..
 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
                                            int16_t IsOMPRuntimeInitialized) {
@@ -234,78 +258,26 @@
     return;
   }
 
-  uint16_t CudaThreadsForParallel = 0;
-  uint16_t NumThreadsClause =
+  uint16_t &NumThreadsClause =
       omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(threadId);
 
-  // we cannot have more than block size
-  uint16_t CudaThreadsAvail = GetNumberOfWorkersInTeam();
-
-  // currTaskDescr->ThreadLimit(): If non-zero, this is the limit as
-  // specified by the thread_limit clause on the target directive.
-  // GetNumberOfWorkersInTeam(): This is the number of workers available
-  // in this kernel instance.
-  //
-  // E.g: If thread_limit is 33, the kernel is launched with 33+32=65
-  // threads.  The last warp is the master warp so in this case
-  // GetNumberOfWorkersInTeam() returns 64.
-
-  // this is different from ThreadAvail of OpenMP because we may be
-  // using some of the CUDA threads as SIMD lanes
-  int NumLanes = 1;
-  if (NumThreadsClause != 0) {
-    // reset request to avoid propagating to successive #parallel
-    omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(threadId) =
-        0;
-
-    // assume that thread_limit*numlanes is already <= CudaThreadsAvail
-    // because that is already checked on the host side (CUDA offloading rtl)
-    if (currTaskDescr->ThreadLimit() != 0)
-      CudaThreadsForParallel =
-          NumThreadsClause * NumLanes < currTaskDescr->ThreadLimit() * NumLanes
-              ? NumThreadsClause * NumLanes
-              : currTaskDescr->ThreadLimit() * NumLanes;
-    else {
-      CudaThreadsForParallel = (NumThreadsClause * NumLanes > CudaThreadsAvail)
-                                   ? CudaThreadsAvail
-                                   : NumThreadsClause * NumLanes;
-    }
-  } else {
-    if (currTaskDescr->ThreadLimit() != 0) {
-      CudaThreadsForParallel =
-          (currTaskDescr->ThreadLimit() * NumLanes > CudaThreadsAvail)
-              ? CudaThreadsAvail
-              : currTaskDescr->ThreadLimit() * NumLanes;
-    } else
-      CudaThreadsForParallel = CudaThreadsAvail;
-  }
+  uint16_t NumThreads =
+      determineNumberOfThreads(NumThreadsClause, currTaskDescr->NThreads(),
+                               currTaskDescr->ThreadLimit());
 
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
-  // On Volta and newer architectures we require that all lanes in
-  // a warp participate in the parallel region.  Round down to a
-  // multiple of WARPSIZE since it is legal to do so in OpenMP.
-  // CudaThreadsAvail is the number of workers available in this
-  // kernel instance and is greater than or equal to
-  // currTaskDescr->ThreadLimit().
-  if (CudaThreadsForParallel < CudaThreadsAvail) {
-    CudaThreadsForParallel =
-        (CudaThreadsForParallel < WARPSIZE)
-            ? 1
-            : CudaThreadsForParallel & ~((uint16_t)WARPSIZE - 1);
+  if (NumThreadsClause != 0) {
+    // Reset request to avoid propagating to successive #parallel
+    NumThreadsClause = 0;
   }
-#endif
 
-  ASSERT(LT_FUSSY, CudaThreadsForParallel > 0,
-         "bad thread request of %d threads", CudaThreadsForParallel);
+  ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
+         NumThreads);
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
           "only team master can create parallel");
 
-  // set number of threads on work descriptor
-  // this is different from the number of cuda threads required for the parallel
-  // region
+  // Set number of threads on work descriptor.
   omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
-  workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr,
-                                             CudaThreadsForParallel / NumLanes);
+  workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr, NumThreads);
 }
 
 // All workers call this function.  Deactivate those not needed.
Index: libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt
@@ -0,0 +1,26 @@
+if(NOT OPENMP_TEST_COMPILER_ID STREQUAL "Clang")
+  # These tests are only set up for Clang; skip quietly for anything else.
+  return()
+endif()
+
+set(LIBOMPTARGET_NVPTX_TEST_FLAGS "" CACHE STRING
+  "Extra compiler flags to send to the test compiler.")
+set(LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS
+  "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda" CACHE STRING
+  "OpenMP compiler flags to use for testing libomptarget-nvptx.")
+
+set(nvptx_test_deps omptarget-nvptx omptarget omp)
+if(LIBOMPTARGET_NVPTX_ENABLE_BCLIB)
+  list(APPEND nvptx_test_deps omptarget-nvptx-bc)
+endif()
+
+# Keep the suite out of check-openmp, and pass -j1 to lit so that only one
+# application is launched on the GPU at a time.
+set(EXCLUDE_FROM_ALL True)
+add_openmp_testsuite(check-libomptarget-nvptx
+  "Running libomptarget-nvptx tests" ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS ${nvptx_test_deps} ARGS -j1)
+
+# Generate lit.site.cfg from lit.site.cfg.in.
+set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget-nvptx configuration.\n# Do not edit!")
+configure_file(lit.site.cfg.in lit.site.cfg @ONLY)
Index: libomptarget/deviceRTLs/nvptx/test/lit.cfg
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/lit.cfg
@@ -0,0 +1,74 @@
+# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
+# Configuration file for the 'lit' test runner.
+
+import os
+import lit.formats
+
+# Tell pylint that we know config and lit_config exist somewhere.
+if 'PYLINT_IMPORT' in os.environ:
+    config = object()
+    lit_config = object()
+
+def prepend_library_path(name, value, sep):
+    """Prepend value to config.environment[name], using sep as separator."""
+    if name in config.environment:
+        config.environment[name] = value + sep + config.environment[name]
+    else:
+        config.environment[name] = value
+
+# name: The name of this test suite.
+config.name = 'libomptarget-nvptx'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp', '.cc']
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root object directory where output is placed
+config.test_exec_root = config.binary_dir
+
+# test format
+config.test_format = lit.formats.ShTest()
+
+# compiler flags
+config.test_flags = " -I " + config.omp_header_directory + \
+    " -L " + config.library_dir + \
+    " --libomptarget-nvptx-path=" + config.library_dir
+
+if config.omp_host_rtl_directory:
+    config.test_flags = config.test_flags + \
+        " -L " + config.omp_host_rtl_directory
+
+config.test_flags = config.test_flags + " " + config.test_extra_flags
+
+# Setup environment to find dynamic library at runtime.
+prepend_library_path('LD_LIBRARY_PATH', config.library_dir, ":")
+# The host RTL directory may be empty (it is only configured for standalone
+# builds); an empty LD_LIBRARY_PATH entry would mean the current directory.
+if config.omp_host_rtl_directory:
+    prepend_library_path('LD_LIBRARY_PATH',
+                         config.omp_host_rtl_directory, ":")
+
+# Forbid fallback to host.
+config.environment["OMP_TARGET_OFFLOAD"] = "MANDATORY"
+
+# substitutions
+config.substitutions.append(("%compilexx-run-and-check",
+    "%compilexx-and-run | " + config.libomptarget_filecheck + " %s"))
+config.substitutions.append(("%compile-run-and-check",
+    "%compile-and-run | " + config.libomptarget_filecheck + " %s"))
+config.substitutions.append(("%compilexx-and-run", "%compilexx && %run"))
+config.substitutions.append(("%compile-and-run", "%compile && %run"))
+
+config.substitutions.append(("%compilexx",
+    "%clangxx %openmp_flags %flags %s -o %t"))
+config.substitutions.append(("%compile",
+    "%clang %openmp_flags %flags %s -o %t"))
+
+config.substitutions.append(("%clangxx", config.test_cxx_compiler))
+config.substitutions.append(("%clang", config.test_c_compiler))
+config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
+config.substitutions.append(("%flags", config.test_flags))
+
+config.substitutions.append(("%run", "%t"))
Index: libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in
@@ -0,0 +1,14 @@
+@AUTO_GEN_COMMENT@
+
+config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
+config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
+config.test_openmp_flags = "@LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS@"
+config.test_extra_flags = "@LIBOMPTARGET_NVPTX_TEST_FLAGS@"
+config.binary_dir = "@CMAKE_CURRENT_BINARY_DIR@"
+config.library_dir = "@LIBOMPTARGET_LIBRARY_DIR@"
+config.omp_header_directory = "@LIBOMPTARGET_OPENMP_HEADER_FOLDER@"
+config.omp_host_rtl_directory = "@LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER@"
+config.libomptarget_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
Index: libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c
@@ -0,0 +1,110 @@
+// RUN: %compile-run-and-check
+
+// Exercises thread-count selection inside one target region: an explicit
+// num_threads() clause, omp_set_num_threads(), and what omp_get_max_threads()
+// reports in between.  Every thread of a parallel region adds
+// omp_get_num_threads() to its own slot of that region's check array.
+
+#include <stdio.h>
+#include <omp.h>
+
+const int WarpSize = 32;
+const int NumThreads1 = 1 * WarpSize;
+const int NumThreads2 = 2 * WarpSize;
+const int NumThreads3 = 3 * WarpSize;
+const int MaxThreads = 1024;
+
+int main(int argc, char *argv[]) {
+  int check1[MaxThreads];
+  int check2[MaxThreads];
+  int check3[MaxThreads];
+  int check4[MaxThreads];
+  for (int i = 0; i < MaxThreads; i++) {
+    check1[i] = check2[i] = check3[i] = check4[i] = 0;
+  }
+
+  int maxThreads1 = -1;
+  int maxThreads2 = -1;
+  int maxThreads3 = -1;
+
+  #pragma omp target map(check1[:], check2[:], check3[:], check4[:]) \
+                     map(maxThreads1, maxThreads2, maxThreads3)
+  {
+    // Explicit clause: the region is expected to run NumThreads1 threads.
+    #pragma omp parallel num_threads(NumThreads1)
+    {
+      check1[omp_get_thread_num()] += omp_get_num_threads();
+    }
+
+    // API method to set number of threads in parallel regions without
+    // num_threads() clause.
+    omp_set_num_threads(NumThreads2);
+    maxThreads1 = omp_get_max_threads();
+    #pragma omp parallel
+    {
+      check2[omp_get_thread_num()] += omp_get_num_threads();
+    }
+
+    maxThreads2 = omp_get_max_threads();
+
+    // num_threads() clause should override nthreads-var ICV.
+    #pragma omp parallel num_threads(NumThreads3)
+    {
+      check3[omp_get_thread_num()] += omp_get_num_threads();
+    }
+
+    maxThreads3 = omp_get_max_threads();
+
+    // Effect from omp_set_num_threads() should still be visible.
+    #pragma omp parallel
+    {
+      check4[omp_get_thread_num()] += omp_get_num_threads();
+    }
+  }
+
+  // CHECK: maxThreads1 = 64
+  printf("maxThreads1 = %d\n", maxThreads1);
+  // CHECK: maxThreads2 = 64
+  printf("maxThreads2 = %d\n", maxThreads2);
+  // CHECK: maxThreads3 = 64
+  printf("maxThreads3 = %d\n", maxThreads3);
+
+  // Slots below a region's expected thread count must hold that count; all
+  // other slots must still be zero.  Mismatches are reported as "invalid".
+  // CHECK-NOT: invalid
+  for (int i = 0; i < MaxThreads; i++) {
+    if (i < NumThreads1) {
+      if (check1[i] != NumThreads1) {
+        printf("invalid: check1[%d] should be %d, is %d\n", i, NumThreads1, check1[i]);
+      }
+    } else if (check1[i] != 0) {
+      printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+    }
+
+    if (i < NumThreads2) {
+      if (check2[i] != NumThreads2) {
+        printf("invalid: check2[%d] should be %d, is %d\n", i, NumThreads2, check2[i]);
+      }
+    } else if (check2[i] != 0) {
+      printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+    }
+
+    if (i < NumThreads3) {
+      if (check3[i] != NumThreads3) {
+        printf("invalid: check3[%d] should be %d, is %d\n", i, NumThreads3, check3[i]);
+      }
+    } else if (check3[i] != 0) {
+      printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+    }
+
+    if (i < NumThreads2) {
+      if (check4[i] != NumThreads2) {
+        printf("invalid: check4[%d] should be %d, is %d\n", i, NumThreads2, check4[i]);
+      }
+    } else if (check4[i] != 0) {
+      printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]);
+    }
+  }
+
+  return 0;
+}
Index: libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c
@@ -0,0 +1,82 @@
+// RUN: %compile-run-and-check
+
+// Exercises the thread_limit() clause on a target teams directive: parallel
+// regions sized by default, by omp_set_num_threads() and by a num_threads()
+// clause are all expected to run with ThreadLimit threads.
+
+#include <stdio.h>
+#include <omp.h>
+
+const int WarpSize = 32;
+const int ThreadLimit = 1 * WarpSize;
+const int NumThreads2 = 2 * WarpSize;
+const int NumThreads3 = 3 * WarpSize;
+const int MaxThreads = 1024;
+
+int main(int argc, char *argv[]) {
+  int check1[MaxThreads];
+  int check2[MaxThreads];
+  int check3[MaxThreads];
+  for (int i = 0; i < MaxThreads; i++) {
+    check1[i] = check2[i] = check3[i] = 0;
+  }
+
+  int threadLimit = -1;
+
+  #pragma omp target teams num_teams(1) thread_limit(ThreadLimit) \
+                           map(check1[:], check2[:], check3[:], threadLimit)
+  {
+    threadLimit = omp_get_thread_limit();
+
+    // All parallel regions should get as many threads as specified by the
+    // thread_limit() clause.
+    #pragma omp parallel
+    {
+      check1[omp_get_thread_num()] += omp_get_num_threads();
+    }
+
+    omp_set_num_threads(NumThreads2);
+    #pragma omp parallel
+    {
+      check2[omp_get_thread_num()] += omp_get_num_threads();
+    }
+
+    #pragma omp parallel num_threads(NumThreads3)
+    {
+      check3[omp_get_thread_num()] += omp_get_num_threads();
+    }
+  }
+
+  // CHECK: threadLimit = 32
+  printf("threadLimit = %d\n", threadLimit);
+
+  // Every region must have run with exactly ThreadLimit threads.
+  // CHECK-NOT: invalid
+  for (int i = 0; i < MaxThreads; i++) {
+    if (i < ThreadLimit) {
+      if (check1[i] != ThreadLimit) {
+        printf("invalid: check1[%d] should be %d, is %d\n", i, ThreadLimit, check1[i]);
+      }
+    } else if (check1[i] != 0) {
+      printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+    }
+
+    if (i < ThreadLimit) {
+      if (check2[i] != ThreadLimit) {
+        printf("invalid: check2[%d] should be %d, is %d\n", i, ThreadLimit, check2[i]);
+      }
+    } else if (check2[i] != 0) {
+      printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+    }
+
+    if (i < ThreadLimit) {
+      if (check3[i] != ThreadLimit) {
+        printf("invalid: check3[%d] should be %d, is %d\n", i, ThreadLimit, check3[i]);
+      }
+    } else if (check3[i] != 0) {
+      printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+    }
+  }
+
+  return 0;
+}
Index: libomptarget/test/CMakeLists.txt
===================================================================
--- libomptarget/test/CMakeLists.txt
+++ libomptarget/test/CMakeLists.txt
@@ -14,15 +14,6 @@
 
 add_openmp_testsuite(check-libomptarget "Running libomptarget tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptarget omp)
 
-if(${OPENMP_STANDALONE_BUILD})
-  set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/../../runtime/src" CACHE STRING
-    "Path to folder containing omp.h")
-  set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/../../runtime/src" CACHE STRING
-    "Path to folder containing libomp.so")
-else()
-  set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${LIBOMPTARGET_BINARY_DIR}/../runtime/src")
-endif()
-
 # Configure the lit.site.cfg.in file
 set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget configuration.\n# Do not edit!")
 configure_file(lit.site.cfg.in lit.site.cfg @ONLY)
Index: libomptarget/test/lit.site.cfg.in
===================================================================
--- libomptarget/test/lit.site.cfg.in
+++ libomptarget/test/lit.site.cfg.in
@@ -16,4 +16,4 @@
 config.libomptarget_debug = @LIBOMPTARGET_DEBUG@
 
 # Let the main config do the real work.
-lit_config.load_config(config, "@LIBOMPTARGET_BASE_DIR@/test/lit.cfg")
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")