Index: openmp/runtime/src/CMakeLists.txt
===================================================================
--- openmp/runtime/src/CMakeLists.txt
+++ openmp/runtime/src/CMakeLists.txt
@@ -45,6 +45,7 @@
   ${LIBOMP_SRC_DIR}/i18n
   ${LIBOMP_INC_DIR}
   ${LIBOMP_SRC_DIR}/thirdparty/ittnotify
+  ${LIBOMP_SRC_DIR}/thirdparty/memkind
 )
 if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
@@ -93,6 +94,7 @@
     libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file
   endif()
   libomp_append(LIBOMP_CXXFILES thirdparty/ittnotify/ittnotify_static.cpp LIBOMP_USE_ITT_NOTIFY)
+  libomp_append(LIBOMP_CXXFILES thirdparty/memkind/kmp_memkind.cpp)
   libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER)
   libomp_append(LIBOMP_CXXFILES kmp_stats.cpp LIBOMP_STATS)
   libomp_append(LIBOMP_CXXFILES kmp_stats_timing.cpp LIBOMP_STATS)
Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -920,6 +920,19 @@
   omp_uintptr_t value;
 } omp_alloctrait_t;
 
+// indices of the predefined allocators in kmp_standard_allocators[]
+enum {
+  omp_ata_null = 0,
+  omp_ata_default = 1,
+  omp_ata_large_cap = 2,
+  omp_ata_const = 3,
+  omp_ata_high_bw = 4,
+  omp_ata_low_lat = 5,
+  omp_ata_cgroup = 6,
+  omp_ata_pteam = 7,
+  omp_ata_thread = 8,
+  omp_ata_last = 9,
+};
+
 typedef void *omp_allocator_handle_t;
 extern omp_allocator_handle_t const omp_null_allocator;
 extern omp_allocator_handle_t const omp_default_mem_alloc;
@@ -936,20 +949,29 @@
 // end of duplicate type definitions from omp.h
 #endif
 
-extern int __kmp_memkind_available;
-
 typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
 
 typedef struct kmp_allocator_t {
   omp_memspace_handle_t memspace;
-  void **memkind; // pointer to memkind
   int alignment;
   omp_alloctrait_value_t fb;
   kmp_allocator_t *fb_data;
   kmp_uint64 pool_size;
   kmp_uint64 pool_used;
+  int partition; // omp_atk_partition trait value (omp_atv_*)
+
+  /* custom allocation hooks */
+  void *(*alloc)(size_t size, kmp_allocator_t *, int gtid);
+  void (*free)(void *p, kmp_allocator_t *, int gtid);
+  void *aux; // extra per-allocator data for the custom hooks
 } kmp_allocator_t;
 
+extern kmp_allocator_t kmp_standard_allocators[];
+extern void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid);
+extern void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid);
+extern int (*kmp_init_allocator_p)(kmp_allocator_t *);
+extern void (*kmp_destroy_allocator_p)(kmp_allocator_t *);
+
 extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
                                                     omp_memspace_handle_t,
                                                     int ntraits,
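[Note, not part of the patch] A minimal sketch of how an out-of-tree backend
could plug into the hooks declared above, assuming it is compiled into the
runtime where kmp.h is visible; all my_* names are hypothetical:

  #include <stdlib.h>
  #include "kmp.h"

  // backend-specific routines; plain malloc/free stand in here
  static void *my_alloc(size_t size, kmp_allocator_t *al, int gtid) {
    return malloc(size);
  }
  static void my_free(void *ptr, kmp_allocator_t *al, int gtid) {
    free(ptr);
  }

  // called by __kmpc_init_allocator for every new allocator; returning
  // non-zero makes the runtime hand back omp_null_allocator
  static int my_init_allocator(kmp_allocator_t *al) {
    if (al->memspace == omp_high_bw_mem_space) {
      al->alloc = my_alloc;
      al->free = my_free;
    }
    return 0;
  }

  static void my_register_backend() {
    kmp_init_allocator_p = my_init_allocator;
    // optionally reroute the predefined allocators as well
    for (int i = omp_ata_default; i < omp_ata_last; i++) {
      kmp_standard_allocators[i].alloc = my_alloc;
      kmp_standard_allocators[i].free = my_free;
    }
  }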
Index: openmp/runtime/src/kmp_alloc.cpp
===================================================================
--- openmp/runtime/src/kmp_alloc.cpp
+++ openmp/runtime/src/kmp_alloc.cpp
@@ -1221,119 +1221,68 @@
 }
 
 /* OMP 5.0 Memory Management support */
-static const char *kmp_mk_lib_name;
-static void *h_memkind;
-/* memkind experimental API: */
-// memkind_alloc
-static void *(*kmp_mk_alloc)(void *k, size_t sz);
-// memkind_free
-static void (*kmp_mk_free)(void *kind, void *ptr);
-// memkind_check_available
-static int (*kmp_mk_check)(void *kind);
-// kinds we are going to use
-static void **mk_default;
-static void **mk_interleave;
-static void **mk_hbw;
-static void **mk_hbw_interleave;
-static void **mk_hbw_preferred;
-static void **mk_hugetlb;
-static void **mk_hbw_hugetlb;
-static void **mk_hbw_preferred_hugetlb;
-
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-static inline void chk_kind(void ***pkind) {
-  KMP_DEBUG_ASSERT(pkind);
-  if (*pkind) // symbol found
-    if (kmp_mk_check(**pkind)) // kind not available or error
-      *pkind = NULL;
-}
-#endif
-
-void __kmp_init_memkind() {
-// as of 2018-07-31 memkind does not support Windows*, exclude it for now
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-  // use of statically linked memkind is problematic, as it depends on libnuma
-  kmp_mk_lib_name = "libmemkind.so";
-  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
-  if (h_memkind) {
-    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
-    kmp_mk_alloc =
-        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
-    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
-    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
-    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
-        !kmp_mk_check(*mk_default)) {
-      __kmp_memkind_available = 1;
-      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
-      chk_kind(&mk_interleave);
-      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
-      chk_kind(&mk_hbw);
-      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
-      chk_kind(&mk_hbw_interleave);
-      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
-      chk_kind(&mk_hbw_preferred);
-      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
-      chk_kind(&mk_hugetlb);
-      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
-      chk_kind(&mk_hbw_hugetlb);
-      mk_hbw_preferred_hugetlb =
-          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
-      chk_kind(&mk_hbw_preferred_hugetlb);
-      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
-      return; // success
-    }
-    dlclose(h_memkind); // failure
-    h_memkind = NULL;
-  }
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#else
-  kmp_mk_lib_name = "";
-  h_memkind = NULL;
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#endif
-}
+void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid);
+void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid);
+
+kmp_allocator_t kmp_standard_allocators[] = {
+  { /* omp_null_allocator */
+    .memspace = omp_default_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = NULL,
+    .free = NULL,
+  },
+  { /* omp_default_allocator */
+    .memspace = omp_default_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_large_cap_allocator */
+    .memspace = omp_large_cap_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_const_allocator */
+    .memspace = omp_const_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_high_bw_allocator */
+    .memspace = omp_high_bw_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_low_lat_allocator */
+    .memspace = omp_low_lat_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_cgroup_allocator */
+    .memspace = omp_default_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_pteam_allocator */
+    .memspace = omp_default_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+  { /* omp_thread_allocator */
+    .memspace = omp_default_mem_space,
+    .fb = omp_atv_abort_fb,
+    .alloc = kmp_default_alloc,
+    .free = kmp_default_free,
+  },
+};
 
-void __kmp_fini_memkind() {
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-  if (__kmp_memkind_available)
-    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
-  if (h_memkind) {
-    dlclose(h_memkind);
-    h_memkind = NULL;
-  }
-  kmp_mk_check = NULL;
-  kmp_mk_alloc = NULL;
-  kmp_mk_free = NULL;
-  mk_default = NULL;
-  mk_interleave = NULL;
-  mk_hbw = NULL;
-  mk_hbw_interleave = NULL;
-  mk_hbw_preferred = NULL;
-  mk_hugetlb = NULL;
-  mk_hbw_hugetlb = NULL;
-  mk_hbw_preferred_hugetlb = NULL;
-#endif
-}
+// optional hook invoked by __kmpc_init_allocator for each new allocator;
+// a non-zero return value rejects the allocator
+int (*kmp_init_allocator_p)(kmp_allocator_t *);
 
 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                              int ntraits,
@@ -1366,10 +1315,13 @@
           al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
       break;
     case omp_atk_fb_data:
-      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
+      if (traits[i].value < sizeof(kmp_standard_allocators) /
+                                sizeof(kmp_standard_allocators[0])) {
+        al->fb_data = &kmp_standard_allocators[traits[i].value];
+      } else {
+        al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
+      }
       break;
     case omp_atk_partition:
-      al->memkind = RCAST(void **, traits[i].value);
+      al->partition = traits[i].value;
       break;
     default:
       KMP_ASSERT2(0, "Unexpected allocator trait");
@@ -1378,47 +1330,31 @@
   if (al->fb == 0) {
     // set default allocator
     al->fb = omp_atv_default_mem_fb;
-    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+    al->fb_data = &kmp_standard_allocators[(uintptr_t) omp_default_mem_alloc];
   } else if (al->fb == omp_atv_allocator_fb) {
     KMP_ASSERT(al->fb_data != NULL);
   } else if (al->fb == omp_atv_default_mem_fb) {
-    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+    al->fb_data = &kmp_standard_allocators[(uintptr_t) omp_default_mem_alloc];
   }
-  if (__kmp_memkind_available) {
-    // Let's use memkind library if available
-    if (ms == omp_high_bw_mem_space) {
-      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
-        al->memkind = mk_hbw_interleave;
-      } else if (mk_hbw_preferred) {
-        // AC: do not try to use MEMKIND_HBW for now, because memkind library
-        // cannot reliably detect exhaustion of HBW memory.
-        // It could be possible using hbw_verify_memory_region() but memkind
-        // manual says: "Using this function in production code may result in
-        // serious performance penalty".
-        al->memkind = mk_hbw_preferred;
-      } else {
-        // HBW is requested but not available --> return NULL allocator
-        __kmp_free(al);
-        return omp_null_allocator;
-      }
-    } else {
-      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
-        al->memkind = mk_interleave;
-      } else {
-        al->memkind = mk_default;
-      }
-    }
-  } else {
-    if (ms == omp_high_bw_mem_space) {
-      // cannot detect HBW memory presence without memkind library
+
+  // plain heap routines by default; kmp_init_allocator_p may override them
+  al->alloc = kmp_default_alloc;
+  al->free = kmp_default_free;
+  if (kmp_init_allocator_p != NULL && kmp_init_allocator_p(al) != 0) {
+    // the initialization hook rejected this allocator
       __kmp_free(al);
       return omp_null_allocator;
     }
-  }
+
   return (omp_allocator_handle_t)al;
 }
 
+// optional hook invoked by __kmpc_destroy_allocator before a custom
+// allocator is freed
+void (*kmp_destroy_allocator_p)(kmp_allocator_t *);
+
 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
+  // handles of predefined allocators are not kmp_allocator_t pointers, so
+  // only pass custom allocators to the destruction hook
+  if (kmp_destroy_allocator_p && allocator > kmp_max_mem_alloc)
+    kmp_destroy_allocator_p(
+        RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)));
+
   if (allocator > kmp_max_mem_alloc)
     __kmp_free(allocator);
 }
@@ -1441,15 +1377,28 @@
 } kmp_mem_desc_t;
 static int alignment = sizeof(void *); // let's align to pointer size
 
+void *kmp_default_alloc(size_t size, kmp_allocator_t *al, int gtid) {
+  return __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), size);
+}
+
+void kmp_default_free(void *ptr, kmp_allocator_t *al, int gtid) {
+  __kmp_thread_free(__kmp_thread_from_gtid(gtid), ptr);
+}
+
 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
   void *ptr = NULL;
-  kmp_allocator_t *al;
+  kmp_allocator_t *al = NULL;
   KMP_DEBUG_ASSERT(__kmp_init_serial);
   if (allocator == omp_null_allocator)
     allocator = __kmp_threads[gtid]->th.th_def_allocator;
 
   KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
-  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+  if ((uintptr_t)allocator <
+      sizeof(kmp_standard_allocators) / sizeof(kmp_standard_allocators[0]))
+    al = &kmp_standard_allocators[(uintptr_t)allocator];
+  else if (allocator > kmp_max_mem_alloc)
+    al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+
+  KMP_ASSERT(al != NULL);
 
   int sz_desc = sizeof(kmp_mem_desc_t);
   kmp_mem_desc_t desc;
@@ -1462,101 +1411,29 @@
   }
   desc.size_a = size + sz_desc + align;
 
-  if (__kmp_memkind_available) {
-    if (allocator < kmp_max_mem_alloc) {
-      // pre-defined allocator
-      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
-        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
-      } else {
-        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-      }
-    } else if (al->pool_size > 0) {
-      // custom allocator with pool size requested
-      kmp_uint64 used =
-          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+  if (al->pool_size > 0) {
+    kmp_uint64 used =
+        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
       if (used + desc.size_a > al->pool_size) {
         // not enough space, need to go fallback path
         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-        if (al->fb == omp_atv_default_mem_fb) {
-          al = (kmp_allocator_t *)omp_default_mem_alloc;
-          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-        } else if (al->fb == omp_atv_abort_fb) {
-          KMP_ASSERT(0); // abort fallback requested
-        } else if (al->fb == omp_atv_allocator_fb) {
-          KMP_ASSERT(al != al->fb_data);
-          al = al->fb_data;
-          return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-        } // else ptr == NULL;
-      } else {
-        // pool has enough space
-        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
-        if (ptr == NULL) {
-          if (al->fb == omp_atv_default_mem_fb) {
-            al = (kmp_allocator_t *)omp_default_mem_alloc;
-            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-          } else if (al->fb == omp_atv_abort_fb) {
-            KMP_ASSERT(0); // abort fallback requested
-          } else if (al->fb == omp_atv_allocator_fb) {
-            KMP_ASSERT(al != al->fb_data);
-            al = al->fb_data;
-            return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-          }
-        }
-      }
-    } else {
-      // custom allocator, pool size not requested
-      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
-      if (ptr == NULL) {
-        if (al->fb == omp_atv_default_mem_fb) {
-          al = (kmp_allocator_t *)omp_default_mem_alloc;
-          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
-        } else if (al->fb == omp_atv_abort_fb) {
-          KMP_ASSERT(0); // abort fallback requested
-        } else if (al->fb == omp_atv_allocator_fb) {
-          KMP_ASSERT(al != al->fb_data);
-          al = al->fb_data;
-          return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-        }
-      }
+        switch (al->fb) {
+        default: // omp_atv_null_fb: no fallback, return NULL to the caller
+          return NULL;
+
+        case omp_atv_abort_fb:
+          KMP_ASSERT(0); // abort fallback requested
+          abort();
+
+        case omp_atv_default_mem_fb:
+        case omp_atv_allocator_fb:
+          // fb_data was filled in by __kmpc_init_allocator for both cases
+          KMP_DEBUG_ASSERT(al != al->fb_data);
+          ptr = __kmpc_alloc(gtid, size, al->fb_data);
+          return ptr;
     }
-  } else if (allocator < kmp_max_mem_alloc) {
-    // pre-defined allocator
-    if (allocator == omp_high_bw_mem_alloc) {
-      // ptr = NULL;
-    } else {
-      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-    }
-  } else if (al->pool_size > 0) {
-    // custom allocator with pool size requested
-    kmp_uint64 used =
-        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
-    if (used + desc.size_a > al->pool_size) {
-      // not enough space, need to go fallback path
-      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      if (al->fb == omp_atv_default_mem_fb) {
-        al = (kmp_allocator_t *)omp_default_mem_alloc;
-        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-      } else if (al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } else if (al->fb == omp_atv_allocator_fb) {
-        KMP_ASSERT(al != al->fb_data);
-        al = al->fb_data;
-        return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
-      } // else ptr == NULL;
-    } else {
-      // pool has enough space
-      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
-        KMP_ASSERT(0); // abort fallback requested
-      } // no sense to look for another fallback because of same internal alloc
     }
-  } else {
-    // custom allocator, pool size not requested
-    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
-    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
-      KMP_ASSERT(0); // abort fallback requested
-    } // no sense to look for another fallback because of same internal alloc
   }
+
+  ptr = (*al->alloc)(desc.size_a, al, gtid);
+  if (ptr == NULL && al->fb == omp_atv_abort_fb) {
+    KMP_ASSERT(0); // abort fallback requested
+  }
   KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
   if (ptr == NULL)
     return NULL;
@@ -1599,32 +1476,15 @@
   oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
   KMP_DEBUG_ASSERT(al);
 
-  if (__kmp_memkind_available) {
-    if (oal < kmp_max_mem_alloc) {
-      // pre-defined allocator
-      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
-        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
-      } else {
-        kmp_mk_free(*mk_default, desc.ptr_alloc);
-      }
-    } else {
+  KMP_ASSERT(al->free != NULL);
+  // free the original allocation, not the aligned user pointer
+  (*al->free)(desc.ptr_alloc, al, gtid);
+
       if (al->pool_size > 0) { // custom allocator with pool size requested
         kmp_uint64 used =
             KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
         (void)used; // to suppress compiler warning
         KMP_DEBUG_ASSERT(used >= desc.size_a);
       }
-      kmp_mk_free(*al->memkind, desc.ptr_alloc);
-    }
-  } else {
-    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
-      kmp_uint64 used =
-          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
-      (void)used; // to suppress compiler warning
-      KMP_DEBUG_ASSERT(used >= desc.size_a);
-    }
-    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
-  }
   KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
                 allocator));
 }
Index: openmp/runtime/src/kmp_global.cpp
===================================================================
--- openmp/runtime/src/kmp_global.cpp
+++ openmp/runtime/src/kmp_global.cpp
@@ -287,7 +287,6 @@
 kmp_int32 __kmp_max_task_priority = 0;
 kmp_uint64 __kmp_taskloop_min_tasks = 0;
 
-int __kmp_memkind_available = 0;
 omp_allocator_handle_t const omp_null_allocator = NULL;
 omp_allocator_handle_t const omp_default_mem_alloc =
     (omp_allocator_handle_t const)1;
Index: openmp/runtime/src/kmp_runtime.cpp
===================================================================
--- openmp/runtime/src/kmp_runtime.cpp
+++ openmp/runtime/src/kmp_runtime.cpp
@@ -526,8 +526,13 @@
                                "%s_%d.t_disp_buffer", header, team_id);
 }
 
-static void __kmp_init_allocator() { __kmp_init_memkind(); }
-static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
+static void __kmp_init_allocator() {
+  __kmp_init_memkind();
+}
+
+static void __kmp_fini_allocator() {
+  __kmp_fini_memkind();
+}
 
 /* ------------------------------------------------------------------------ */
 
Index: openmp/runtime/src/kmp_settings.cpp
===================================================================
--- openmp/runtime/src/kmp_settings.cpp
+++ openmp/runtime/src/kmp_settings.cpp
@@ -3275,6 +3275,18 @@
   __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format);
 }
 // OMP_ALLOCATOR sets default allocator
+static const char *__kmp_allocator_names[] = {
+    "omp_default_mem_alloc",
+    "omp_large_cap_mem_alloc",
+    "omp_const_mem_alloc",
+    "omp_high_bw_mem_alloc",
+    "omp_low_lat_mem_alloc",
+    "omp_cgroup_mem_alloc",
+    "omp_pteam_mem_alloc",
+    "omp_thread_mem_alloc",
+    NULL
+};
+
 static void __kmp_stg_parse_allocator(char const *name, char const *value,
                                       void *data) {
   /*
@@ -3297,94 +3309,27 @@
     next = buf;
     SKIP_DIGITS(next);
     num = __kmp_str_to_int(buf, *next);
-    KMP_ASSERT(num > 0);
-    switch (num) {
-    case 4:
-      if (__kmp_memkind_available) {
-        __kmp_def_allocator = omp_high_bw_mem_alloc;
-      } else {
-        __kmp_msg(kmp_ms_warning,
-                  KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
-                  __kmp_msg_null);
-        __kmp_def_allocator = omp_default_mem_alloc;
+    KMP_ASSERT(num > 0 && num < omp_ata_last);
+  } else {
+    num = -1;
+    next = buf;
+    for (int i = 0; __kmp_allocator_names[i] != NULL; i++) {
+      if (__kmp_match_str(__kmp_allocator_names[i], buf, &next)) {
+        num = i + 1;
+        break;
       }
-      break;
-    case 1:
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 2:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 3:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 5:
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 6:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 7:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
-    case 8:
-      __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-                __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-      break;
     }
-    return;
   }
-  next = buf;
-  if (__kmp_match_str("omp_high_bw_mem_alloc", buf, &next)) {
-    if (__kmp_memkind_available) {
-      __kmp_def_allocator = omp_high_bw_mem_alloc;
-    } else {
+
+  if (num > 0) {
+    // warn and fall back to omp_default_mem_alloc if the requested allocator
+    // is not backed by a real memory kind
+    if (num != 1 && kmp_standard_allocators[num].alloc == kmp_default_alloc) {
       __kmp_msg(kmp_ms_warning,
-                KMP_MSG(OmpNoAllocator, "omp_high_bw_mem_alloc"),
+                KMP_MSG(OmpNoAllocator, __kmp_allocator_names[num - 1]),
                 __kmp_msg_null);
-      __kmp_def_allocator = omp_default_mem_alloc;
-    }
-  } else if (__kmp_match_str("omp_default_mem_alloc", buf, &next)) {
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_large_cap_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning,
-              KMP_MSG(OmpNoAllocator, "omp_large_cap_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_const_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_const_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_low_lat_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_low_lat_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_cgroup_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_cgroup_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_pteam_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_pteam_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
-  } else if (__kmp_match_str("omp_thread_mem_alloc", buf, &next)) {
-    __kmp_msg(kmp_ms_warning, KMP_MSG(OmpNoAllocator, "omp_thread_mem_alloc"),
-              __kmp_msg_null);
-    __kmp_def_allocator = omp_default_mem_alloc;
+      num = 1; // omp_default_mem_alloc
+    }
+    __kmp_def_allocator = (omp_allocator_handle_t)(uintptr_t)num;
   }
+
   buf = next;
   SKIP_WS(buf);
   if (*buf != '\0') {
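[Note, not part of the patch] With the table-driven parsing above, the numeric
and the symbolic spellings select the same entry, for example:

  OMP_ALLOCATOR=4                      # positional form, 4 == high bandwidth
  OMP_ALLOCATOR=omp_high_bw_mem_alloc  # matched against __kmp_allocator_names

Both yield num == 4; if that entry is still backed by kmp_default_alloc, the
runtime emits the OmpNoAllocator warning and falls back to
omp_default_mem_alloc.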
Index: openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
===================================================================
--- /dev/null
+++ openmp/runtime/src/thirdparty/memkind/kmp_memkind.cpp
@@ -0,0 +1,135 @@
+/*
+ * kmp_memkind.cpp -- support for memkind memory allocations
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_wrapper_malloc.h"
+
+static const char *kmp_mk_lib_name;
+static void *h_memkind;
+/* memkind experimental API: */
+// memkind_alloc
+static void *(*kmp_mk_alloc)(void *k, size_t sz);
+// memkind_free
+static void (*kmp_mk_free)(void *kind, void *ptr);
+// memkind_check_available
+static int (*kmp_mk_check)(void *kind);
+// kinds we are going to use
+static void **mk_default;
+static void **mk_interleave;
+static void **mk_hbw;
+static void **mk_hbw_interleave;
+static void **mk_hbw_preferred;
+static void **mk_hugetlb;
+static void **mk_hbw_hugetlb;
+static void **mk_hbw_preferred_hugetlb;
+
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int gtid);
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int gtid);
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+static inline void chk_kind(void ***pkind) {
+  KMP_DEBUG_ASSERT(pkind);
+  if (*pkind) // symbol found
+    if (kmp_mk_check(**pkind)) // kind not available or error
+      *pkind = NULL;
+}
+#endif
+
+void __kmp_init_memkind() {
+// as of 2018-07-31 memkind does not support Windows*, exclude it for now
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  // use of statically linked memkind is problematic, as it depends on libnuma
+  kmp_mk_lib_name = "libmemkind.so";
+  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
+  if (!h_memkind)
+    return;
+
+  kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
+  kmp_mk_alloc =
+        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
+  kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
+  mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
+  if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
+      !kmp_mk_check(*mk_default)) {
+    mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
+    chk_kind(&mk_interleave);
+    mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
+    chk_kind(&mk_hbw);
+    mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
+    chk_kind(&mk_hbw_interleave);
+    mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
+    chk_kind(&mk_hbw_preferred);
+    mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
+    chk_kind(&mk_hugetlb);
+    mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
+    chk_kind(&mk_hbw_hugetlb);
+    mk_hbw_preferred_hugetlb =
+        (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
+    chk_kind(&mk_hbw_preferred_hugetlb);
+    KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
+
+    // route all predefined allocators (except omp_null_allocator at index 0)
+    // through memkind
+    for (int i = omp_ata_default; i < omp_ata_last; i++) {
+      kmp_standard_allocators[i].alloc = kmp_memkind_alloc;
+      kmp_standard_allocators[i].free = kmp_memkind_free;
+    }
+    return; // success
+  }
+  dlclose(h_memkind); // failure
+  h_memkind = NULL;
+  kmp_mk_check = NULL;
+  kmp_mk_alloc = NULL;
+  kmp_mk_free = NULL;
+  mk_default = NULL;
+#endif
+}
+
+void __kmp_fini_memkind() {
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+  if (kmp_mk_check)
+    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
+  if (h_memkind) {
+    dlclose(h_memkind);
+    h_memkind = NULL;
+  }
+  kmp_mk_check = NULL;
+  kmp_mk_alloc = NULL;
+  kmp_mk_free = NULL;
+  mk_default = NULL;
+  mk_interleave = NULL;
+  mk_hbw = NULL;
+  mk_hbw_interleave = NULL;
+  mk_hbw_preferred = NULL;
+  mk_hugetlb = NULL;
+  mk_hbw_hugetlb = NULL;
+  mk_hbw_preferred_hugetlb = NULL;
+#endif
+}
+
+static void *kmp_memkind_alloc(size_t size, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved && mk_interleave)
+    return kmp_mk_alloc(*mk_interleave, size);
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_alloc(*mk_hbw_preferred, size);
+
+  return kmp_mk_alloc(*mk_default, size);
+}
+
+static void kmp_memkind_free(void *ptr, kmp_allocator_t *al, int) {
+  if (al->partition == omp_atv_interleaved && mk_interleave)
+    return kmp_mk_free(*mk_interleave, ptr);
+
+  if (al->memspace == omp_high_bw_mem_space && mk_hbw_preferred)
+    return kmp_mk_free(*mk_hbw_preferred, ptr);
+
+  return kmp_mk_free(*mk_default, ptr);
+}
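[Note, not part of the patch] How the dispatch in kmp_memkind_alloc above is
reached from user code; with libmemkind loaded, the omp_atk_partition trait
below lands in al->partition, and the interleaved partition takes precedence
over the high-bandwidth memspace:

  #include <omp.h>

  int main(void) {
    omp_alloctrait_t at[1];
    at[0].key = omp_atk_partition;
    at[0].value = omp_atv_interleaved;
    omp_allocator_handle_t a =
        omp_init_allocator(omp_high_bw_mem_space, 1, at);
    if (a != omp_null_allocator) {
      double *v = (double *)omp_alloc(1024 * sizeof(double), a);
      omp_free(v, a);
      omp_destroy_allocator(a);
    }
    return 0;
  }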
Index: openmp/runtime/test/api/omp_alloc_null_fb.c
===================================================================
--- openmp/runtime/test/api/omp_alloc_null_fb.c
+++ openmp/runtime/test/api/omp_alloc_null_fb.c
@@ -11,8 +11,8 @@
   at[0].value = 2 * 1024 * 1024;
   at[1].key = omp_atk_fallback;
   at[1].value = omp_atv_null_fb;
-  a = omp_init_allocator(omp_large_cap_mem_space, 2, at);
-  printf("allocator large created: %p\n", a);
+  a = omp_init_allocator(omp_default_mem_space, 2, at);
+  printf("allocator default created: %p\n", a);
   #pragma omp parallel num_threads(2)
   {
     int i = omp_get_thread_num();