diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2504,6 +2504,25 @@ char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)]; }; +// Declarations for custom fast memory allocator +// NOTE: bufsize must be a signed datatype +#if KMP_OS_WINDOWS +#if KMP_ARCH_X86 || KMP_ARCH_ARM +typedef kmp_int32 bufsize; +#else +typedef kmp_int64 bufsize; +#endif +#else +typedef ssize_t bufsize; +#endif // KMP_OS_WINDOWS + +typedef struct kmp_mem_descr { // Memory block descriptor. + void *ptr_allocated; // Pointer returned by malloc(), subject for free(). + size_t size_allocated; // Size of allocated memory block. + void *ptr_aligned; // Pointer to aligned memory, to be used by client code. + size_t size_aligned; // Size of aligned memory block. +} kmp_mem_descr_t; + #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5) // Free lists keep same-size free memory slots for fast memory allocation // routines @@ -2696,6 +2715,11 @@ kmp_base_info_t th; } kmp_info_t; +// some memory allocation wrappers declaration +extern void *kmp_b_alloc(kmp_info_t *th, bufsize s); +extern void kmp_b_free(kmp_info_t *th, void *buf); +extern void kmp_b_dequeue(kmp_info_t *th); + // OpenMP thread team data structures typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t; diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -23,18 +23,6 @@ typedef void *(*bget_acquire_t)(size_t); typedef void (*bget_release_t)(void *); -/* NOTE: bufsize must be a signed datatype */ - -#if KMP_OS_WINDOWS -#if KMP_ARCH_X86 || KMP_ARCH_ARM -typedef kmp_int32 bufsize; -#else -typedef kmp_int64 bufsize; -#endif -#else -typedef ssize_t bufsize; -#endif // KMP_OS_WINDOWS - /* The three modes of operation are, fifo search, lifo search, and best-fit */ typedef enum bget_mode { @@ -1734,13 +1722,6 @@ memory leaks, but it may 
be useful for debugging memory corruptions, used freed pointers, etc. */ /* #define LEAK_MEMORY */ -struct kmp_mem_descr { // Memory block descriptor. - void *ptr_allocated; // Pointer returned by malloc(), subject for free(). - size_t size_allocated; // Size of allocated memory block. - void *ptr_aligned; // Pointer to aligned memory, to be used by client code. - size_t size_aligned; // Size of aligned memory block. -}; -typedef struct kmp_mem_descr kmp_mem_descr_t; /* Allocate memory on requested boundary, fill allocated memory with 0x00. NULL is NEVER returned, __kmp_abort() is called in case of memory allocation @@ -1900,216 +1881,12 @@ KE_TRACE(25, ("<- __kmp_free() returns\n")); } // func ___kmp_free -#if USE_FAST_MEMORY == 3 -// Allocate fast memory by first scanning the thread's free lists -// If a chunk the right size exists, grab it off the free list. -// Otherwise allocate normally using kmp_thread_malloc. - -// AC: How to choose the limit? Just get 16 for now... -#define KMP_FREE_LIST_LIMIT 16 - -// Always use 128 bytes for determining buckets for caching memory blocks -#define DCACHE_LINE 128 - -void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { - void *ptr; - size_t num_lines, idx; - int index; - void *alloc_ptr; - size_t alloc_size; - kmp_mem_descr_t *descr; - - KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); - - num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; - idx = num_lines - 1; - KMP_DEBUG_ASSERT(idx >= 0); - if (idx < 2) { - index = 0; // idx is [ 0, 1 ], use first free list - num_lines = 2; // 1, 2 cache lines or less than cache line - } else if ((idx >>= 2) == 0) { - index = 1; // idx is [ 2, 3 ], use second free list - num_lines = 4; // 3, 4 cache lines - } else if ((idx >>= 2) == 0) { - index = 2; // idx is [ 4, 15 ], use third free list - num_lines = 16; // 5, 6, ..., 16 cache lines - } else if ((idx >>= 2) == 0) { - 
index = 3; // idx is [ 16, 63 ], use fourth free list - num_lines = 64; // 17, 18, ..., 64 cache lines - } else { - goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists - } - - ptr = this_thr->th.th_free_lists[index].th_free_list_self; - if (ptr != NULL) { - // pop the head of no-sync free list - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( - this_thr == - ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) - ->ptr_aligned); - goto end; - } - ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); - if (ptr != NULL) { - // no-sync free list is empty, use sync free list (filled in by other - // threads only) - // pop the head of the sync free list, push NULL instead - while (!KMP_COMPARE_AND_STORE_PTR( - &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { - KMP_CPU_PAUSE(); - ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); - } - // push the rest of chain into no-sync free list (can be NULL if there was - // the only block) - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( - this_thr == - ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) - ->ptr_aligned); - goto end; - } - -alloc_call: - // haven't found block in the free lists, thus allocate it - size = num_lines * DCACHE_LINE; - - alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE; - KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " - "alloc_size %d\n", - __kmp_gtid_from_thread(this_thr), alloc_size)); - alloc_ptr = bget(this_thr, (bufsize)alloc_size); - - // align ptr to DCACHE_LINE - ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + - DCACHE_LINE) & - ~(DCACHE_LINE - 1)); - descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); - - descr->ptr_allocated = alloc_ptr; // remember allocated pointer - // we don't need size_allocated - 
descr->ptr_aligned = (void *)this_thr; // remember allocating thread - // (it is already saved in bget buffer, - // but we may want to use another allocator in future) - descr->size_aligned = size; - -end: - KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n", - __kmp_gtid_from_thread(this_thr), ptr)); - return ptr; -} // func __kmp_fast_allocate - -// Free fast memory and place it on the thread's free list if it is of -// the correct size. -void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) { - kmp_mem_descr_t *descr; - kmp_info_t *alloc_thr; - size_t size; - size_t idx; - int index; - - KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM)); - KMP_ASSERT(ptr != NULL); - - descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); +// some memory allocation wrappers definition +void *kmp_b_alloc(kmp_info_t *th, bufsize s) { return bget(th, s); } +void kmp_b_free(kmp_info_t *th, void *buf) { brel(th, buf); } +void kmp_b_dequeue(kmp_info_t *th) { __kmp_bget_dequeue(th); } - KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", - (int)descr->size_aligned)); - - size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... 
cache lines - - idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block - if (idx == size) { - index = 0; // 2 cache lines - } else if ((idx <<= 1) == size) { - index = 1; // 4 cache lines - } else if ((idx <<= 2) == size) { - index = 2; // 16 cache lines - } else if ((idx <<= 2) == size) { - index = 3; // 64 cache lines - } else { - KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64); - goto free_call; // 65 or more cache lines ( > 8KB ) - } - - alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block - if (alloc_thr == this_thr) { - // push block to self no-sync free list, linking previous head (LIFO) - *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; - this_thr->th.th_free_lists[index].th_free_list_self = ptr; - } else { - void *head = this_thr->th.th_free_lists[index].th_free_list_other; - if (head == NULL) { - // Create new free list - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; // mark the tail of the list - descr->size_allocated = (size_t)1; // head of the list keeps its length - } else { - // need to check existed "other" list's owner thread and size of queue - kmp_mem_descr_t *dsc = - (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t)); - // allocating thread, same for all queue nodes - kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned); - size_t q_sz = - dsc->size_allocated + 1; // new size in case we add current task - if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) { - // we can add current task to "other" list, no sync needed - *((void **)ptr) = head; - descr->size_allocated = q_sz; - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - } else { - // either queue blocks owner is changing or size limit exceeded - // return old queue to allocating thread (q_th) synchronously, - // and start new list for alloc_thr's tasks - void *old_ptr; - void *tail = head; - void *next = *((void **)head); - while (next != NULL) { - KMP_DEBUG_ASSERT( - // queue 
size should decrease by 1 each step through the list - ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t))) - ->size_allocated + - 1 == - ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t))) - ->size_allocated); - tail = next; // remember tail node - next = *((void **)next); - } - KMP_DEBUG_ASSERT(q_th != NULL); - // push block to owner's sync free list - old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); - /* the next pointer must be set before setting free_list to ptr to avoid - exposing a broken list to other threads, even for an instant. */ - *((void **)tail) = old_ptr; - - while (!KMP_COMPARE_AND_STORE_PTR( - &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) { - KMP_CPU_PAUSE(); - old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); - *((void **)tail) = old_ptr; - } - - // start new list of not-selt tasks - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; - descr->size_allocated = (size_t)1; // head of queue keeps its length - } - } - } - goto end; - -free_call: - KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", - __kmp_gtid_from_thread(this_thr), size)); - __kmp_bget_dequeue(this_thr); /* Release any queued buffers */ - brel(this_thr, descr->ptr_allocated); - -end: - KE_TRACE(25, ("<- __kmp_fast_free() returns\n")); - -} // func __kmp_fast_free +#if USE_FAST_MEMORY == 3 // Initialize the thread free lists related to fast memory // Only do this when a thread is initially created. diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1453,6 +1453,226 @@ sizeof_shareds, task_entry); } +#if USE_FAST_MEMORY == 3 +// Allocate fast memory by first scanning the thread's free lists +// If a chunk the right size exists, grab it off the free list. +// Otherwise allocate normally using kmp_thread_malloc. 
+ +// AC: How to choose the limit? Just get 16 for now... +#define KMP_FREE_LIST_LIMIT 16 + +// Always use 128 bytes for determining buckets for caching memory blocks +#define DCACHE_LINE 128 + +void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { + void *ptr; + size_t num_lines, idx; + int index; + void *alloc_ptr; + size_t alloc_size; + kmp_mem_descr_t *descr; + + KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); + + num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; + idx = num_lines - 1; + KMP_DEBUG_ASSERT(idx >= 0); + if (idx < 2) { + index = 0; // idx is [ 0, 1 ], use first free list + num_lines = 2; // 1, 2 cache lines or less than cache line + } else if ((idx >>= 2) == 0) { + index = 1; // idx is [ 2, 3 ], use second free list + num_lines = 4; // 3, 4 cache lines + } else if ((idx >>= 2) == 0) { + index = 2; // idx is [ 4, 15 ], use third free list + num_lines = 16; // 5, 6, ..., 16 cache lines + } else if ((idx >>= 2) == 0) { + index = 3; // idx is [ 16, 63 ], use fourth free list + num_lines = 64; // 17, 18, ..., 64 cache lines + } else { + goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists + } + + ptr = this_thr->th.th_free_lists[index].th_free_list_self; + if (ptr != NULL) { + // pop the head of no-sync free list + this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( + this_thr == + ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) + ->ptr_aligned); + goto end; + } + ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); + if (ptr != NULL) { + // no-sync free list is empty, use sync free list (filled in by other + // threads only) + // pop the head of the sync free list, push NULL instead + while (!KMP_COMPARE_AND_STORE_PTR( + &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { + KMP_CPU_PAUSE(); + ptr = 
TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); + } + // push the rest of chain into no-sync free list (can be NULL if there was + // the only block) + this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( + this_thr == + ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) + ->ptr_aligned); + goto end; + } + +alloc_call: + // haven't found block in the free lists, thus allocate it + size = num_lines * DCACHE_LINE; + + alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE; + KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " + "alloc_size %d\n", + __kmp_gtid_from_thread(this_thr), alloc_size)); +#if INTEL_PRIVATE +// AC: replace tbbmalloc with BGET for big blocks until the issues with +// 358.botsalgn test is fixed: +// CQ180918: slowdown, +// CQ292223: memory consumption, +// CQ291050: unneeded allocations. +// alloc_ptr = ___kmp_thread_malloc( this_thr, alloc_size KMP_SRC_LOC_PARM ); +#endif /* INTEL_PRIVATE */ + alloc_ptr = kmp_b_alloc(this_thr, (bufsize)alloc_size); + + // align ptr to DCACHE_LINE + ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + + DCACHE_LINE) & + ~(DCACHE_LINE - 1)); + descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); + + descr->ptr_allocated = alloc_ptr; // remember allocated pointer + // we don't need size_allocated + descr->ptr_aligned = (void *)this_thr; // remember allocating thread + // (it is already saved in bget buffer, + // but we may want to use another allocator in future) + descr->size_aligned = size; + +end: + KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n", + __kmp_gtid_from_thread(this_thr), ptr)); + return ptr; +} // func __kmp_fast_allocate + +// Free fast memory and place it on the thread's free list if it is of +// the correct size. 
+void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) { + kmp_mem_descr_t *descr; + kmp_info_t *alloc_thr; + size_t size; + size_t idx; + int index; + + KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM)); + KMP_ASSERT(ptr != NULL); + + descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); + + KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", + (int)descr->size_aligned)); + + size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines + + idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block + if (idx == size) { + index = 0; // 2 cache lines + } else if ((idx <<= 1) == size) { + index = 1; // 4 cache lines + } else if ((idx <<= 2) == size) { + index = 2; // 16 cache lines + } else if ((idx <<= 2) == size) { + index = 3; // 64 cache lines + } else { + KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64); + goto free_call; // 65 or more cache lines ( > 8KB ) + } + + alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block + if (alloc_thr == this_thr) { + // push block to self no-sync free list, linking previous head (LIFO) + *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; + this_thr->th.th_free_lists[index].th_free_list_self = ptr; + } else { + void *head = this_thr->th.th_free_lists[index].th_free_list_other; + if (head == NULL) { + // Create new free list + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; // mark the tail of the list + descr->size_allocated = (size_t)1; // head of the list keeps its length + } else { + // need to check existed "other" list's owner thread and size of queue + kmp_mem_descr_t *dsc = + (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t)); + // allocating thread, same for all queue nodes + kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned); + size_t q_sz = + dsc->size_allocated + 1; // new size in case we add current task + 
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) { + // we can add current task to "other" list, no sync needed + *((void **)ptr) = head; + descr->size_allocated = q_sz; + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + } else { + // either queue blocks owner is changing or size limit exceeded + // return old queue to allocating thread (q_th) synchronously, + // and start new list for alloc_thr's tasks + void *old_ptr; + void *tail = head; + void *next = *((void **)head); + while (next != NULL) { + KMP_DEBUG_ASSERT( + // queue size should decrease by 1 each step through the list + ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t))) + ->size_allocated + + 1 == + ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t))) + ->size_allocated); + tail = next; // remember tail node + next = *((void **)next); + } + KMP_DEBUG_ASSERT(q_th != NULL); + // push block to owner's sync free list + old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); + /* the next pointer must be set before setting free_list to ptr to avoid + exposing a broken list to other threads, even for an instant. */ + *((void **)tail) = old_ptr; + + while (!KMP_COMPARE_AND_STORE_PTR( + &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) { + KMP_CPU_PAUSE(); + old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); + *((void **)tail) = old_ptr; + } + + // start new list of not-self tasks + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; + descr->size_allocated = (size_t)1; // head of queue keeps its length + } + } + } + goto end; + +free_call: + KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", + __kmp_gtid_from_thread(this_thr), size)); + kmp_b_dequeue(this_thr); /* Release any queued buffers */ + kmp_b_free(this_thr, descr->ptr_allocated); + +end: + KE_TRACE(25, ("<- __kmp_fast_free() returns\n")); + +} // func __kmp_fast_free +#endif // USE_FAST_MEMORY + + /*! 
@ingroup TASKING @param loc_ref location of the original task directive