diff --git a/pstl/include/pstl/internal/omp/parallel_for.h b/pstl/include/pstl/internal/omp/parallel_for.h
--- a/pstl/include/pstl/internal/omp/parallel_for.h
+++ b/pstl/include/pstl/internal/omp/parallel_for.h
@@ -31,7 +31,7 @@
     _PSTL_PRAGMA(omp taskloop untied mergeable)
     for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
     {
-        __omp_backend::__process_chunk(__policy, __first, __chunk, __f);
+        __pstl::__omp_backend::__process_chunk(__policy, __first, __chunk, __f);
     }
 }
 
diff --git a/pstl/include/pstl/internal/omp/parallel_invoke.h b/pstl/include/pstl/internal/omp/parallel_invoke.h
--- a/pstl/include/pstl/internal/omp/parallel_invoke.h
+++ b/pstl/include/pstl/internal/omp/parallel_invoke.h
@@ -35,13 +35,13 @@
 {
     if (omp_in_parallel())
     {
-        __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
+        __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
     }
     else
     {
         _PSTL_PRAGMA(omp parallel)
         _PSTL_PRAGMA(omp single nowait)
-        __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
+        __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
     }
 }
 
diff --git a/pstl/include/pstl/internal/omp/parallel_merge.h b/pstl/include/pstl/internal/omp/parallel_merge.h
--- a/pstl/include/pstl/internal/omp/parallel_merge.h
+++ b/pstl/include/pstl/internal/omp/parallel_merge.h
@@ -50,11 +50,13 @@
 
     _PSTL_PRAGMA(omp task untied mergeable default(none)
                      firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge))
-    __parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge);
+    __pstl::__omp_backend::__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp,
+                                                      __leaf_merge);
 
     _PSTL_PRAGMA(omp task untied mergeable default(none)
                      firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge))
-    __parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge);
+    __pstl::__omp_backend::__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp,
+                                                      __leaf_merge);
 
     _PSTL_PRAGMA(omp taskwait)
 }
@@ -77,14 +79,16 @@
 
     if (omp_in_parallel())
     {
-        __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
+        __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp,
+                                                          __leaf_merge);
     }
     else
     {
         _PSTL_PRAGMA(omp parallel)
         {
             _PSTL_PRAGMA(omp single nowait)
-            __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
+            __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp,
+                                                              __leaf_merge);
         }
     }
 }
diff --git a/pstl/include/pstl/internal/omp/parallel_stable_sort.h b/pstl/include/pstl/internal/omp/parallel_stable_sort.h
--- a/pstl/include/pstl/internal/omp/parallel_stable_sort.h
+++ b/pstl/include/pstl/internal/omp/parallel_stable_sort.h
@@ -12,6 +12,7 @@
 #define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
 
 #include "util.h"
+#include "parallel_merge.h"
 
 namespace __pstl
 {
@@ -44,12 +45,12 @@
     }
 
     // Perform parallel moving of larger chunks
-    auto __policy = __omp_backend::__chunk_partitioner(__first1, __last1);
+    auto __policy = __pstl::__omp_backend::__chunk_partitioner(__first1, __last1);
 
     _PSTL_PRAGMA(omp taskloop)
     for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
     {
-        __omp_backend::__process_chunk(__policy, __first1, __chunk,
+        __pstl::__omp_backend::__process_chunk(__policy, __first1, __chunk,
                                        [&](auto __chunk_first, auto __chunk_last)
                                        {
                                            auto __chunk_offset = __chunk_first - __first1;
@@ -67,7 +68,7 @@
     _OutputIterator
     operator()(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first) const
     {
-        return __parallel_move_range(__first1, __last1, __d_first);
+        return __pstl::__omp_backend::__sort_details::__parallel_move_range(__first1, __last1, __d_first);
     }
 };
 } // namespace __sort_details
@@ -91,15 +92,16 @@
     {
         std::size_t __size = __xe - __xs;
         auto __mid = __xs + (__size / 2);
-        __parallel_invoke_body([&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); },
-                               [&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); });
+        __pstl::__omp_backend::__parallel_invoke_body(
+            [&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); },
+            [&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); });
 
         // Perform a parallel merge of the sorted ranges into __output_data.
         _VecType __output_data(__size);
         _MoveValue __move_value;
         _MoveRange __move_range;
         __utils::__serial_move_merge __merge(__size);
-        __parallel_merge_body(
+        __pstl::__omp_backend::__parallel_merge_body(
             __mid - __xs, __xe - __mid, __xs, __mid, __mid, __xe, __output_data.begin(), __comp,
             [&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae,
                                                      _RandomAccessIterator __bs, _RandomAccessIterator __be,
@@ -107,7 +109,7 @@
             { __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); });
 
         // Move the values from __output_data back in the original source range.
-        __omp_backend::__sort_details::__parallel_move_range(__output_data.begin(), __output_data.end(), __xs);
+        __pstl::__omp_backend::__sort_details::__parallel_move_range(__output_data.begin(), __output_data.end(), __xs);
     }
 }
 
@@ -130,11 +132,11 @@
     {
         if (__count <= __nsort)
         {
-            __parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
+            __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
         }
         else
         {
-            __parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
+            __pstl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
         }
     }
     else
@@ -143,11 +145,11 @@
         _PSTL_PRAGMA(omp single nowait)
         if (__count <= __nsort)
         {
-            __parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
+            __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
         }
         else
         {
-            __parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
+            __pstl::__omp_backend::__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
         }
     }
 }
diff --git a/pstl/include/pstl/internal/omp/parallel_transform_reduce.h b/pstl/include/pstl/internal/omp/parallel_transform_reduce.h
--- a/pstl/include/pstl/internal/omp/parallel_transform_reduce.h
+++ b/pstl/include/pstl/internal/omp/parallel_transform_reduce.h
@@ -60,7 +60,7 @@
     _PSTL_PRAGMA(omp taskloop shared(__accums))
     for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
     {
-        __omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk,
+        __pstl::__omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk,
                                        [&](auto __chunk_first, auto __chunk_last)
                                        {
                                            auto __thread_num = omp_get_thread_num();