diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -806,9 +806,11 @@ config_define(1 _LIBCPP_PSTL_CPU_BACKEND_SERIAL) elseif(LIBCXX_PSTL_CPU_BACKEND STREQUAL "std_thread") config_define(1 _LIBCPP_PSTL_CPU_BACKEND_THREAD) +elseif(LIBCXX_PSTL_CPU_BACKEND STREQUAL "libdispatch") + config_define(1 _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH) else() message(FATAL_ERROR "LIBCXX_PSTL_CPU_BACKEND is set to ${LIBCXX_PSTL_CPU_BACKEND}, which is not a valid backend. - Valid backends are: serial, std_thread") + Valid backends are: serial, std_thread and libdispatch") endif() if (LIBCXX_ABI_DEFINES) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -78,6 +78,7 @@ __algorithm/pstl_backends/cpu_backends/fill.h __algorithm/pstl_backends/cpu_backends/find_if.h __algorithm/pstl_backends/cpu_backends/for_each.h + __algorithm/pstl_backends/cpu_backends/libdispatch.h __algorithm/pstl_backends/cpu_backends/merge.h __algorithm/pstl_backends/cpu_backends/serial.h __algorithm/pstl_backends/cpu_backends/stable_sort.h diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h --- a/libcxx/include/__algorithm/pstl_backend.h +++ b/libcxx/include/__algorithm/pstl_backend.h @@ -169,7 +169,8 @@ }; # endif -# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL) || defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD) +# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL) || defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD) || \ + defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH) template <> struct __select_backend { using type = __cpu_backend_tag; diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h @@ -16,6 +16,8 @@ 
 #  include <__algorithm/pstl_backends/cpu_backends/serial.h>
 #elif defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD)
 #  include <__algorithm/pstl_backends/cpu_backends/thread.h>
+#elif defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH)
+#  include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
 #else
 #  error "Invalid CPU backend choice"
 #endif
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
new file mode 100644
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -0,0 +1,225 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
+
+#include <__algorithm/lower_bound.h>
+#include <__algorithm/upper_bound.h>
+#include <__atomic/atomic.h>
+#include <__config>
+#include <__exception/terminate.h>
+#include <__iterator/iterator_traits.h>
+#include <__memory/construct_at.h>
+#include <__memory/uninitialized_buffer.h>
+#include <__memory/unique_ptr.h>
+#include <__memory_resource/memory_resource.h>
+#include <__numeric/transform_reduce.h>
+#include <__utility/exception_guard.h>
+#include <__utility/move.h>
+#include <__utility/terminate_on_exception.h>
+#include <cstddef>
+#include <new>
+#include <vector>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace __par_backend {
+inline namespace __libdispatch {
+
+// ::dispatch_apply is marked as __attribute__((nothrow)) because it doesn't let exceptions propagate, and neither do
+// we.
+[[_Clang::__callback__(__func, __context, __)]] _LIBCPP_EXPORTED_FROM_ABI void
+__dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)) noexcept;
+
+template <class _Func>
+_LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) noexcept {
+  __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) {
+    (*static_cast<_Func*>(__context))(__chunk);
+  });
+}
+
+// Preliminary size of each chunk: requires further discussion
+constexpr ptrdiff_t __default_chunk_size = 2048;
+
+struct __chunk_partitions {
+  ptrdiff_t __chunk_count_; // includes the first chunk
+  ptrdiff_t __chunk_size_;
+  ptrdiff_t __first_chunk_size_;
+};
+
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI pmr::memory_resource* __get_memory_resource();
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
+
+template <class _RandomAccessIterator, class _Functor>
+_LIBCPP_HIDE_FROM_ABI void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __f) {
+  auto __partitions = __libdispatch::__partition_chunks(__last - __first);
+
+  // Perform the chunked execution.
+  __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
+    auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
+    auto __index =
+        __chunk == 0
+            ? 0
+            : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
+    __f(__first + __index, __first + __index + __this_chunk_size);
+  });
+}
+
+template <class _Func1, class _Func2>
+_LIBCPP_HIDE_FROM_ABI void __libdispatch_invoke_parallel(_Func1&& __f1, _Func2&& __f2) {
+  __libdispatch::__dispatch_apply(2, [&](size_t __index) {
+    if (__index == 0)
+      __f1();
+    else
+      __f2();
+  });
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
+struct __merge_range {
+  __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
+      : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}
+
+  _RandomAccessIterator1 __mid1_;
+  _RandomAccessIterator2 __mid2_;
+  _RandomAccessIteratorOut __result_;
+};
+
+template <class _RandomAccessIterator1,
+          class _RandomAccessIterator2,
+          class _RandomAccessIterator3,
+          class _Compare,
+          class _LeafMerge>
+_LIBCPP_HIDE_FROM_ABI void __parallel_merge(
+    _RandomAccessIterator1 __first1,
+    _RandomAccessIterator1 __last1,
+    _RandomAccessIterator2 __first2,
+    _RandomAccessIterator2 __last2,
+    _RandomAccessIterator3 __result,
+    _Compare __comp,
+    _LeafMerge __leaf_merge) {
+  __chunk_partitions __partitions =
+      __libdispatch::__partition_chunks(std::max(__last1 - __first1, __last2 - __first2));
+
+  if (__partitions.__chunk_count_ == 0)
+    return;
+
+  if (__partitions.__chunk_count_ == 1) {
+    __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
+    return;
+  }
+
+  using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
+
+  vector<__merge_range_t> __ranges;
+  __ranges.reserve(__partitions.__chunk_count_);
+
+  // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
+  std::__terminate_on_exception([&] {
+    auto __orig_first1 = __first1;
+    auto __orig_first2 = __first2;
+
+    bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
+
+    auto __compute_chunk = [&__last1, &__last2, __iterate_first_range, &__comp](
+                               auto& __iter1, auto& __iter2, auto& __out, size_t __increment_count) -> __merge_range_t {
+      auto [__mid1, __mid2] = [&] {
+        if (__iterate_first_range) {
+          auto __m1 = __iter1 + __increment_count;
+          auto __m2 = std::lower_bound(__iter2, __last2, __m1[-1], __comp);
+          return std::make_pair(__m1, __m2);
+        } else {
+          auto __m2 = __iter2 + __increment_count;
+          auto __m1 = std::lower_bound(__iter1, __last1, __m2[-1], __comp);
+          return std::make_pair(__m1, __m2);
+        }
+      }();
+      __merge_range_t __ret{__mid1, __mid2, __out};
+      __out += (__mid1 - __iter1) + (__mid2 - __iter2);
+      __iter1 = __mid1;
+      __iter2 = __mid2;
+      return __ret;
+    };
+
+    // handle first chunk
+    __ranges.emplace_back(__compute_chunk(__first1, __first2, __result, __partitions.__first_chunk_size_));
+
+    // handle 2 -> N - 1 chunks
+    for (ptrdiff_t __i = 1; __i != __partitions.__chunk_count_ - 1; ++__i)
+      __ranges.emplace_back(__compute_chunk(__first1, __first2, __result, __partitions.__chunk_size_));
+
+    // handle last chunk
+    __ranges.emplace_back(__last1, __last2, __result);
+
+    __libdispatch::__dispatch_apply(__ranges.size(), [&](size_t __index) {
+      auto __last_iters = __ranges[__index];
+      if (__index == 0) {
+        __leaf_merge(
+            __orig_first1, __last_iters.__mid1_, __orig_first2, __last_iters.__mid2_, __last_iters.__result_, __comp);
+      } else {
+        auto __first_iters = __ranges[__index - 1];
+        __leaf_merge(
+            __first_iters.__mid1_,
+            __last_iters.__mid1_,
+            __first_iters.__mid2_,
+            __last_iters.__mid2_,
+            __last_iters.__result_,
+            __comp);
+      }
+    });
+  });
+}
+
+template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction>
+_LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce(
+    _RandomAccessIterator __first,
+    _RandomAccessIterator __last,
+    _Transform __transform,
+    _Value __init,
+    _Combiner __combiner,
+    _Reduction __reduction) {
+  // NOTE(review): __partition_chunks takes a single ptrdiff_t size; the original
+  // patch passed (__first, __last), which cannot compile.
+  auto __partitions = __libdispatch::__partition_chunks(__last - __first);
+  auto __values = std::__make_uninitialized_buffer<_Value[]>(
+      nothrow, __partitions.__chunk_count_, [](_Value* __ptr, size_t __count) { std::destroy_n(__ptr, __count); });
+
+  if (__values == nullptr)
+    std::__throw_pstl_bad_alloc();
+
+  // __dispatch_apply is noexcept
+  __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
+    auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
+    auto __index =
+        __chunk == 0
+            ? 0
+            : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
+    // __values has one slot per *chunk* (not per element), so index it by __chunk.
+    // __transform is unary and __combiner is binary: seed the per-chunk reduction
+    // with the combination of the first two transformed elements, then reduce the
+    // remainder of the chunk starting at offset 2.
+    std::__construct_at(
+        __values + __chunk,
+        __reduction(__first + __index + 2,
+                    __first + __index + __this_chunk_size,
+                    __combiner(__transform((__first + __index)[0]), __transform((__first + __index)[1]))));
+  });
+
+  // Combine the per-chunk partial results with __combiner (the element-level
+  // __transform/__reduction functors must not be applied to _Value objects).
+  return std::transform_reduce(
+      __values, __values + __partitions.__chunk_count_, std::move(__init), __combiner, [](_Value& __val) {
+        return std::move(__val);
+      });
+}
+
+// TODO: parallelize this
+template <class _RandomAccessIterator, class _Comp, class _LeafSort>
+_LIBCPP_HIDE_FROM_ABI void __parallel_stable_sort(
+    _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
+  __leaf_sort(__first, __last, __comp);
+}
+
+_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {}
+
+} // namespace __libdispatch
+} // namespace __par_backend
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
@@ -163,8 +163,8 @@
         std::move(__last),
         [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
         std::move(__init),
-        std::move(__reduce),
-        [=](_ForwardIterator __brick_first, _ForwardIterator __brick_last, _Tp __brick_init) {
+        __reduce,
+        [__transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) {
           return std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>(
               __cpu_backend_tag{},
               std::move(__brick_first),
diff --git a/libcxx/include/__config_site.in
b/libcxx/include/__config_site.in --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -34,6 +34,7 @@ // PSTL backends #cmakedefine _LIBCPP_PSTL_CPU_BACKEND_SERIAL #cmakedefine _LIBCPP_PSTL_CPU_BACKEND_THREAD +#cmakedefine _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH // __USE_MINGW_ANSI_STDIO gets redefined on MinGW #ifdef __clang__ diff --git a/libcxx/include/__utility/terminate_on_exception.h b/libcxx/include/__utility/terminate_on_exception.h --- a/libcxx/include/__utility/terminate_on_exception.h +++ b/libcxx/include/__utility/terminate_on_exception.h @@ -11,6 +11,7 @@ #include <__config> #include <__exception/terminate.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -313,6 +313,11 @@ experimental/memory_resource.cpp ) +if (LIBCXX_PSTL_CPU_BACKEND STREQUAL "libdispatch") + set(LIBCXX_EXPERIMENTAL_SOURCES ${LIBCXX_EXPERIMENTAL_SOURCES} + pstl/libdispatch.cpp) +endif() + add_library(cxx_experimental STATIC ${LIBCXX_EXPERIMENTAL_SOURCES}) target_link_libraries(cxx_experimental PUBLIC cxx-headers) if (LIBCXX_ENABLE_SHARED) diff --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp new file mode 100644 --- /dev/null +++ b/libcxx/src/pstl/libdispatch.cpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <__algorithm/min.h>
+#include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
+#include <__config>
+#include <cmath>
+#include <dispatch/dispatch.h>
+#include <thread>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace __par_backend::inline __libdispatch {
+
+pmr::memory_resource* __get_memory_resource() {
+  static std::pmr::synchronized_pool_resource pool{pmr::new_delete_resource()};
+  return &pool;
+}
+
+void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept {
+  ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
+}
+
+__chunk_partitions __partition_chunks(ptrdiff_t element_count) {
+  // NOTE(review): without this guard, element_count == 0 yields a chunk count of
+  // zero and the division below is a division by zero; callers (e.g.
+  // __parallel_merge) explicitly handle a zero-chunk result.
+  if (element_count == 0)
+    return __chunk_partitions{0, 0, 0};
+
+  __chunk_partitions partitions;
+  partitions.__chunk_count_ = [&] {
+    ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());
+
+    auto medium = [&](ptrdiff_t n) { return cores + ((n - cores) / cores); };
+
+    // This is an approximation of `log(1.01, sqrt(n))` which seemes to be reasonable for `n` larger than 500 and tops
+    // at 800 tasks for n ~ 8 million
+    auto large = [](ptrdiff_t n) { return static_cast<ptrdiff_t>(100.499 * std::log(std::sqrt(n))); };
+
+    if (element_count < cores)
+      return element_count;
+    else if (element_count < 500)
+      return medium(element_count);
+    else
+      return std::min(medium(element_count), large(element_count)); // provide a "smooth" transition
+  }();
+  partitions.__chunk_size_       = element_count / partitions.__chunk_count_;
+  partitions.__first_chunk_size_ = partitions.__chunk_size_;
+
+  const ptrdiff_t leftover_item_count = element_count - (partitions.__chunk_count_ * partitions.__chunk_size_);
+
+  if (leftover_item_count == 0)
+    return partitions;
+
+  if (leftover_item_count == partitions.__chunk_size_) {
+    partitions.__chunk_count_ += 1;
+    return partitions;
+  }
+
+  const ptrdiff_t n_extra_items_per_chunk = leftover_item_count / partitions.__chunk_count_;
+  const ptrdiff_t n_final_leftover_items  = leftover_item_count - (n_extra_items_per_chunk * partitions.__chunk_count_);
+
+  partitions.__chunk_size_ += n_extra_items_per_chunk;
+  partitions.__first_chunk_size_ = partitions.__chunk_size_ + n_final_leftover_items;
+  return partitions;
+}
+
+} // namespace __par_backend::inline __libdispatch
+
+_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
new file mode 100644
--- /dev/null
+++ b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// ADDITIONAL_COMPILE_FLAGS: -Wno-private-header
+
+// __chunk_partitions __partition_chunks(ptrdiff_t);
+
+#include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
+#include <cassert>
+#include <cstddef>
+
+int main(int, char**) {
+  for (std::ptrdiff_t i = 0; i != 2ll << 20; ++i) {
+    auto chunks = std::__par_backend::__libdispatch::__partition_chunks(i);
+    assert(chunks.__chunk_count_ <= i);
+    assert((chunks.__chunk_count_ - 1) * chunks.__chunk_size_ + chunks.__first_chunk_size_ == i);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/fuzz.pstl.copy.sh.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/fuzz.pstl.copy.sh.cpp
new file mode 100644
--- /dev/null
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/fuzz.pstl.copy.sh.cpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License
v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// UNSUPPORTED: libcpp-has-no-incomplete-pstl
+
+// REQUIRES: clang || apple-clang
+// RUN: %{build} -fsanitize=fuzzer -O3
+
+// To make sure we run all possible code paths, we use inputs which are up to 1GB large.
+// RUN: %{run} -max_total_time=10 -max_len=1073741824
+
+// <algorithm>
+
+// template <class ExecutionPolicy, class ForwardIterator1, class ForwardIterator2>
+// ForwardIterator2 copy(ExecutionPolicy&& policy,
+//                       ForwardIterator1 first, ForwardIterator1 last,
+//                       ForwardIterator2 result);
+
+#include <algorithm>
+#include <cstdint>
+#include <execution>
+#include <vector>
+
+struct NonTrivial {
+  std::uint8_t val;
+
+  NonTrivial() = default;
+  NonTrivial(const NonTrivial& v) : val(v.val) {}
+  NonTrivial& operator=(const NonTrivial& v) {
+    val = v.val;
+    return *this;
+  }
+
+  bool operator==(NonTrivial v) const { return v.val == val; }
+};
+
+extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, std::size_t size) {
+  std::vector<NonTrivial> vec(size);
+  auto input = reinterpret_cast<const NonTrivial*>(data);
+  std::copy(std::execution::par, input, input + size, vec.begin());
+  return !std::equal(input, input + size, vec.data());
+}
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/fuzz.pstl.merge.sh.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/fuzz.pstl.merge.sh.cpp
new file mode 100644
--- /dev/null
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/fuzz.pstl.merge.sh.cpp
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// UNSUPPORTED: libcpp-has-no-incomplete-pstl
+
+// REQUIRES: clang || apple-clang
+// RUN: %{build} -fsanitize=fuzzer -O3
+
+// To make sure we run all possible code paths, we use inputs which are up to 1GB large.
+// RUN: %{run} -max_total_time=10 -max_len=1073741824
+
+// <algorithm>
+
+// template <class ExecutionPolicy, class ForwardIterator1, class ForwardIterator2,
+//           class ForwardIterator>
+// ForwardIterator merge(ExecutionPolicy&& policy,
+//                       ForwardIterator1 first1, ForwardIterator1 last1,
+//                       ForwardIterator2 first2, ForwardIterator2 last2,
+//                       ForwardIterator result);
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <execution>
+#include <vector>
+
+extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, std::size_t size) {
+  try {
+    if (size < sizeof(std::size_t))
+      return -1;
+
+    std::size_t split_at;
+    std::memcpy(&split_at, data, sizeof(std::size_t));
+    data += sizeof(std::size_t);
+    size -= sizeof(std::size_t);
+
+    if (split_at > size)
+      return -1;
+
+    std::vector<std::uint8_t> in(data, data + size);
+    std::vector<std::uint8_t> out(size);
+
+    std::sort(in.begin(), in.begin() + split_at);
+    std::sort(in.begin() + split_at, in.end());
+    std::merge(
+        std::execution::par, in.data(), in.data() + split_at, in.data() + split_at, in.data() + size, out.data());
+    return !std::is_sorted(out.begin(), out.end());
+  } catch (...) { // allocation in std::merge failed, so the input data was too large
+    return -1;
+  }
+}
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/pstl.merge.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/pstl.merge.pass.cpp
--- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/pstl.merge.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/pstl.merge.pass.cpp
@@ -49,6 +49,13 @@
     assert((out == std::array{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}));
   }
 
+  { // check that it works with both ranges being empty
+    std::array<int, 0> a;
+    std::array<int, 0> b;
+    std::array<int, 0> out;
+    std::merge(
+        policy, Iter1(std::begin(a)), Iter1(std::end(a)), Iter2(std::begin(b)), Iter2(std::end(b)), std::begin(out));
+  }
   { // check that it works with the first range being empty
     std::array<int, 0> a;
     int b[] = {2, 4, 6, 8, 10};
@@ -96,8 +103,12 @@
     }
 
     std::vector<int> out(std::size(a) + std::size(b));
-    std::merge(
-        Iter1(a.data()), Iter1(a.data() + a.size()), Iter2(b.data()), Iter2(b.data() + b.size()), std::begin(out));
+    std::merge(policy,
+               Iter1(a.data()),
+               Iter1(a.data() + a.size()),
+               Iter2(b.data()),
+               Iter2(b.data() + b.size()),
+               std::begin(out));
     std::vector<int> expected(200);
     std::iota(expected.begin(), expected.end(), 0);
     assert(std::equal(out.begin(), out.end(), expected.begin()));