diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -174,9 +174,11 @@
     algorithms/ranges_sort.bench.cpp
     algorithms/ranges_sort_heap.bench.cpp
     algorithms/ranges_stable_sort.bench.cpp
+    algorithms/reduce.bench.cpp
     algorithms/sort.bench.cpp
     algorithms/sort_heap.bench.cpp
     algorithms/stable_sort.bench.cpp
+    algorithms/transform_reduce.bench.cpp
     libcxxabi/dynamic_cast.bench.cpp
     allocation.bench.cpp
     deque.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/reduce.bench.cpp b/libcxx/benchmarks/algorithms/reduce.bench.cpp
new file mode 100644
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/reduce.bench.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <benchmark/benchmark.h>
+#include <numeric>
+
+template <class T>
+static void bm_reduce(benchmark::State& state) {
+  auto size = state.range();
+  std::vector<T> a;
+  for (size_t i = 0; i != size; ++i) {
+    a.emplace_back(i * 3);
+  }
+
+  for (auto _ : state) {
+    benchmark::ClobberMemory();
+    auto ret = std::reduce(a.begin(), a.end());
+    benchmark::DoNotOptimize(ret);
+  }
+}
+
+BENCHMARK(bm_reduce<float>)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/algorithms/transform_reduce.bench.cpp b/libcxx/benchmarks/algorithms/transform_reduce.bench.cpp
new file mode 100644
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/transform_reduce.bench.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <benchmark/benchmark.h>
+#include <numeric>
+
+template <class T>
+static void bm_transform_reduce(benchmark::State& state) {
+  auto size = state.range();
+  std::vector<T> a;
+  std::vector<T> b;
+  for (size_t i = 0; i != size; ++i) {
+    a.emplace_back(i * 3);
+    b.emplace_back(i + 2);
+  }
+
+  for (auto _ : state) {
+    benchmark::ClobberMemory();
+    auto ret = std::transform_reduce(a.begin(), a.end(), b.begin(), T(1));
+    benchmark::DoNotOptimize(ret);
+  }
+}
+
+BENCHMARK(bm_transform_reduce<float>)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__config b/libcxx/include/__config
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1278,6 +1278,12 @@
 #    define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS
 #  endif
 
+#ifdef _LIBCPP_COMPILER_CLANG_BASED
+#  define _LIBCPP_CLANG_PRAGMA(...) _Pragma(__VA_ARGS__)
+#else
+#  define _LIBCPP_CLANG_PRAGMA(...)
+#endif
+
 // TODO: Make this a proper configuration option
 #define _PSTL_PAR_BACKEND_SERIAL
 
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -107,6 +107,9 @@
 };
 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(multiplies);
 
+template <class _Tp>
+struct __is_trivial_multiplies_operation<multiplies<_Tp>, _Tp, _Tp> : true_type {};
+
 #if _LIBCPP_STD_VER >= 14
 template <>
 struct _LIBCPP_TEMPLATE_VIS multiplies<void>
@@ -119,6 +122,9 @@
         { return          _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u); }
     typedef void is_transparent;
 };
+
+template <class _Lhs, class _Rhs>
+struct __is_trivial_multiplies_operation<multiplies<>, _Lhs, _Rhs> : true_type {};
 #endif
 
 #if _LIBCPP_STD_VER >= 14
diff --git a/libcxx/include/__numeric/reduce.h b/libcxx/include/__numeric/reduce.h
--- a/libcxx/include/__numeric/reduce.h
+++ b/libcxx/include/__numeric/reduce.h
@@ -13,6 +13,8 @@
 #include <__config>
 #include <__functional/operations.h>
 #include <__iterator/iterator_traits.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/operation_traits.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -29,6 +31,17 @@
   return __init;
 }
 
+#if _LIBCPP_STD_VER >= 20
+template <class _InputIterator, class _Tp, class _BinaryOp>
+  requires(is_same_v<__iter_value_type<_InputIterator>, _Tp> && __is_trivial_operation<_BinaryOp, _Tp, _Tp>::value)
+_LIBCPP_HIDE_FROM_ABI constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOp __b) {
+  _LIBCPP_CLANG_PRAGMA("clang loop vectorize(enable)")
+  for (; __first != __last; ++__first)
+    __init = __b(__init, *__first);
+  return __init;
+}
+#  endif // _LIBCPP_STD_VER >= 20
+
 template <class _InputIterator, class _Tp>
 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp reduce(_InputIterator __first, _InputIterator __last,
                                                                    _Tp __init) {
diff --git a/libcxx/include/__numeric/transform_reduce.h b/libcxx/include/__numeric/transform_reduce.h
--- a/libcxx/include/__numeric/transform_reduce.h
+++ b/libcxx/include/__numeric/transform_reduce.h
@@ -10,8 +10,12 @@
 #ifndef _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
 #define _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
 
+#include <__concepts/arithmetic.h>
 #include <__config>
 #include <__functional/operations.h>
+#include <__iterator/iterator_traits.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/operation_traits.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -40,6 +44,25 @@
   return __init;
 }
 
+#if _LIBCPP_STD_VER >= 20
+template <class _InputIterator1, class _InputIterator2, class _Tp, class _ReductionOp, class _TransformOp>
+  requires(__is_trivial_operation<_TransformOp, _Tp, _Tp>::value &&
+           __is_trivial_operation<_ReductionOp, _Tp, _Tp>::value &&
+           is_same_v<__iter_value_type<_InputIterator1>, _Tp> && is_same_v<__iter_value_type<_InputIterator2>, _Tp>)
+_LIBCPP_HIDE_FROM_ABI constexpr _Tp transform_reduce(
+    _InputIterator1 __first1,
+    _InputIterator1 __last1,
+    _InputIterator2 __first2,
+    _Tp __init,
+    _ReductionOp __b1,
+    _TransformOp __b2) {
+  _LIBCPP_CLANG_PRAGMA("clang loop vectorize(enable)")
+  for (; __first1 != __last1; ++__first1, (void)++__first2)
+    __init = __b1(__init, __b2(*__first1, *__first2));
+  return __init;
+}
+#  endif
+
 template <class _InputIterator1, class _InputIterator2, class _Tp>
 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp transform_reduce(_InputIterator1 __first1,
                                                                              _InputIterator1 __last1,
diff --git a/libcxx/include/__type_traits/operation_traits.h b/libcxx/include/__type_traits/operation_traits.h
--- a/libcxx/include/__type_traits/operation_traits.h
+++ b/libcxx/include/__type_traits/operation_traits.h
@@ -21,6 +21,15 @@
 template <class _Pred, class _Lhs, class _Rhs>
 struct __is_trivial_plus_operation : false_type {};
 
+template <class _Pred, class _Lhs, class _Rhs>
+struct __is_trivial_multiplies_operation : false_type {};
+
+template <class _Pred, class _Lhs, class _Rhs>
+using __is_trivial_operation =
+    integral_constant<bool,
+                      __is_trivial_plus_operation<_Pred, _Lhs, _Rhs>::value ||
+                          __is_trivial_multiplies_operation<_Pred, _Lhs, _Rhs>::value>;
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TYPE_TRAITS_OPERATION_TRAITS_H
diff --git a/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp b/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
--- a/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
@@ -50,6 +50,12 @@
     static_assert( std::is_same_v<T, decltype(std::reduce(p, p))> );
 }
 
+TEST_CONSTEXPR_CXX20 void test_optimized_path() {
+    float a[] = {1.f, 2.f, 3.f, 4.f};
+    auto ret = std::reduce(a, a + 4);
+    assert(ret > 9.5f && ret < 10.5f);
+}
+
 TEST_CONSTEXPR_CXX20 bool
 test()
 {
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
--- a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
@@ -70,6 +70,13 @@
         std::transform_reduce(std::begin(ia), std::end(ia), std::begin(ib), MoveOnly{0}).get());
 }
 
+TEST_CONSTEXPR_CXX20 void test_optimized_path() {
+    float a[] = {1.f, 2.f, 3.f, 4.f};
+    float b[] = {1.f, 2.f, 3.f, 4.f};
+    auto ret = std::transform_reduce(a, a + 4, b, 0);
+    assert(ret > 29.5f && ret < 30.5f);
+}
+
 TEST_CONSTEXPR_CXX20 bool
 test()
 {
@@ -109,6 +116,7 @@
     test<      int*,       unsigned int *>();
 
     test_move_only_types();
+    test_optimized_path();
 
     return true;
 }