Index: benchmarks/algorithms.bench.cpp
===================================================================
--- benchmarks/algorithms.bench.cpp
+++ benchmarks/algorithms.bench.cpp
@@ -58,5 +58,69 @@
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
     getDuplicateStringInputs)->Arg(TestNumInputs);
 
+template <typename GenInputs, typename Alg>
+void do_binary_search_benchmark(benchmark::State& st, GenInputs gen, Alg alg)
+{
+    using ValueType = typename decltype(gen(0))::value_type;
+    auto in = gen(st.range(0));
+    std::sort(in.begin(), in.end());
+
+    const auto every_10_percentile = [&]() -> std::vector<ValueType*>  {
+        size_t step = in.size() / 10;
+
+        if (step == 0) {
+            st.SkipWithError("Input doesn't contain enough elements");
+            return {};
+        }
+
+        std::vector<ValueType*> res;
+        for (size_t i = 0; i < in.size(); i += step)
+            res.push_back(&in[i]);
+
+        return res;
+    }();
+
+    for (auto _ : st)
+    {
+        for (auto* test : every_10_percentile)
+          benchmark::DoNotOptimize(alg(in.begin(), in.end(), *test));
+    }
+}
+
+template <typename GenInputs>
+void BM_LowerBound(benchmark::State& st, GenInputs gen)
+{
+    do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+        return std::lower_bound(f, l, v);
+    });
+}
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int32, getRandomIntegerInputs<int32_t>)
+    ->Arg(TestNumInputs)                    // Small int32_t vector
+    ->Arg(TestNumInputs * TestNumInputs);   // Big int32_t   vector
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int64, getRandomIntegerInputs<int64_t>)
+    ->Arg(TestNumInputs);  // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_strings, getRandomStringInputs)
+    ->Arg(TestNumInputs);  // Small string vector. What happens if the comparison is not very cheap.
+
+template <typename GenInputs>
+void BM_EqualRange(benchmark::State& st, GenInputs gen)
+{
+    do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+        return std::equal_range(f, l, v);
+    });
+}
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int32, getRandomIntegerInputs<int32_t>)
+    ->Arg(TestNumInputs)                    // Small int32_t vector
+    ->Arg(TestNumInputs * TestNumInputs);   // Big int32_t   vector
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int64, getRandomIntegerInputs<int64_t>)
+    ->Arg(TestNumInputs);  // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_strings, getRandomStringInputs)
+    ->Arg(TestNumInputs);  // Small string vector. What happens if the comparison is not very cheap.
 
 BENCHMARK_MAIN();
Index: include/algorithm
===================================================================
--- include/algorithm
+++ include/algorithm
@@ -3193,6 +3193,47 @@
 
 // partition_point
 
+template <typename _Tp, bool = is_integral<_Tp>::value>
+struct __do_half_as_unsigned {
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    _Tp operator()(const _Tp& __value_) const {
+        return __value_ / 2;
+    }
+};
+
+
+#if _LIBCPP_STD_VER >= 11  // There isn't an easy compile time way to get
+                           // numeric_limits::max() in C++03
+template <typename _Integral>
+struct __do_half_as_unsigned<_Integral, /* is_integral =*/ true> {
+    typedef typename conditional<
+            std::numeric_limits<_Integral>::max() <= numeric_limits<size_t>::max(),
+            size_t,
+            _Integral
+        >::type __size_t_if_safe;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    _Integral operator()(_Integral __value_) const {
+        return static_cast<_Integral>(static_cast<__size_t_if_safe>(__value_) / 2);
+    }
+};
+#endif  // _LIBCPP_STD_VER >= 11
+
+
+// Casts to size_t to divide the number in half (if the type conversion is safe).
+// Implicit assumption - the value being cast is non-negative.
+//
+// This is necessary because division by two is faster for unsigned numbers and it
+// matters (Bug 39129).
+//
+// This function needs to be constexpr because in C++20 algorithms that use it are
+// constexpr.
+template <typename _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+_Tp __half_as_unsigned(const _Tp& __value_) {
+    return __do_half_as_unsigned<_Tp>()(__value_);
+}
+
 template<class _ForwardIterator, class _Predicate>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
@@ -3201,7 +3242,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_as_unsigned(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__pred(*__m))
@@ -4060,26 +4101,39 @@
 
 // lower_bound
 
+template <class _Compare, class _Tp, class _Reference>
+struct __less_than_t
+{
+    _Compare __comp_;  // We can't use an empty base class optimization for comp
+                       // since it might be a function pointer.
+                       //
+                       // Because this functor is compiled out completely, it doesn't matter.
+    const _Tp* __value_;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    __less_than_t(_Compare __comp, const _Tp& __value_) :
+        __comp_(__comp),
+        __value_(_VSTD::addressof(__value_)) {}
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    bool operator()(_Reference __u)
+    {
+        return __comp_(__u, *__value_);
+    }
+};
+
+template <class _Iterator, class _Compare, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+__less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>
+__less_than(_Compare __comp, const _Tp& __value_) {
+    return __less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_);
+}
+
 template <class _Compare, class _ForwardIterator, class _Tp>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
 {
-    typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type;
-    difference_type __len = _VSTD::distance(__first, __last);
-    while (__len != 0)
-    {
-        difference_type __l2 = __len / 2;
-        _ForwardIterator __m = __first;
-        _VSTD::advance(__m, __l2);
-        if (__comp(*__m, __value_))
-        {
-            __first = ++__m;
-            __len -= __l2 + 1;
-        }
-        else
-            __len = __l2;
-    }
-    return __first;
+    return _VSTD::partition_point(__first, __last, __less_than<_ForwardIterator>(__comp, __value_));
 }
 
 template <class _ForwardIterator, class _Tp, class _Compare>
@@ -4108,26 +4162,40 @@
 
 // upper_bound
 
+template <class _Compare, class _Tp, class _Reference>
+struct __not_greater_than_t
+{
+    _Compare __comp_;  // We can't use an empty base class optimization for comp
+                       // since it might be a function pointer.
+                       //
+                       // Because this functor is compiled out completely, it doesn't matter.
+    const _Tp* __value_;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    __not_greater_than_t(_Compare __comp, const _Tp& __value_) :
+        __comp_(__comp),
+        __value_(_VSTD::addressof(__value_)) {}
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    bool operator()(_Reference __u)
+    {
+        return !__comp_(*__value_, __u);
+    }
+};
+
+template <class _Iterator, class _Compare, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+__not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>
+__not_greater_than(_Compare __comp, const _Tp& __value_)
+{
+    return __not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_);
+}
+
 template <class _Compare, class _ForwardIterator, class _Tp>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
 {
-    typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type;
-    difference_type __len = _VSTD::distance(__first, __last);
-    while (__len != 0)
-    {
-        difference_type __l2 = __len / 2;
-        _ForwardIterator __m = __first;
-        _VSTD::advance(__m, __l2);
-        if (__comp(__value_, *__m))
-            __len = __l2;
-        else
-        {
-            __first = ++__m;
-            __len -= __l2 + 1;
-        }
-    }
-    return __first;
+    return _VSTD::partition_point(__first, __last, __not_greater_than<_ForwardIterator>(__comp, __value_));
 }
 
 template <class _ForwardIterator, class _Tp, class _Compare>
@@ -4164,7 +4232,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_as_unsigned(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))
Index: test/libcxx/algorithms/half_as_unsigned.pass.cpp
===================================================================
--- /dev/null
+++ test/libcxx/algorithms/half_as_unsigned.pass.cpp
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// template <typename _Tp> _Tp __half_as_unsigned(const _Tp&);
+
+// __half_as_unsigned divides an integer by 2 as an unsigned number
+// if it's safe to do so. It can be an important optimization for lower_bound,
+// for example.
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <type_traits>
+
+#include "user_defined_integral.hpp"
+
+
+template <class T, class = void>
+struct do_unsigned_castable : std::false_type {};
+
+template <class T>
+struct do_unsigned_castable<T,
+    typename std::__void_t<typename std::__do_half_as_unsigned<T>::__size_t_if_safe>::type> :
+    std::is_same<size_t, typename std::__do_half_as_unsigned<T>::__size_t_if_safe> {};
+
+template <class T>
+constexpr bool Unsigned_castable() { return do_unsigned_castable<T>::value; }
+
+int main()
+{
+    {
+        typedef int type;
+
+        static_assert(Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef size_t type;
+
+        static_assert(Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef UserDefinedIntegral<int> type;
+
+        static_assert(!Unsigned_castable<type>(), "");
+        const auto max_v = type(std::numeric_limits<int>::max());
+        assert(std::__half_as_unsigned(max_v) == max_v / 2);
+    }
+
+#if !defined(_LIBCPP_HAS_NO_INT128)
+    {
+        typedef __int128_t type;
+
+        static_assert(std::is_integral<type>::value, "");
+
+        static_assert(!Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+#endif  // !defined(_LIBCPP_HAS_NO_INT128)
+}