Index: benchmarks/algorithms.bench.cpp
===================================================================
--- benchmarks/algorithms.bench.cpp
+++ benchmarks/algorithms.bench.cpp
@@ -58,5 +58,69 @@
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
     getDuplicateStringInputs)->Arg(TestNumInputs);
 
+template <typename GenInputs, typename Alg>
+void do_binary_search_benchmark(benchmark::State& st, GenInputs gen, Alg alg)
+{
+    using ValueType = typename decltype(gen(0))::value_type;
+    auto in = gen(st.range(0));
+    std::sort(in.begin(), in.end());
+
+    const auto every_10_percentile = [&]() -> std::vector<ValueType*>  {
+        size_t step = in.size() / 10;
+
+        if (step == 0) {
+            st.SkipWithError("Input doesn't contain enough elements");
+            return {};
+        }
+
+        std::vector<ValueType*> res;
+        for (size_t i = 0; i < in.size(); i += step)
+            res.push_back(&in[i]);
+
+        return res;
+    }();
+
+    for (auto _ : st)
+    {
+        for (auto* test : every_10_percentile)
+          benchmark::DoNotOptimize(alg(in.begin(), in.end(), *test));
+    }
+}
+
+template <typename GenInputs>
+void BM_LowerBound(benchmark::State& st, GenInputs gen)
+{
+    do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+        return std::lower_bound(f, l, v);
+    });
+}
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int32, getRandomIntegerInputs<int32_t>)
+    ->Arg(TestNumInputs)                    // Small int32_t vector
+    ->Arg(TestNumInputs * TestNumInputs);   // Big int32_t   vector
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int64, getRandomIntegerInputs<int64_t>)
+    ->Arg(TestNumInputs);  // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_strings, getRandomStringInputs)
+    ->Arg(TestNumInputs);  // Small string vector. What happens if the comparison is not very cheap.
+
+template <typename GenInputs>
+void BM_EqualRange(benchmark::State& st, GenInputs gen)
+{
+    do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+        return std::equal_range(f, l, v);
+    });
+}
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int32, getRandomIntegerInputs<int32_t>)
+    ->Arg(TestNumInputs)                    // Small int32_t vector
+    ->Arg(TestNumInputs * TestNumInputs);   // Big int32_t   vector
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int64, getRandomIntegerInputs<int64_t>)
+    ->Arg(TestNumInputs);  // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_strings, getRandomStringInputs)
+    ->Arg(TestNumInputs);  // Small string vector. What happens if the comparison is not very cheap.
 
 BENCHMARK_MAIN();
Index: include/algorithm
===================================================================
--- include/algorithm
+++ include/algorithm
@@ -750,6 +750,20 @@
     bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);}
 };
 
+// Perform division by two quickly for positive integers (llvm.org/PR39129)
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 ptrdiff_t
+__half_positive(ptrdiff_t __value)
+{
+    return static_cast<ptrdiff_t>(static_cast<size_t>(__value) / 2);
+}
+
+template <typename _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _Tp
+__half_positive(_Tp __value)
+{
+    return __value / 2;
+}
+
 #ifdef _LIBCPP_DEBUG
 
 template <class _Compare>
@@ -3202,7 +3216,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__pred(*__m))
@@ -4069,7 +4083,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))
@@ -4117,7 +4131,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(__value_, *__m))
@@ -4165,7 +4179,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))
Index: test/libcxx/algorithms/half_positive.pass.cpp
===================================================================
--- /dev/null
+++ test/libcxx/algorithms/half_positive.pass.cpp
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// template <typename _Tp> _Tp __half_as_unsigned(const _Tp&);
+
+// __half_as_unsigned divide integer number by 2 as unsigned number
+// if it's safe to do so. It can be an important optimization for lower bound,
+// for example.
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <type_traits>
+
+#include "user_defined_integral.hpp"
+
+
+int main()
+{
+    {
+        typedef int type;
+
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_positive(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef size_t type;
+
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_positive(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef UserDefinedIntegral<int> type;
+
+        const auto max_v = type(std::numeric_limits<int>::max());
+        assert(std::__half_positive(max_v) == max_v / 2);
+    }
+
+#if !defined(_LIBCPP_HAS_NO_INT128)
+    {
+        typedef __int128_t type;
+
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_positive(max_v) == max_v / 2, "");
+    }
+#endif  // !defined(_LIBCPP_HAS_NO_INT128)
+}