diff --git a/libcxx/include/string b/libcxx/include/string
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -796,6 +796,10 @@
         _NOEXCEPT;
 #endif
 
+    // Optimization opportunity: do not externally instantiate the copy
+    // constructor, which inlines short string initialization. Long string
+    // initialization is delegated to the (external) __init_long()method,
+    // which results in a 3X-4X speed up for SSO initialization.
     basic_string(const basic_string& __str);
     basic_string(const basic_string& __str, const allocator_type& __a);
 
@@ -874,6 +878,7 @@
     _LIBCPP_INLINE_VISIBILITY
     operator __self_view() const _NOEXCEPT { return __self_view(data(), size()); }
 
+    // Optimization opportunity: do not externally instantiate
     basic_string& operator=(const basic_string& __str);
 
     template <class _Tp, class = typename enable_if<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, void>::type>
@@ -1543,6 +1548,8 @@
     inline
     void __init(size_type __n, value_type __c);
 
+    void __init_long(const basic_string& __str);
+
     template <class _InputIterator>
     inline
     typename enable_if
@@ -1794,6 +1801,20 @@
     traits_type::assign(__p[__sz], value_type());
 }
 
+template <class _CharT, class _Traits, class _Allocator>
+void basic_string<_CharT, _Traits, _Allocator>::__init_long(
+    const basic_string& __str) {
+  const value_type* __s = _VSTD::__to_address(__str.__get_long_pointer());
+  size_type __sz = __str.__get_long_size();
+  size_type __cap = __recommend(__sz);
+  pointer __p = __alloc_traits::allocate(__alloc(), __cap + 1);
+  __set_long_pointer(__p);
+  __set_long_cap(__cap + 1);
+  __set_long_size(__sz);
+  traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
+  traits_type::assign(__p[__sz], value_type());
+}
+
 template <class _CharT, class _Traits, class _Allocator>
 template <class>
 basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const _Allocator& __a)
@@ -1837,7 +1858,7 @@
     if (!__str.__is_long())
         __r_.first().__r = __str.__r_.first().__r;
     else
-        __init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
+        __init_long(__str);
 #if _LIBCPP_DEBUG_LEVEL >= 2
     __get_db()->__insert_c(this);
 #endif
@@ -1851,7 +1872,7 @@
     if (!__str.__is_long())
         __r_.first().__r = __str.__r_.first().__r;
     else
-        __init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
+        __init_long(__str);
 #if _LIBCPP_DEBUG_LEVEL >= 2
     __get_db()->__insert_c(this);
 #endif
@@ -2266,7 +2287,10 @@
     if (this != &__str)
     {
         __copy_assign_alloc(__str);
-        return assign(__str.data(), __str.size());
+        if (__is_long() | __str.__is_long()) {  // LINT: explicit binary or.
+          return assign(__str.data(), __str.size());
+        }
+        __r_.first().__r = __str.__r_.first().__r;
     }
     return *this;
 }