diff --git a/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst new file mode 100644 --- /dev/null +++ b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst @@ -0,0 +1,135 @@ +============================================= +Enable std::unique_ptr [[clang::trivial_abi]] +============================================= + +Background +========== + +Consider the follow snippets + + +.. code-block:: cpp + + void raw_func(Foo* raw_arg) { ... } + void smart_func(std::unique_ptr smart_arg) { ... } + + Foo* raw_ptr_retval() { ... } + std::unique_ptr smart_ptr_retval() { ... } + + + +The argument ``raw_arg`` could be passed in a register but ``smart_arg`` could not, due to current +implementation. + +Specifically, in the ``smart_arg`` case, the caller secretly constructs a temporary ``std::unique_ptr`` +in its stack-frame, and then passes a pointer to it to the callee in a hidden parameter. +Similarly, the return value from ``smart_ptr_retval`` is secretly allocated in the caller and +passed as a secret reference to the callee. + + +Goal +=================== + +``std::unique_ptr`` is passed directly in a register. + +Design +====== + +* Annotate the two definitions of ``std::unique_ptr`` with ``clang::trivial_abi`` attribute. +* Put the attribuate behind a flag because this change has potential compilation and runtime breakages. + + +This comes with some side effects: + +* ``std::unique_ptr`` parameters will now be destroyed by callees, rather than callers. + It is worth noting that destruction by callee is not unique to the use of trivial_abi attribute. + In most Microsoft's ABIs, arguments are always destroyed by the callee. + +* Reduced code-size. + + +Performance impact +------------------ + +Google has measured performance improvements of up to 1.6% on some large server macrobenchmarks, and a small reduction in binary sizes. + +This also affects null pointer optimization + +Clang's optimizer can now figure out when a `std::unique_ptr` is known to contain *non*-null. +(Actually, this has been a *missed* optimization all along.) + + +.. code-block:: cpp + + struct Foo { + ~Foo(); + }; + std::unique_ptr make_foo(); + void do_nothing(const Foo&) + + void bar() { + auto x = make_foo(); + do_nothing(*x); + } + + +With this change, ``~Foo()`` will be called even if ``make_foo`` returns ``unique_ptr(nullptr)``. +The compiler can now assume that ``x.get()`` cannot be null by the end of ``bar()``, because +the deference of ``x`` would be UB if it were ``nullptr``. (This dereference would not have caused +a segfault, because no load is generated for dereferencing a pointer to a reference. This can be detected with ``-fsanitize=null``). + + +Potential breakages +------------------- + +The following breakages were discovered by enabling this change and fixing the resulting issues in a large code base. + +- Compilation failures + + - Function definitions now require complete type ``T`` for parameters with type ``std::unique_ptr``. The following code will no longer compile. + + .. code-block:: cpp + + class Foo; + void func(std::unique_ptr arg) { /* never use `arg` directly */ } + + - Fix: Remove forward-declaration of ``Foo`` and include its proper headrer. + +- Runtime Failures + + - Lifetime of ``std::unique_ptr<>`` arguments end earlier (at the end of the callee's body, rather than at the end of the full expression containing the call). + + .. code-block:: cpp + + util::Status run_worker(std::unique_ptr); + void func() { + std::unique_ptr smart_foo = ...; + Foo* owned_foo = smart_foo.get(); + // Currently, the following would "work" because the argument to run_worker() is deleted at the end of func() + // With the new calling convention, it will be deleted at the end of run_worker(), + // making this an access to freed memory. + owned_foo->Bar(run_worker(std::move(smart_foo))); + ^ + // <<`` ends earlier. + + Spot the bug: + + .. code-block:: cpp + + std::unique_ptr create_and_subscribe(Bar* subscriber) { + auto foo = std::make_unique(); + subscriber->sub([&foo] { foo->do_thing();} ); + return foo; + } + + One could point out this is an obvious stack-use-after return bug. + With the current calling convention, running this code with ASAN enabled, however, would not yield any "issue". + So is this a bug in ASAN? (Spoiler: No) + + This currently would "work" only because the storage for ``foo`` is in the caller's stackframe. + In other words, ``&foo`` in callee and ``&foo`` in the caller are the same address. + +ASAN can be used to detect both of these. diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -164,6 +164,7 @@ DesignDocs/FileTimeType DesignDocs/FeatureTestMacros DesignDocs/ExtendedCXX03Support + DesignDocs/UniquePtrTrivialAbi * ` design `_ * ` design `_ diff --git a/libcxx/include/__config b/libcxx/include/__config --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -105,6 +105,12 @@ // Re-worked external template instantiations for std::string with a focus on // performance and fast-path inlining. # define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION +// Enable clang::trivial_abi on std::unique_ptr. +# ifdef _LIBCPP_ABI_ENABLE_UNIQUE_PTR_WITH_TRIVIAL_ABI +# define _LIBCPP_ABI_UNIQUE_PTR_TRIVIAL __attribute__((trivial_abi)) +# else +# define _LIBCPP_ABI_UNIQUE_PTR_TRIVIAL +# endif #elif _LIBCPP_ABI_VERSION == 1 # if !defined(_LIBCPP_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support diff --git a/libcxx/include/memory b/libcxx/include/memory --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -2317,7 +2317,7 @@ }; template > -class _LIBCPP_TEMPLATE_VIS unique_ptr { +class _LIBCPP_ABI_UNIQUE_PTR_TRIVIAL _LIBCPP_TEMPLATE_VIS unique_ptr { public: typedef _Tp element_type; typedef _Dp deleter_type; @@ -2525,7 +2525,7 @@ template -class _LIBCPP_TEMPLATE_VIS unique_ptr<_Tp[], _Dp> { +class _LIBCPP_ABI_UNIQUE_PTR_TRIVIAL _LIBCPP_TEMPLATE_VIS unique_ptr<_Tp[], _Dp> { public: typedef _Tp element_type; typedef _Dp deleter_type;