This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contentst

-
libcxx/include/
-
include/
-
string

Differential D68623

Optimize and fix basic_string move assignment operator
ClosedPublic

Authored by mvels on Oct 7 2019, 6:43 PM.

Download Raw Diff

Details

Reviewers

mclow.lists
EricWF

Summary

This change optimizes the move assignment operator to more efficient / compact code, and fixes a bug.

Move assignment optimization:

do not use the (expensive / branched) clear_and_shrink() method but inline size = 0
only zero existing instance in the presence of potential exceptions

Result (std::string): more compact code, single branch

Bug fix:

an exception thrown by the alloc move assignment would leave the current instance in a bad state with a non null pointer owned by the moved from instance, not owned by the current alloc. Swap the alloc move and assignment of __r_r.first()

Given:

void MoveAssign(std::string* d, std::string* s) {
    *d = std::move(*s);
}

Before:

        pushq   %r14
        pushq   %rbx
        pushq   %rax
        movq    %rsi, %r14
        movq    %rdi, %rbx
        cmpb    $0, 23(%rdi)
        js      .LBB2_2
        movb    $0, (%rbx)
        movb    $0, 23(%rbx)
        jmp     .LBB2_4
.LBB2_2:
        movq    (%rbx), %rax
        movb    $0, (%rax)
        movq    $0, 8(%rbx)
        cmpb    $0, 23(%rbx)
        jns     .LBB2_4
        movq    (%rbx), %rdi
        callq   operator delete(void*)
        movq    $0, 16(%rbx)
.LBB2_4:
        movq    16(%r14), %rax
        movq    %rax, 16(%rbx)
        movups  (%r14), %xmm0
        movups  %xmm0, (%rbx)
        xorps   %xmm0, %xmm0
        movups  %xmm0, (%r14)
        movq    $0, 16(%r14)
        addq    $8, %rsp
        popq    %rbx
        popq    %r14
        retq

After:

        push    r14
        push    rbx
        push    rax
        mov     rbx, rsi
        mov     r14, rdi
        cmp     byte ptr [rdi + 23], 0
        jns     .LBB2_2
        mov     rdi, qword ptr [r14]
        call    operator delete(void*)
.LBB2_2:
        mov     rax, qword ptr [rbx + 16]
        mov     qword ptr [r14 + 16], rax
        movups  xmm0, xmmword ptr [rbx]
        movups  xmmword ptr [r14], xmm0
        mov     byte ptr [rbx], 0
        mov     byte ptr [rbx + 23], 0
        add     rsp, 8
        pop     rbx
        pop     r14
        ret

Diff Detail

Repository

rG LLVM Github Monorepo

Build Status

Buildable 39142
Build 39152: arc lint + arc unit

Event Timeline

mvels created this revision.Oct 7 2019, 6:43 PM

Herald added a project: Restricted Project. · View Herald TranscriptOct 7 2019, 6:43 PM

Herald added subscribers: libcxx-commits, christof. · View Herald Transcript

mvels retitled this revision from Optimize and Fix move assignment operator This change optimizes the move assigment operator to more efficient / compact code, and fixes a bug. Move assignment optimization: - do not use the (epensive / branched) clear_and_shrink() method but... to Optimize and Fix move assignment operator.Oct 7 2019, 6:45 PM

mvels edited the summary of this revision. (Show Details)

Herald added a reviewer: mclow.lists. · View Herald TranscriptOct 7 2019, 6:45 PM

mvels edited the summary of this revision. (Show Details)Oct 7 2019, 6:46 PM

I'm wondering where you are seeing getting this codegen from.

I tried this in Compiler Explorer:

extern std::string source;
extern std::string dest;

int main () { source = std::move(dest);  return dest.size(); }

and got a much simpler codegen with -O3

Here's the godbolt link I am looking at with your code:

https://gcc.godbolt.org/z/0WaoKK

main:                                   # @main
        push    rax
        cmp     byte ptr [rip + source+23], 0
        js      .LBB0_2
        mov     byte ptr [rip + source], 0
        mov     byte ptr [rip + source+23], 0
        jmp     .LBB0_4
.LBB0_2:
        mov     rax, qword ptr [rip + source]
        mov     byte ptr [rax], 0
        mov     qword ptr [rip + source+8], 0
        cmp     byte ptr [rip + source+23], 0
        jns     .LBB0_4
        mov     rdi, qword ptr [rip + source]
        call    operator delete(void*)
        mov     qword ptr [rip + source+16], 0
.LBB0_4:
        mov     rax, qword ptr [rip + dest+16]
        mov     qword ptr [rip + source+16], rax
        movups  xmm0, xmmword ptr [rip + dest]
        movups  xmmword ptr [rip + source], xmm0
        xorps   xmm0, xmm0
        movups  xmmword ptr [rip + dest], xmm0
        mov     qword ptr [rip + dest+16], 0
        xor     eax, eax
        pop     rcx
        ret

In my local godbolt with the change (I may spin up a cloud instance if I have some time), the codegen is:

main:                                   # @main
        push    rax
        cmp     byte ptr [rip + source+23], 0
        jns     .LBB0_2
        mov     rdi, qword ptr [rip + source]
        call    operator delete(void*)
.LBB0_2:
        mov     rax, qword ptr [rip + dest+16]
        mov     qword ptr [rip + source+16], rax
        movups  xmm0, xmmword ptr [rip + dest]
        movups  xmmword ptr [rip + source], xmm0
        mov     byte ptr [rip + dest+23], 0
        mov     byte ptr [rip + dest], 0
        xor     eax, eax
        pop     rcx
        ret

mvels retitled this revision from Optimize and Fix move assignment operator to Optimize and fix basic_string move assignment operator.Oct 7 2019, 7:50 PM

Harbormaster completed remote builds in B39142: Diff 223719.Oct 7 2019, 10:07 PM

This looks ok to me. If you're concerned about performance, you might want to split this into two routines, one for the case were the allocators are equal, and one for the case where they're not (and skip the whole allocator dance for the equal case.)

Thinking about this some more, I agree with your analysis about the move assignment throwing.

Yeah, it's pretty bad though if allocators have throwing move operators, but until c++14 it's 'technically allowed' and something that the std handle gracefully. I was not that alarmed tho, such should be rare, and deserving a halt and catch fire. :)

Re split for the alloc case: that may be a good option for a next change, I wanted to keep this change small in scope, and for std::allocator no extra cost / code emitted

I'm fine with incremental changes.
It would be nice to have a test case for a allocator that throws on move-assignment, though.

This revision is now accepted and ready to land.Oct 11 2019, 11:40 AM

Demonstrating my ignorance on the expected / normal flow of submits here: what is the ETA for this?

@mvels, I'm assuming you don't have commit access to LLVM. The standard procedure there is that when your commit is accepted, you comment saying you don't have commit access and ask a reviewer to commit it for you. In this case, @mclow.lists should be able to commit it on your behalf.

Thanks Shoaib!

Marshall, could you commit this change for me?

Thanks!

Eric or Marschal, can we land this change? Thanks!

Ping?

Looks like this landed in November: http://reviews.llvm.org/rG703c26f03be74daf6e483380e6b23029a3851081

(ps: I think you've contributed enough good patches to apply for commit access: https://llvm.org/docs/DeveloperPolicy.html#new-contributors)

Oh, it popped on my screen and I missed that it closed / landed, sorry about that :)

I'll file for commit access, thanks!

Revision Contents

Path

Size

libcxx/

include/

string

18 lines

Diff 223719

libcxx/include/string

	Show First 20 Lines • Show All 2,283 Lines • ▼ Show 20 Lines
	void			void
	basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)			basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
	#if _LIBCPP_STD_VER > 14			#if _LIBCPP_STD_VER > 14
	_NOEXCEPT			_NOEXCEPT
	#else			#else
	_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)			_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
	#endif			#endif
	{			{
	__clear_and_shrink();			if (__is_long()) {
	__r_.first() = __str.__r_.first();			__alloc_traits::deallocate(__alloc(), __get_long_pointer(),
				__get_long_cap());
				#if _LIBCPP_STD_VER <= 14
				if (!is_nothrow_move_assignable<allocator_type>::value) {
				__set_short_size(0);
				traits_type::assign(__get_short_pointer()[0], value_type());
				}
				#endif
				}
	__move_assign_alloc(__str);			__move_assign_alloc(__str);
	__str.__zero();			__r_.first() = __str.__r_.first();
				__str.__set_short_size(0);
				traits_type::assign(__str.__get_short_pointer()[0], value_type());
	}			}

	template <class _CharT, class _Traits, class _Allocator>			template <class _CharT, class _Traits, class _Allocator>
	inline			inline
	basic_string<_CharT, _Traits, _Allocator>&			basic_string<_CharT, _Traits, _Allocator>&
	basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str)			basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str)
	_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))			_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
	{			{
	▲ Show 20 Lines • Show All 2,062 Lines • Show Last 20 Lines