This is a non trivial win for externally templated assignment operator.
x86 without tail call (current libc++)
0000000000000000 <std::string::operator=(std::string const&)>:
0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 53 push %rbx 5: 50 push %rax 6: 48 89 fb mov %rdi,%rbx 9: 48 39 f7 cmp %rsi,%rdi c: 74 17 je 25 <std::string::operator=(std::string const&)+0x25> e: 0f b6 56 17 movzbl 0x17(%rsi),%edx 12: 84 d2 test %dl,%dl 14: 79 07 jns 1d <std::string::operator=(std::string const&)+0x1d> 16: 48 8b 56 08 mov 0x8(%rsi),%rdx 1a: 48 8b 36 mov (%rsi),%rsi 1d: 48 89 df mov %rbx,%rdi 20: e8 00 00 00 00 callq 25 <std::string::operator=(std::string const&)+0x25> 25: 48 89 d8 mov %rbx,%rax 28: 48 83 c4 08 add $0x8,%rsp 2c: 5b pop %rbx 2d: 5d pop %rbp 2e: c3 retq
After:
0000000000000000 <std::string::operator=(std::string const&)>:
0: 48 39 f7 cmp %rsi,%rdi 3: 74 14 je 19 <std::string::operator=(std::string const&)+0x19> 5: 0f b6 56 17 movzbl 0x17(%rsi),%edx 9: 84 d2 test %dl,%dl b: 79 07 jns 14 <std::string::operator=(std::string const&)+0x14> d: 48 8b 56 08 mov 0x8(%rsi),%rdx 11: 48 8b 36 mov (%rsi),%rsi 14: e9 00 00 00 00 jmpq 19 <std::string::operator=(std::string const&)+0x19> 19: 48 89 f8 mov %rdi,%rax 1c: c3 retq
Benchmark (pending per https://reviews.llvm.org/D67667)
BM_StringAssignStr_Empty_Opaque 6.23ns ± 0% 5.19ns ± 0% -16.70% (p=0.016 n=5+4) BM_StringAssignStr_Empty_Transparent 5.86ns ± 0% 5.14ns ± 0% -12.24% (p=0.008 n=5+5) BM_StringAssignStr_Small_Opaque 8.79ns ± 1% 7.69ns ± 0% -12.53% (p=0.008 n=5+5) BM_StringAssignStr_Small_Transparent 9.44ns ± 0% 8.00ns ± 0% -15.26% (p=0.008 n=5+5) BM_StringAssignStr_Large_Opaque 25.2ns ± 0% 24.3ns ± 0% -3.50% (p=0.008 n=5+5) BM_StringAssignStr_Large_Transparent 23.6ns ± 0% 22.5ns ± 0% -4.76% (p=0.008 n=5+5) BM_StringAssignStr_Huge_Opaque 319ns ± 5% 317ns ± 5% ~ (p=0.690 n=5+5) BM_StringAssignStr_Huge_Transparent 319ns ± 5% 317ns ± 5% ~ (p=0.421 n=5+5) BM_StringAssignAsciiz_Empty_Opaque 7.41ns ± 0% 7.77ns ± 0% +4.89% (p=0.008 n=5+5) BM_StringAssignAsciiz_Empty_Transparent 7.54ns ± 3% 7.30ns ± 0% -3.24% (p=0.008 n=5+5) BM_StringAssignAsciiz_Small_Opaque 9.87ns ± 0% 10.24ns ± 1% +3.76% (p=0.008 n=5+5) BM_StringAssignAsciiz_Small_Transparent 10.4ns ± 1% 9.8ns ± 2% -5.78% (p=0.008 n=5+5) BM_StringAssignAsciiz_Large_Opaque 30.1ns ± 0% 30.1ns ± 0% ~ (p=0.167 n=5+5) BM_StringAssignAsciiz_Large_Transparent 27.1ns ± 0% 27.4ns ± 0% +0.92% (p=0.016 n=4+5) BM_StringAssignAsciiz_Huge_Opaque 383ns ± 4% 382ns ± 4% ~ (p=0.548 n=5+5) BM_StringAssignAsciiz_Huge_Transparent 375ns ± 0% 380ns ± 0% +1.37% (p=0.029 n=4+4) BM_StringAssignAsciizMix_Opaque 14.0ns ± 0% 14.0ns ± 0% ~ (p=0.881 n=5+5) BM_StringAssignAsciizMix_Transparent 13.7ns ± 1% 13.8ns ± 0% ~ (p=0.056 n=5+5)