diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -614,8 +614,7 @@ uint64_t pltEntryAddr) const override; private: - bool btiHeader; // bti instruction needed in PLT Header - bool btiEntry; // bti instruction needed in PLT Entry + bool btiHeader; // bti instruction needed in PLT Header and Entry bool pacEntry; // autia1716 instruction needed in PLT Entry }; } // namespace @@ -626,15 +625,14 @@ // address of the PLT entry can be taken by the program, which permits an // indirect jump to the PLT entry. This can happen when the address // of the PLT entry for a function is canonicalised due to the address of - // the function in an executable being taken by a shared library. - // FIXME: There is a potential optimization to omit the BTI if we detect - // that the address of the PLT entry isn't taken. + // the function in an executable being taken by a shared library, or + // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating + // relocations. // The PAC PLT entries require dynamic loader support and this isn't known // from properties in the objects, so we use the command line flag. - btiEntry = btiHeader && !config->shared; pacEntry = config->zPacPlt; - if (btiEntry || pacEntry) { + if (btiHeader || pacEntry) { pltEntrySize = 24; ipltEntrySize = 24; } @@ -694,7 +692,12 @@ }; const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop - if (btiEntry) { + // needsPltAddr indicates a non-ifunc canonical PLT entry whose address may + // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its + // address may escape if referenced by a direct relocation. The condition is + // conservative. + bool hasBti = btiHeader && (sym.needsPltAddr || sym.isInIplt); + if (hasBti) { memcpy(buf, btiData, sizeof(btiData)); buf += sizeof(btiData); pltEntryAddr += sizeof(btiData); @@ -711,7 +714,7 @@ memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); else memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr)); - if (!btiEntry) + if (!hasBti) // We didn't add the BTI c instruction so round out size with NOP. memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData)); } diff --git a/lld/test/ELF/Inputs/aarch64-addrifunc.s b/lld/test/ELF/Inputs/aarch64-addrifunc.s --- a/lld/test/ELF/Inputs/aarch64-addrifunc.s +++ b/lld/test/ELF/Inputs/aarch64-addrifunc.s @@ -1,8 +1,7 @@ .text - .globl myfunc .globl func1 .type func1, %function func1: - adrp x8, :got: myfunc - ldr x8, [x8, :got_lo12: myfunc] + adrp x8, :got: ifunc2 + ldr x8, [x8, :got_lo12: ifunc2] ret diff --git a/lld/test/ELF/aarch64-feature-bti.s b/lld/test/ELF/aarch64-feature-bti.s --- a/lld/test/ELF/aarch64-feature-bti.s +++ b/lld/test/ELF/aarch64-feature-bti.s @@ -1,5 +1,6 @@ # REQUIRES: aarch64 # RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu --defsym CANONICAL_PLT=1 %s -o %tcanon.o # RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/aarch64-bti1.s -o %t1.o # RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/aarch64-func3.s -o %t2.o # RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/aarch64-func3-bti.s -o %t3.o @@ -65,24 +66,23 @@ # BTISO-NEXT: 10360: bti c # BTISO-NEXT: stp x16, x30, [sp, #-16]! # BTISO-NEXT: adrp x16, 0x30000 -# BTISO-NEXT: ldr x17, [x16, #1136] -# BTISO-NEXT: add x16, x16, #1136 +# BTISO-NEXT: ldr x17, [x16, #1144] +# BTISO-NEXT: add x16, x16, #1144 # BTISO-NEXT: br x17 # BTISO-NEXT: nop # BTISO-NEXT: nop # BTISO: 0000000000010380 : # BTISO-NEXT: 10380: adrp x16, 0x30000 -# BTISO-NEXT: ldr x17, [x16, #1144] -# BTISO-NEXT: add x16, x16, #1144 +# BTISO-NEXT: ldr x17, [x16, #1152] +# BTISO-NEXT: add x16, x16, #1152 # BTISO-NEXT: br x17 # SOGOTPLT2: Hex dump of section '.got.plt' -# SOGOTPLT2-NEXT: 0x00030460 00000000 00000000 00000000 00000000 -# SOGOTPLT2-NEXT: 0x00030470 00000000 00000000 60030100 00000000 +# SOGOTPLT2-NEXT: 0x00030468 00000000 00000000 00000000 00000000 +# SOGOTPLT2-NEXT: 0x00030478 00000000 00000000 60030100 00000000 ## Build an executable with all relocatable inputs having the BTI -## .note.gnu.property. We expect a bti c in front of all PLT entries as the -## address of a PLT entry can escape an executable. +## .note.gnu.property. # RUN: ld.lld %t2.o --shared --soname=t2.so -o %t2.so @@ -105,12 +105,26 @@ # EXECBTI-NEXT: nop # EXECBTI-NEXT: nop # EXECBTI: 0000000000210370 : -# EXECBTI-NEXT: 210370: bti c -# EXECBTI-NEXT: adrp x16, 0x230000 +# EXECBTI-NEXT: 210370: adrp x16, 0x230000 # EXECBTI-NEXT: ldr x17, [x16, #1168] # EXECBTI-NEXT: add x16, x16, #1168 # EXECBTI-NEXT: br x17 # EXECBTI-NEXT: nop +# EXECBTI-NEXT: nop + +## We expect a bti c in front of a canonical PLT entry because its address +## can escape the executable. +# RUN: ld.lld %tcanon.o %t.so %t2.so -o %t2.exe +# RUN: llvm-readelf --dynamic-table -n %t2.exe | FileCheck --check-prefix=BTIPROP %s +# RUN: llvm-objdump -d --mattr=+bti --no-show-raw-insn %t2.exe | FileCheck --check-prefix=EXECBTI2 %s +# EXECBTI2: 0000000000210380 : +# EXECBTI2-NEXT: 210380: bti c +# EXECBTI2-NEXT: adrp x16, 0x230000 +# EXECBTI2-NEXT: ldr x17, [x16, #1184] +# EXECBTI2-NEXT: add x16, x16, #1184 +# EXECBTI2-NEXT: br x17 +# EXECBTI2-NEXT: nop + ## We expect the same for PIE, as the address of an ifunc can escape # RUN: ld.lld --pie %t.o %t.so %t2.so -o %tpie.exe @@ -133,12 +147,12 @@ # PIE-NEXT: nop # PIE-NEXT: nop # PIE: 0000000000010370 : -# PIE-NEXT: 10370: bti c -# PIE-NEXT: adrp x16, 0x30000 +# PIE-NEXT: 10370: adrp x16, 0x30000 # PIE-NEXT: ldr x17, [x16, #1184] # PIE-NEXT: add x16, x16, #1184 # PIE-NEXT: br x17 # PIE-NEXT: nop +# PIE-NEXT: nop ## Build and executable with not all relocatable inputs having the BTI ## .note.property, expect no bti c and no .note.gnu.property entry @@ -198,12 +212,12 @@ # FORCE-NEXT: nop # FORCE-NEXT: nop # FORCE: 00000000002103a0 : -# FORCE-NEXT: 2103a0: bti c -# FORCE-NEXT: adrp x16, 0x230000 +# FORCE-NEXT: 2103a0: adrp x16, 0x230000 # FORCE-NEXT: ldr x17, [x16, #1200] # FORCE-NEXT: add x16, x16, #1200 # FORCE-NEXT: br x17 # FORCE-NEXT: nop +# FORCE-NEXT: nop .section ".note.gnu.property", "a" .long 4 @@ -220,5 +234,10 @@ .globl _start .type func1,%function func1: +.ifdef CANONICAL_PLT + adrp x0, func2 + add x0, x0, :lo12:func2 +.else bl func2 +.endif ret diff --git a/lld/test/ELF/aarch64-feature-btipac.s b/lld/test/ELF/aarch64-feature-btipac.s --- a/lld/test/ELF/aarch64-feature-btipac.s +++ b/lld/test/ELF/aarch64-feature-btipac.s @@ -25,15 +25,15 @@ # BTIPACSO-NEXT: 10360: bti c # BTIPACSO-NEXT: stp x16, x30, [sp, #-16]! # BTIPACSO-NEXT: adrp x16, 0x30000 -# BTIPACSO-NEXT: ldr x17, [x16, #1136] -# BTIPACSO-NEXT: add x16, x16, #1136 +# BTIPACSO-NEXT: ldr x17, [x16, #1144] +# BTIPACSO-NEXT: add x16, x16, #1144 # BTIPACSO-NEXT: br x17 # BTIPACSO-NEXT: nop # BTIPACSO-NEXT: nop # BTIPACSO: 0000000000010380 : # BTIPACSO-NEXT: 10380: adrp x16, 0x30000 -# BTIPACSO-NEXT: ldr x17, [x16, #1144] -# BTIPACSO-NEXT: add x16, x16, #1144 +# BTIPACSO-NEXT: ldr x17, [x16, #1152] +# BTIPACSO-NEXT: add x16, x16, #1152 # BTIPACSO-NEXT: br x17 # BTIPACPROP: Properties: aarch64 feature: BTI, PAC @@ -68,11 +68,12 @@ # BTIPACEX-NEXT: nop # BTIPACEX-NEXT: nop # BTIPACEX: 00000000002103a0 : -# BTIPACEX-NEXT: 2103a0: bti c -# BTIPACEX-NEXT: adrp x16, 0x230000 +# BTIPACEX-NEXT: 2103a0: adrp x16, 0x230000 # BTIPACEX-NEXT: ldr x17, [x16, #1200] # BTIPACEX-NEXT: add x16, x16, #1200 # BTIPACEX-NEXT: br x17 +# BTIPACEX-NEXT: nop +# BTIPACEX-NEXT: nop # BTIPACDYNEX: 0x0000000070000001 (AARCH64_BTI_PLT) # BTIPACDYNEX-NOT: 0x0000000070000003 (AARCH64_PAC_PLT) @@ -162,12 +163,12 @@ # BTIPACEX2-NEXT: nop # BTIPACEX2-NEXT: nop # BTIPACEX2: 00000000002103a0 : -# BTIPACEX2-NEXT: 2103a0: bti c -# BTIPACEX2-NEXT: adrp x16, 0x230000 +# BTIPACEX2-NEXT: 2103a0: adrp x16, 0x230000 # BTIPACEX2-NEXT: ldr x17, [x16, #1216] # BTIPACEX2-NEXT: add x16, x16, #1216 # BTIPACEX2-NEXT: autia1716 # BTIPACEX2-NEXT: br x17 +# BTIPACEX2-NEXT: nop # BTIPACDYN2: 0x0000000070000001 (AARCH64_BTI_PLT) # BTIPACDYN2-NEXT: 0x0000000070000003 (AARCH64_PAC_PLT) diff --git a/lld/test/ELF/aarch64-ifunc-bti.s b/lld/test/ELF/aarch64-ifunc-bti.s --- a/lld/test/ELF/aarch64-ifunc-bti.s +++ b/lld/test/ELF/aarch64-ifunc-bti.s @@ -6,37 +6,57 @@ # RUN: ld.lld --pie %t1.so %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn --mattr=+bti --triple=aarch64-linux-gnu %t | FileCheck %s +# RUN: ld.lld -shared -Bsymbolic %t1.so %t.o -o %t.so +# RUN: llvm-objdump -d --no-show-raw-insn --mattr=+bti %t | FileCheck %s --check-prefix=SHARED + # When the address of an ifunc is taken using a non-got reference which clang # can do, LLD exports a canonical PLT entry that may have its address taken so # we must use bti c. # CHECK: Disassembly of section .plt: -# CHECK: 0000000000010380 <.plt>: -# CHECK-NEXT: 10380: bti c +# CHECK: 00000000000103a0 <.plt>: +# CHECK-NEXT: 103a0: bti c # CHECK-NEXT: stp x16, x30, [sp, #-16]! # CHECK-NEXT: adrp x16, 0x30000 -# CHECK-NEXT: ldr x17, [x16, #1288] -# CHECK-NEXT: add x16, x16, #1288 +# CHECK-NEXT: ldr x17, [x16, #1344] +# CHECK-NEXT: add x16, x16, #1344 # CHECK-NEXT: br x17 # CHECK-NEXT: nop # CHECK-NEXT: nop -# CHECK: 00000000000103a0 : -# CHECK-NEXT: 103a0: bti c -# CHECK-NEXT: adrp x16, 0x30000 -# CHECK-NEXT: ldr x17, [x16, #1296] -# CHECK-NEXT: add x16, x16, #1296 +# CHECK: 00000000000103c0 : +# CHECK-NEXT: 103c0: adrp x16, 0x30000 +# CHECK-NEXT: ldr x17, [x16, #1352] +# CHECK-NEXT: add x16, x16, #1352 # CHECK-NEXT: br x17 # CHECK-NEXT: nop +# CHECK-NEXT: nop # CHECK-EMPTY: # CHECK: Disassembly of section .iplt: # CHECK-EMPTY: -# CHECK-NEXT: 00000000000103c0 : -# CHECK-NEXT: 103c0: bti c +## The address of ifunc1@plt does not escape so it does not need `bti c`, +## but having bti is not wrong. +# CHECK-NEXT: 00000000000103e0 <.iplt>: +# CHECK-NEXT: 103e0: bti c # CHECK-NEXT: adrp x16, 0x30000 -# CHECK-NEXT: ldr x17, [x16, #1304] -# CHECK-NEXT: add x16, x16, #1304 +# CHECK-NEXT: ldr x17, [x16, #1360] +# CHECK-NEXT: add x16, x16, #1360 # CHECK-NEXT: br x17 # CHECK-NEXT: nop +# CHECK-EMPTY: +## The address of ifunc2 (STT_FUNC) escapes, so it must have `bti c`. +# CHECK-NEXT: 00000000000103f8 : +# CHECK-NEXT: 103f8: bti c +# CHECK-NEXT: adrp x16, 0x30000 +# CHECK-NEXT: ldr x17, [x16, #1368] +# CHECK-NEXT: add x16, x16, #1368 +# CHECK-NEXT: br x17 +# CHECK-NEXT: nop + +# SHARED: <.iplt>: +# SHARED-NEXT: bti c + +# SHARED: : +# SHARED-NEXT: bti c .section ".note.gnu.property", "a" .long 4 @@ -50,11 +70,16 @@ .long 0 .text -.globl myfunc -.type myfunc,@gnu_indirect_function -myfunc: +.globl ifunc1 +.type ifunc1,@gnu_indirect_function +ifunc1: ret +.globl ifunc2 +.type ifunc2,@gnu_indirect_function +ifunc2: + ret + .globl func1 .text @@ -62,6 +87,7 @@ .type _start, %function _start: bl func1 - adrp x8, myfunc - add x8, x8, :lo12:myfunc + bl ifunc1 + adrp x8, ifunc2 + add x8, x8, :lo12:ifunc2 ret