diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -408,6 +408,71 @@ memcpy(Loc - 2, Inst, sizeof(Inst)); } +// If Intel CET (Control-Flow Enforcement Technology) is enabled, +// we have to emit special PLT entries containing endbr32 instructions. +namespace { +class IntelCET : public X86 { +public: + IntelCET(); + void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void writeIBTPlt(uint8_t *Buf, size_t NumEntries) const override; + + enum { IBTPltHeaderSize = 16 }; +}; +} // namespace + +IntelCET::IntelCET() { PltHeaderSize = 0; } + +void IntelCET::writeGotPlt(uint8_t *Buf, const Symbol &S) const { + uint64_t VA = + In.IBTPlt->getVA() + IBTPltHeaderSize + S.PltIndex * PltEntrySize; + write32le(Buf, VA); +} + +void IntelCET::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + if (Config->Pic) { + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(Buf, Inst, sizeof(Inst)); + write32le(Buf + 6, GotPltEntryAddr - In.GotPlt->getVA()); + return; + } + + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(Buf, Inst, sizeof(Inst)); + write32le(Buf + 6, GotPltEntryAddr); +} + +void IntelCET::writeIBTPlt(uint8_t *Buf, size_t NumEntries) const { + writePltHeader(Buf); + Buf += IBTPltHeaderSize; + + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0x68, 0, 0, 0, 0, // pushl $reloc_offset + 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC + 0x66, 0x90, // nop + }; + + for (size_t I = 0; I != NumEntries; ++I) { + memcpy(Buf, Inst, sizeof(Inst)); + write32le(Buf + 5, I * 
sizeof(object::ELF32LE::Rel)); + write32le(Buf + 10, -PltHeaderSize - sizeof(Inst) * I - 30); + Buf += sizeof(Inst); + } +} + namespace { class RetpolinePic : public X86 { public: @@ -549,6 +614,11 @@ return &T; } + if (Config->AndFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { + static IntelCET T; + return &T; + } + static X86 T; return &T; } diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -148,7 +148,7 @@ }; memcpy(Buf, PltData, sizeof(PltData)); uint64_t GotPlt = In.GotPlt->getVA(); - uint64_t Plt = In.Plt->getVA(); + uint64_t Plt = In.IBTPlt ? In.IBTPlt->getVA() : In.Plt->getVA(); write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8 write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16 } @@ -567,6 +567,60 @@ return false; } +// If Intel CET (Control-Flow Enforcement Technology) is enabled, +// we have to emit special PLT entries containing endbr64 instructions. +namespace { +class IntelCET : public X86_64 { +public: + IntelCET(); + void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void writeIBTPlt(uint8_t *Buf, size_t NumEntries) const override; + + enum { IBTPltHeaderSize = 16 }; +}; +} // namespace + +IntelCET::IntelCET() { PltHeaderSize = 0; } + +void IntelCET::writeGotPlt(uint8_t *Buf, const Symbol &S) const { + uint64_t VA = + In.IBTPlt->getVA() + IBTPltHeaderSize + S.PltIndex * PltEntrySize; + write64le(Buf, VA); +} + +void IntelCET::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip) + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(Buf, Inst, sizeof(Inst)); + write32le(Buf + 6, GotPltEntryAddr - PltEntryAddr - 10); +} + +void IntelCET::writeIBTPlt(uint8_t *Buf, size_t 
NumEntries) const { + writePltHeader(Buf); + Buf += IBTPltHeaderSize; + + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0x68, 0, 0, 0, 0, // pushq + 0xe9, 0, 0, 0, 0, // jmpq plt[0] + 0x66, 0x90, // nop + }; + + for (size_t I = 0; I < NumEntries; ++I) { + memcpy(Buf, Inst, sizeof(Inst)); + write32le(Buf + 5, I); + write32le(Buf + 10, -PltHeaderSize - sizeof(Inst) * I - 30); + Buf += sizeof(Inst); + } +} + // These nonstandard PLT entries are to migtigate Spectre v2 security // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect // branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT @@ -694,6 +748,11 @@ return &T; } + if (Config->AndFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { + static IntelCET T; + return &T; + } + static X86_64 T; return &T; } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1594,9 +1594,6 @@ // with CET. // // This function returns the merged feature flags. If 0, we cannot enable CET. -// -// Note that the CET-aware PLT is not implemented yet. We do error -// check only. template static uint32_t getAndFeatures() { if (Config->EMachine != EM_386 && Config->EMachine != EM_X86_64) return 0; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -171,9 +171,8 @@ def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">, HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">; -// This option is intentionally hidden from the user as the implementation -// is not complete. 
-def require_cet: F<"require-cet">; +def require_cet: F<"require-cet">, + HelpText<"Force enable x86 Control-Flow Enforcement Technology">; defm format: Eq<"format", "Change the input format of the inputs following this option">, MetaVarName<"[default,elf,binary]">; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -671,14 +671,23 @@ bool isNeeded() const override { return !Entries.empty(); } void addSymbols(); template void addEntry(Symbol &Sym); + size_t getNumEntries() { return Entries.size(); } - size_t HeaderSize; + size_t HeaderSize = 0; private: std::vector Entries; bool IsIplt; }; +// This is x86-only. +class IBTPltSection : public SyntheticSection { +public: + IBTPltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; +}; + class GdbIndexSection final : public SyntheticSection { public: struct AddressEntry { @@ -1112,6 +1121,7 @@ PltSection *Plt; PltSection *Iplt; PPC32Got2Section *PPC32Got2; + IBTPltSection *IBTPlt; RelocationBaseSection *RelaDyn; RelrBaseSection *RelrDyn; RelocationBaseSection *RelaPlt; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2296,6 +2296,21 @@ : ".plt"), HeaderSize(!IsIplt || Config->ZRetpolineplt ? Target->PltHeaderSize : 0), IsIplt(IsIplt) { + bool IsX86Ibt = (Config->AndFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT); + + if (Config->EMachine == EM_PPC64) + Name = ".glink"; + else if (IsX86Ibt) + Name = ".plt.sec"; + else + Name = ".plt"; + + HeaderSize = IsIplt ? 0 : Target->PltHeaderSize; + + // A retpoline PLT always has a header even for IPLT. + if (Config->ZRetpolineplt) + HeaderSize = Target->PltHeaderSize; + // The PLT needs to be writable on SPARC as the dynamic linker will // modify the instructions in the PLT entries. 
if (Config->EMachine == EM_SPARCV9) @@ -2324,7 +2339,7 @@ unsigned RelOff = RelSec->Entsize * I + PltOff; uint64_t Got = B->getGotPltVA(); uint64_t Plt = this->getVA() + Off; - Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); + Target->writePlt(Buf + Off, Got, Plt, I, RelOff); Off += Target->PltEntrySize; } } @@ -2352,6 +2367,76 @@ } } +// This is an x86-only extra PLT section and is used only when a security +// enhancement feature called CET is enabled. In this comment, I'll explain what +// the feature is and why we have two PLT sections if CET is enabled. +// +// So, what does CET do? CET introduces a new restriction to indirect jump +// instructions. CET works this way. Assume that CET is enabled. Then, if you +// execute an indirect jump instruction, the processor verifies that a special +// "landing pad" instruction (which is actually a repurposed NOP instruction and +// now called "endbr32" or "endbr64") is at the jump target. If the jump target +// does not start with that instruction, the processor raises an exception +// instead of continuing to execute code. +// +// If CET is enabled, the compiler emits endbr at all locations where indirect +// jumps may jump to. +// +// This mechanism makes it extremely hard to transfer control to the middle of +// a function that is not supposed to be an indirect jump target, preventing +// certain types of attacks such as ROP or JOP. +// +// Note that the processors on the market as of early 2019 don't actually +// support the feature. Only the spec is available at the moment. +// +// Now, I'll explain why we have this extra PLT section for CET. +// +// Since you can indirectly jump to a PLT entry, we have to make PLT entries +// start with endbr. The problem is there's no extra space for endbr (which is 4 +// bytes long), as the PLT entry is only 16 bytes long and all bytes are already +// used. +// +// In order to deal with the issue, we split a PLT entry into two PLT entries. 
+// Remember that each PLT entry contains code to jump to an address read from +// .got.plt AND code to resolve a dynamic symbol lazily. With the 2-PLT scheme, +// the former code is written to .plt.sec, and the latter code is written to +// .plt. +// +// Lazy symbol resolution in the 2-PLT scheme works in the usual way, except +// that the regular .plt is now called .plt.sec and .plt is repurposed to +// contain only code for lazy symbol resolution. +// +// In other words, this is how the 2-PLT scheme works. Application code is +// supposed to jump to .plt.sec to call an external function. Each .plt.sec +// entry contains code to read an address from a corresponding .got.plt entry +// and jump to that address. Addresses in .got.plt initially point to .plt, so +// when an application calls an external function for the first time, the +// control is transferred to a function that resolves a symbol name from +// external shared object files. That function then rewrites a .got.plt entry +// with a resolved address, so that the subsequent function calls directly jump +// to a desired location from .plt.sec. +// +// There is an open question as to whether the 2-PLT scheme was desirable or +// not. We could have simply extended the PLT entry size to 32 bytes to +// accommodate endbr, and that scheme would have been much simpler than the +// 2-PLT scheme. One reason to split PLT was, by doing that, we could keep hot +// code (.plt.sec) separate from cold code (.plt). But as far as I know no one +// proved that the optimization actually makes a difference. +// +// That said, the 2-PLT scheme is part of the ABI, and debuggers and other +// tools depend on it, so we implement the ABI. +IBTPltSection::IBTPltSection() + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt") {} + +void IBTPltSection::writeTo(uint8_t *Buf) { + Target->writeIBTPlt(Buf, In.Plt->getNumEntries()); +} + +size_t IBTPltSection::getSize() const { + // 16 is the size of the header at the start of this section (.plt). 
+ return 16 + In.Plt->getNumEntries() * Target->PltEntrySize; +} + // The string hash function for .gdb_index. static uint32_t computeGdbHash(StringRef S) { uint32_t H = 0; diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -42,6 +42,7 @@ virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const {} + virtual void writeIBTPlt(uint8_t *Buf, size_t NumEntries) const {} virtual void addPltHeaderSymbols(InputSection &IS) const {} virtual void addPltSymbols(InputSection &IS, uint64_t Off) const {} diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -438,6 +438,12 @@ false /*Sort*/); Add(In.RelaIplt); + if ((Config->EMachine == EM_386 || Config->EMachine == EM_X86_64) && + (Config->AndFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { + In.IBTPlt = make(); + Add(In.IBTPlt); + } + In.Plt = make(false); Add(In.Plt); In.Iplt = make(true); diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -182,6 +182,9 @@ field to the specified value. .It Fl -fini Ns = Ns Ar symbol Specify a finalizer function. +.It Fl -require-cet +Intel Control-Flow Enforcement Technology is enforced. An error is +reported if there is an input object file not compatible with CET. .It Fl -format Ns = Ns Ar input-format , Fl b Ar input-format Specify the format of the inputs following this option. .Ar input-format diff --git a/lld/test/ELF/i386-cet.s b/lld/test/ELF/i386-cet.s --- a/lld/test/ELF/i386-cet.s +++ b/lld/test/ELF/i386-cet.s @@ -29,15 +29,48 @@ # Check .note.gnu.protery without property SHSTK. 
# NOSHSTK: Properties: x86 feature: IBT +# RUN: ld.lld -shared %t1.o -o %t1.so +# RUN: ld.lld -e func1 %t.o %t1.so -o %t +# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s +# RUN: llvm-objdump -s -d %t | FileCheck -check-prefix=DISASM %s + +# DISASM: Disassembly of section .text: +# DISASM: 0000000000401000 func1: +# DISASM-NEXT: 401000: e8 2b 00 00 00 calll 43 +# DISASM-NEXT: 401005: c3 retl + +# DISASM: Disassembly of section .plt: +# DISASM: 0000000000401010 .plt: +# DISASM-NEXT: 401010: ff 35 04 30 40 00 pushl 4206596 +# DISASM-NEXT: 401016: ff 25 08 30 40 00 jmpl *4206600 +# DISASM-NEXT: 40101c: 90 nop +# DISASM-NEXT: 40101d: 90 nop +# DISASM-NEXT: 40101e: 90 nop +# DISASM-NEXT: 40101f: 90 nop +# DISASM-NEXT: 401020: f3 0f 1e fb endbr32 +# DISASM-NEXT: 401024: 68 00 00 00 00 pushl $0 +# DISASM-NEXT: 401029: e9 e2 ff ff ff jmp -30 <.plt> +# DISASM-NEXT: 40102e: 66 90 nop + +# DISASM: Disassembly of section .plt.sec: +# DISASM: 0000000000401030 .plt.sec: +# DISASM-NEXT: 401030: f3 0f 1e fb endbr32 +# DISASM-NEXT: 401034: ff 25 0c 30 40 00 jmpl *4206604 +# DISASM-NEXT: 40103a: 66 0f 1f 44 00 00 nopw (%eax,%eax) + +# DISASM: Contents of section .got.plt: +# DISASM-NEXT: 403000 00204000 00000000 00000000 20104000 + .section ".note.gnu.property", "a" .long 4 -.long 0xc +.long 0x10 .long 0x5 .asciz "GNU" .long 0xc0000002 .long 4 .long 3 +.long 0 .text .globl func1 diff --git a/lld/test/ELF/x86-64-cet.s b/lld/test/ELF/x86-64-cet.s --- a/lld/test/ELF/x86-64-cet.s +++ b/lld/test/ELF/x86-64-cet.s @@ -29,6 +29,36 @@ # Check .note.gnu.protery without property SHSTK. 
# NOSHSTK: Properties: x86 feature: IBT +# RUN: ld.lld -shared %t1.o -o %t1.so +# RUN: ld.lld -e func1 %t.o %t1.so -o %t +# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s +# RUN: llvm-objdump -s -d %t | FileCheck -check-prefix=DISASM %s + +# DISASM: Disassembly of section .text: +# DISASM: 0000000000201000 func1: +# DISASM-NEXT: 201000: e8 2b 00 00 00 callq 43 +# DISASM-NEXT: 201005: c3 retq + +# DISASM: Disassembly of section .plt: +# DISASM: 0000000000201010 .plt: +# DISASM-NEXT: 201010: ff 35 f2 1f 00 00 pushq 8178(%rip) +# DISASM-NEXT: 201016: ff 25 f4 1f 00 00 jmpq *8180(%rip) +# DISASM-NEXT: 20101c: 0f 1f 40 00 nopl (%rax) +# DISASM-NEXT: 201020: f3 0f 1e fa endbr64 +# DISASM-NEXT: 201024: 68 00 00 00 00 pushq $0 +# DISASM-NEXT: 201029: e9 e2 ff ff ff jmp -30 <.plt> +# DISASM-NEXT: 20102e: 66 90 nop + +# DISASM: Disassembly of section .plt.sec: +# DISASM: 0000000000201030 .plt.sec: +# DISASM-NEXT: 201030: f3 0f 1e fa endbr64 +# DISASM-NEXT: 201034: ff 25 de 1f 00 00 jmpq *8158(%rip) +# DISASM-NEXT: 20103a: 66 0f 1f 44 00 00 nopw (%rax,%rax) + +# DISASM: Contents of section .got.plt: +# DISASM-NEXT: 203000 00202000 00000000 00000000 00000000 +# DISASM-NEXT: 203010 00000000 00000000 20102000 00000000 + .section ".note.gnu.property", "a" .long 4 .long 0x10