Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -43,6 +43,8 @@ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, + uint8_t *End) const override; private: void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op, @@ -469,6 +471,40 @@ write32le(Loc - 1, Val + 1); } +// A split-stack prologue starts by checking the amount of stack remaining +// in one of two ways: +// A) Comparing of the stack pointer to a field in the tcb. +// B) Or a load of a stack pointer offset with an lea to r10 or r11. +template <> +bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, + uint8_t *End) const { + // Replace "cmp %fs:0x70,%rsp" and subsequent branch with + // "stc, nopl 0x0(%rax,%rax,1)"; all 5 leading bytes must match. + if (Loc + 8 < End && memcmp(Loc, "\x64\x48\x3b\x24\x25", 5) == 0) { + memcpy(Loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8); + return true; + } + + // Adjust "lea -0x200(%rsp),%r10" to lea "-0x4200(%rsp),%r10" + if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x94\x24\x00\xfe\xff", 7) == 0) { + memcpy(Loc, "\x4c\x8d\x94\x24\x00\xbe\xff", 7); + return true; + } + + // Adjust "lea -0x200(%rsp),%r11" to lea "-0x4200(%rsp),%r11" + if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x9c\x24\x00\xfe\xff", 7) == 0) { + memcpy(Loc, "\x4c\x8d\x9c\x24\x00\xbe\xff", 7); + return true; + } + return false; +} + +template <> +bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, + uint8_t *End) const { + llvm_unreachable("Target doesn't support split stacks."); +} + // These nonstandard PLT entries are to migtigate Spectre v2 security // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect // branch instructions such as `jmp *GOTPLT(%rip)`.
So, in the following PLT Index: ELF/InputFiles.h =================================================================== --- ELF/InputFiles.h +++ ELF/InputFiles.h @@ -210,6 +210,14 @@ // symbol table. StringRef SourceFile; + // True if the file defines functions compiled with + // -fsplit-stack. Usually false. + bool SplitStack = false; + + // True if the file defines functions compiled with -fsplit-stack, + // but had one or more functions with the no_split_stack attribute. + bool SomeNoSplitStack = false; + private: void initializeSections(llvm::DenseSet &ComdatGroups); Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -653,13 +653,24 @@ if (Name == ".note.GNU-stack") return &InputSection::Discarded; - // Split stacks is a feature to support a discontiguous stack. At least - // as of 2017, it seems that the feature is not being used widely. - // Only GNU gold supports that. We don't. For the details about that, - // see https://gcc.gnu.org/wiki/SplitStacks + // Split stacks is a feature to support a discontiguous stack, + // commonly used in the programming language Go. For the details, + // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled + // for split stack will include a .note.GNU-split-stack section. if (Name == ".note.GNU-split-stack") { - error(toString(this) + - ": object file compiled with -fsplit-stack is not supported"); + if (Config->Relocatable) { + error("Cannot mix split-stack and non-split-stack in a relocatable link"); + return &InputSection::Discarded; + } + this->SplitStack = true; + return &InputSection::Discarded; + } + + // An object file compiled for split stack, but where some of the + // functions were compiled with the no_split_stack attribute will + // include a .note.GNU-no-split-stack section.
+ if (Name == ".note.GNU-no-split-stack") { + this->SomeNoSplitStack = true; return &InputSection::Discarded; } Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -106,7 +106,7 @@ static bool classof(const SectionBase *S) { return S->kind() != Output; } - // The file which contains this section. It's dynamic type is always + // The file which contains this section. Its dynamic type is always // ObjFile, but in order to avoid ELFT, we use InputFile as // its static type. InputFile *File; @@ -164,6 +164,11 @@ InputSection *getLinkOrderDep() const; + // Get the function symbol that encloses this offset from within the + // section. + template + Defined *getEnclosingFunction(uint64_t Offset); + // Compilers emit zlib-compressed debug sections if the -gz option // is given. This function checks if this section is compressed, and // if so, decompress in memory. @@ -185,6 +190,15 @@ // This vector contains such "cooked" relocations. std::vector Relocations; + // A function compiled with -fsplit-stack calling a function + // compiled without -fsplit-stack needs its prologue adjusted. Find + // such functions and adjust their prologues. This is very similar + // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more + // information. 
+ template + void adjustSplitStackFunctionPrologues(uint8_t *Buf, uint8_t *End); + + template llvm::ArrayRef getDataAs() const { size_t S = Data.size(); assert(S % sizeof(T) == 0); Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -14,6 +14,7 @@ #include "LinkerScript.h" #include "OutputSections.h" #include "Relocations.h" +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -26,7 +27,10 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Threading.h" #include "llvm/Support/xxhash.h" +#include #include +#include +#include using namespace llvm; using namespace llvm::ELF; @@ -212,6 +216,17 @@ return cast(File->getSections()[Link]); } +// Find a function symbol that encloses a given location. +template +Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) { + for (Symbol *B : File->getSymbols()) + if (Defined *D = dyn_cast(B)) + if (D->Section == this && D->Type == STT_FUNC && + D->Value <= Offset && Offset < D->Value + D->Size) + return D; + return nullptr; +} + // Returns a source location string. Used to construct an error message. template std::string InputSectionBase::getLocation(uint64_t Offset) { @@ -231,12 +246,8 @@ if (SrcFile.empty()) SrcFile = toString(File); - // Find a function symbol that encloses a given location. - for (Symbol *B : File->getSymbols()) - if (auto *D = dyn_cast(B)) - if (D->Section == this && D->Type == STT_FUNC) - if (D->Value <= Offset && Offset < D->Value + D->Size) - return SrcFile + ":(function " + toString(*D) + ")"; + if (Defined *D = getEnclosingFunction(Offset)) + return SrcFile + ":(function " + toString(*D) + ")"; // If there's no symbol, print out the offset in the section. 
return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); @@ -729,6 +740,9 @@ template void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) { + if (Flags & SHF_EXECINSTR) + adjustSplitStackFunctionPrologues(Buf, BufEnd); + if (Flags & SHF_ALLOC) { relocateAlloc(Buf, BufEnd); return; @@ -804,6 +818,110 @@ } } +// For each function-defining prologue, find any calls to __morestack, +// and replace them with calls to __morestack_non_split. +static void switchMorestackCallsToMorestackNonSplit( + llvm::DenseSet& Prologues, + std::vector& MorestackCalls) { + + // If the target adjusted a function's prologue, all calls to + // __morestack inside that function should be switched to + // __morestack_non_split. + Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split"); + + // Sort both collections to compare addresses efficiently. + llvm::sort(MorestackCalls.begin(), MorestackCalls.end(), + [](const Relocation *L, const Relocation *R) { + return L->Offset < R->Offset; + }); + std::vector Functions(Prologues.begin(), Prologues.end()); + llvm::sort( + Functions.begin(), Functions.end(), + [](const Defined *L, const Defined *R) { return L->Value < R->Value; }); + + auto It = MorestackCalls.begin(); + for (Defined *F : Functions) { + // Find the first call to __morestack within the function. + while (It != MorestackCalls.end() && (*It)->Offset < F->Value) + ++It; + // Adjust all calls inside the function.
+ while (It != MorestackCalls.end() && (*It)->Offset < F->Value + F->Size) { + // Without a definition of __morestack_non_split there is nothing to + // retarget the call to; report it instead of storing a null symbol. + if (!MoreStackNonSplit) { + error("Mixing split-stack objects requires a definition of " + "__morestack_non_split"); + return; + } + (*It)->Sym = MoreStackNonSplit; + ++It; + } + } +} + +static bool +enclosingPrologueAdjusted(uint64_t Offset, + const llvm::DenseSet &Prologues) { + for (Defined *F : Prologues) + if (F->Value <= Offset && Offset < F->Value + F->Size) + return true; + return false; +} + +// If a function compiled for split stack calls a function not +// compiled for split stack, then the caller needs its prologue +// adjusted to ensure that the called function will have enough stack +// available. Find those functions, and adjust their prologues. +template +void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf, + uint8_t *End) { + if (!getFile()->SplitStack) + return; + llvm::DenseSet AdjustedPrologues; + std::vector MorestackCalls; + + for (Relocation &Rel : Relocations) { + // Local symbols can't possibly be cross-calls, and should have been + // resolved long before this line. + if (Rel.Sym->isLocal()) + continue; + + Defined *D = dyn_cast(Rel.Sym); + // A reference to an undefined symbol was an error, and should not + // have gotten to this point. + if (!D) + continue; + + // Ignore calls into the split-stack api. + if (D->getName().startswith("__morestack")) { + if (D->getName().equals("__morestack")) + MorestackCalls.push_back(&Rel); + continue; + } + + // A relocation to non-function isn't relevant. Sometimes + // __morestack is not marked as a function, so this check comes + // after the name check.
+ if (D->Type != STT_FUNC) + continue; + + if (enclosingPrologueAdjusted(Rel.Offset, AdjustedPrologues)) + continue; + + if (Defined *F = getEnclosingFunction(Rel.Offset)) { + if (Target->adjustPrologueForCrossSplitStack(Buf + F->Value, End)) { + AdjustedPrologues.insert(F); + continue; + } + } + if (!getFile()->SomeNoSplitStack) + error("function call at " + getErrorLocation(Buf + Rel.Offset) + + "crosses a split-stack boundary, but unable " + + "to adjust the enclosing function's prologue"); + } + switchMorestackCallsToMorestackNonSplit(AdjustedPrologues, MorestackCalls); +} + template void InputSection::writeTo(uint8_t *Buf) { if (Type == SHT_NOBITS) return; Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -59,6 +59,13 @@ virtual bool needsThunk(RelExpr Expr, RelType RelocType, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const; + + // The function with a prologue starting at Loc was compiled with + // -fsplit-stack and it calls a function compiled without. Adjust the prologue + // to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks. 
+ virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, + uint8_t *End) const; + // Return true if we can reach Dst from Src with Relocation RelocType virtual bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -130,6 +130,12 @@ return false; } +bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, + uint8_t *End) const { + llvm_unreachable("Target doesn't support split stacks."); +} + + bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { return true; } Index: test/ELF/Inputs/x86-64-split-stack-main.s =================================================================== --- /dev/null +++ test/ELF/Inputs/x86-64-split-stack-main.s @@ -0,0 +1,16 @@ + .text + + .global non_split + .type non_split,@function +non_split: + retq + .size non_split,. - non_split + + .global non_function_text_symbol +non_function_text_symbol: + .byte 0x01 + .type non_function_text_symbol,@STT_OBJECT + .size non_function_text_symbol, 1 + + + .section .note.GNU-stack,"",@progbits Index: test/ELF/splitstacks.s =================================================================== --- test/ELF/splitstacks.s +++ /dev/null @@ -1,11 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o - -# RUN: not ld.lld %t1.o -o %t 2>&1 | FileCheck %s -# CHECK: .o: object file compiled with -fsplit-stack is not supported - -.globl _start -_start: - nop - -.section .note.GNU-split-stack,"",@progbits Index: test/ELF/x86-64-split-stack-prologue-adjust-fail.s =================================================================== --- /dev/null +++ test/ELF/x86-64-split-stack-prologue-adjust-fail.s @@ -0,0 +1,31 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o 
%t2.o + +# RUN: not ld.lld --defsym __morestack=0x100 %t1.o %t2.o -o %t 2>&1 | FileCheck %s + +# An unknown prologue gives a match failure +# CHECK: unable to adjust the enclosing function's + +# RUN: not ld.lld -r --defsym __morestack=0x100 %t1.o %t2.o -o %t 2>&1 | FileCheck %s -check-prefix=RELOCATABLE +# RELOCATABLE: Cannot mix split-stack and non-split-stack in a relocatable link + + .text + + .global unknown_prologue + .type unknown_prologue,@function +unknown_prologue: + push %rbp + mov %rsp,%rbp + cmp %fs:0x70,%rsp + jae 1f + callq __morestack + retq +1: + callq non_split + leaveq + retq + + .size unknown_prologue,. - unknown_prologue + + .section .note.GNU-split-stack,"",@progbits Index: test/ELF/x86-64-split-stack-prologue-adjust-silent.s =================================================================== --- /dev/null +++ test/ELF/x86-64-split-stack-prologue-adjust-silent.s @@ -0,0 +1,32 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t2.o + +# RUN: ld.lld --defsym __morestack=0x100 %t1.o %t2.o -o %t +# RUN: llvm-objdump -d %t 2>&1 | FileCheck %s + +# An unknown prologue ordinarily gives a match failure, except that this +# object file includes a .note.GNU-no-split-stack section, which tells the +# linker to expect such prologues, and therefore not error. + +# CHECK: __morestack + + .text + + .global unknown_prologue + .type unknown_prologue,@function +unknown_prologue: + push %rbp + mov %rsp,%rbp + cmp %fs:0x70,%rsp + jae 1f + callq __morestack + retq +1: + callq non_split + leaveq + retq + + .size unknown_prologue,. 
- unknown_prologue + .section .note.GNU-split-stack,"",@progbits + .section .note.GNU-no-split-stack,"",@progbits Index: test/ELF/x86-64-split-stack-prologue-adjust-success.s =================================================================== --- /dev/null +++ test/ELF/x86-64-split-stack-prologue-adjust-success.s @@ -0,0 +1,124 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t2.o + +# RUN: ld.lld --defsym __morestack=0x100 --defsym __morestack_non_split=0x200 %t1.o %t2.o -o %t -z notext +# RUN: llvm-objdump -d %t | FileCheck %s + +# Avoid duplicating the prologue for every test via macros. + +.macro prologue1 function_to_call + .global prologue1_calls_\function_to_call + .type prologue1_calls_\function_to_call,@function +prologue1_calls_\function_to_call: + cmp %fs:0x70,%rsp + jae 1f + callq __morestack + retq +1: + # Various and duplicate calls to ensure every code path is taken. + callq \function_to_call + callq \function_to_call + callq 1b + callq non_function_text_symbol + retq + .size prologue1_calls_\function_to_call,. - prologue1_calls_\function_to_call +.endm + +.macro prologue2 function_to_call register + .global prologue2_calls_\function_to_call\register + .type prologue2_calls_\function_to_call\register,@function +prologue2_calls_\function_to_call\register: + lea -0x200(%rsp),%\register + cmp %fs:0x70,%\register + jae 1f + callq __morestack + retq +1: + # Various and duplicate calls to ensure every code path is taken. + callq \function_to_call + callq \function_to_call + callq 1b + callq non_function_text_symbol + retq + .size prologue2_calls_\function_to_call\register,. 
- prologue2_calls_\function_to_call\register +.endm + + .local foo +foo: + .section .text,"ax",@progbits + .quad foo + + .text + +# For split-stack code calling split-stack code, ensure prologue v1 still +# calls plain __morestack, and that any raw bytes written to the prologue +# make sense. +# CHECK: prologue1_calls_split: +# CHECK-NEXT: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%rsp +# CHECK: jae{{.*$}} +# CHECK-NEXT: callq{{.*}}<__morestack> + +prologue1 split + +# For split-stack code calling split-stack code, ensure prologue v2 still +# calls plain __morestack, that any raw bytes written to the prologue +# make sense, and that the register number is preserved. +# CHECK: prologue2_calls_splitr10: +# CHECK-NEXT: lea{{.*}} -{{[0-9]+}}(%rsp),{{.*}}%r10 +# CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r{{[0-9]+}} +# CHECK: jae{{.*}} +# CHECK-NEXT: callq{{.*}}<__morestack> + +prologue2 split r10 + +# CHECK: prologue2_calls_splitr11: +# CHECK-NEXT: lea{{.*}} -{{[0-9]+}}(%rsp),{{.*}}%r11 +# CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r{{[0-9]+}} +# CHECK: jae{{.*}} +# CHECK-NEXT: callq{{.*}}<__morestack> + +prologue2 split r11 + +# For split-stack code calling non-split-stack code, ensure prologue v1 +# calls __morestack_non_split, and that any raw bytes written to the prologue +# make sense. 
+# CHECK: prologue1_calls_non_split: +# CHECK-NEXT: stc{{.*$}} +# CHECK-NEXT: nopl{{.*$}} +# CHECK: jae{{.*$}} +# CHECK-NEXT: callq{{.*}}<__morestack_non_split> + +prologue1 non_split + +# For split-stack code calling non-split-stack code, ensure prologue v2 +# calls __morestack_non_split, that any raw bytes written to the prologue +# make sense, and that the register number is preserved +# CHECK: prologue2_calls_non_splitr10: +# CHECK-NEXT: lea{{.*$}} +# CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r10 +# CHECK: jae{{.*$}} +# CHECK-NEXT: callq{{.*}}<__morestack_non_split> + +prologue2 non_split r10 + +# CHECK: prologue2_calls_non_splitr11: +# CHECK-NEXT: lea{{.*$}} +# CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r11 +# CHECK: jae{{.*$}} +# CHECK-NEXT: callq{{.*}}<__morestack_non_split> + +prologue2 non_split r11 +# call foo@plt # for code-coverage. + + + + .global split + .type split,@function +split: + retq + + .size split,. - split + + .section .note.GNU-stack,"",@progbits + .section .note.GNU-split-stack,"",@progbits