Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -482,22 +482,25 @@ template <> bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End) const { + if (Loc + 8 >= End) + return false; + // Replace "cmp %fs:0x70,%rsp" and subsequent branch // with "stc, nopl 0x0(%rax,%rax,1)" - if (Loc + 8 < End && memcmp(Loc, "\x64\x48\x3b\x24\x25", 4) == 0) { + if (memcmp(Loc, "\x64\x48\x3b\x24\x25", 5) == 0) { memcpy(Loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8); return true; } - // Adjust "lea -0x200(%rsp),%r10" to lea "-0x4200(%rsp),%r10" - if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x94\x24\x00\xfe\xff", 7) == 0) { - memcpy(Loc, "\x4c\x8d\x94\x24\x00\xbe\xff", 7); - return true; - } - - // Adjust "lea -0x200(%rsp),%r11" to lea "-0x4200(%rsp),%r11" - if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x9c\x24\x00\xfe\xff", 7) == 0) { - memcpy(Loc, "\x4c\x8d\x9c\x24\x00\xbe\xff", 7); + // Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could + // be r10 or r11. The lea instruction feeds a subsequent compare which checks + // if there is X available stack space. Making X larger effectively reserves + // that much additional space. The stack grows downward so subtract the value. + if (memcmp(Loc, "\x4c\x8d\x94\x24", 4) == 0 || + memcmp(Loc, "\x4c\x8d\x9c\x24", 4) == 0) { + // The offset bytes are encoded four bytes after the start of the + // instruction. + write32le(Loc + 4, read32le(Loc + 4) - 0x4000); return true; } return false; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -838,6 +838,11 @@ // __morestack inside that function should be switched to // __morestack_non_split. 
Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split"); + if (!MoreStackNonSplit) { + error("Mixing split-stack objects requires a definition of " + "__morestack_non_split"); + return; + } // Sort both collections to compare addresses efficiently. llvm::sort(MorestackCalls.begin(), MorestackCalls.end(), @@ -862,8 +867,8 @@ } } -static bool enclosingPrologueAdjusted(uint64_t Offset, - const DenseSet &Prologues) { +static bool enclosingPrologueAttempted(uint64_t Offset, + const DenseSet &Prologues) { for (Defined *F : Prologues) if (F->Value <= Offset && Offset < F->Value + F->Size) return true; @@ -879,7 +884,7 @@ uint8_t *End) { if (!getFile()->SplitStack) return; - DenseSet AdjustedPrologues; + DenseSet Prologues; std::vector MorestackCalls; for (Relocation &Rel : Relocations) { @@ -902,21 +907,26 @@ if (D->Type != STT_FUNC) continue; - if (enclosingPrologueAdjusted(Rel.Offset, AdjustedPrologues)) + // If the callee's file was compiled with split stack, nothing to do. + auto *IS = cast_or_null(D->Section); + if (!IS || IS->getFile()->SplitStack) + continue; + + if (enclosingPrologueAttempted(Rel.Offset, Prologues)) continue; if (Defined *F = getEnclosingFunction(Rel.Offset)) { - if (Target->adjustPrologueForCrossSplitStack(Buf + F->Value, End)) { - AdjustedPrologues.insert(F); + Prologues.insert(F); + if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value), + End)) continue; - } + if (!getFile()->SomeNoSplitStack) + error(lld::toString(this) + ": " + F->getName() + + " (with -fsplit-stack) calls " + D->getName() + + " (without -fsplit-stack), but couldn't adjust its prologue"); } - if (!getFile()->SomeNoSplitStack) - error("function call at " + getErrorLocation(Buf + Rel.Offset) + - "crosses a split-stack boundary, but unable " + - "to adjust the enclosing function's prologue"); } - switchMorestackCallsToMorestackNonSplit(AdjustedPrologues, MorestackCalls); + switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls); } template 
void InputSection::writeTo(uint8_t *Buf) { Index: test/ELF/Inputs/x86-64-split-stack-extra.s =================================================================== --- test/ELF/Inputs/x86-64-split-stack-extra.s +++ test/ELF/Inputs/x86-64-split-stack-extra.s @@ -0,0 +1,10 @@ +# This file is split out to provide better code coverage. + .global split + .type split,@function +split: + retq + + .size split,. - split + + .section .note.GNU-stack,"",@progbits + .section .note.GNU-split-stack,"",@progbits Index: test/ELF/x86-64-split-stack-prologue-adjust-fail.s =================================================================== --- test/ELF/x86-64-split-stack-prologue-adjust-fail.s +++ test/ELF/x86-64-split-stack-prologue-adjust-fail.s @@ -1,15 +1,19 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-extra.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t3.o -# RUN: not ld.lld --defsym __morestack=0x100 %t1.o %t2.o -o %t 2>&1 | FileCheck %s +# RUN: not ld.lld --defsym __morestack=0x100 --defsym __morestack_non_split=0x200 %t1.o %t2.o %t3.o -o %t 2>&1 | FileCheck %s # An unknown prologue gives a match failure -# CHECK: unable to adjust the enclosing function's +# CHECK: error: {{.*}}.o:(.text): unknown_prologue (with -fsplit-stack) calls non_split (without -fsplit-stack), but couldn't adjust its prologue # RUN: not ld.lld -r --defsym __morestack=0x100 %t1.o %t2.o -o %t 2>&1 | FileCheck %s -check-prefix=RELOCATABLE # RELOCATABLE: Cannot mix split-stack and non-split-stack in a relocatable link +# RUN: not ld.lld --defsym __morestack=0x100 --defsym _start=0x300 %t1.o %t2.o %t3.o -o %t 2>&1 | FileCheck %s -check-prefix=ERROR +# ERROR: Mixing split-stack objects requires a definition of 
__morestack_non_split + .text .global unknown_prologue Index: test/ELF/x86-64-split-stack-prologue-adjust-silent.s =================================================================== --- test/ELF/x86-64-split-stack-prologue-adjust-silent.s +++ test/ELF/x86-64-split-stack-prologue-adjust-silent.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t2.o -# RUN: ld.lld --defsym __morestack=0x100 %t1.o %t2.o -o %t +# RUN: ld.lld --defsym __morestack=0x100 --defsym __morestack_non_split=0x200 %t1.o %t2.o -o %t # RUN: llvm-objdump -d %t 2>&1 | FileCheck %s # An unknown prologue ordinarily gives a match failure, except that this Index: test/ELF/x86-64-split-stack-prologue-adjust-success.s =================================================================== --- test/ELF/x86-64-split-stack-prologue-adjust-success.s +++ test/ELF/x86-64-split-stack-prologue-adjust-success.s @@ -1,8 +1,9 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-extra.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-split-stack-main.s -o %t3.o -# RUN: ld.lld --defsym __morestack=0x100 --defsym __morestack_non_split=0x200 %t1.o %t2.o -o %t -z notext +# RUN: ld.lld --defsym __morestack=0x100 --defsym __morestack_non_split=0x200 %t1.o %t2.o %t3.o -o %t -z notext # RUN: llvm-objdump -d %t | FileCheck %s # Avoid duplicating the prologue for every test via macros. @@ -25,11 +26,11 @@ .size prologue1_calls_\function_to_call,. 
- prologue1_calls_\function_to_call .endm -.macro prologue2 function_to_call register +.macro prologue2 function_to_call register compare_amount .global prologue2_calls_\function_to_call\register .type prologue2_calls_\function_to_call\register,@function prologue2_calls_\function_to_call\register: - lea -0x200(%rsp),%\register + lea -\compare_amount(%rsp),%\register cmp %fs:0x70,%\register jae 1f callq __morestack @@ -65,20 +66,20 @@ # calls plain __morestack, that any raw bytes written to the prologue # make sense, and that the register number is preserved. # CHECK: prologue2_calls_splitr10: -# CHECK-NEXT: lea{{.*}} -{{[0-9]+}}(%rsp),{{.*}}%r10 +# CHECK-NEXT: lea{{.*}} -512(%rsp),{{.*}}%r10 # CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r{{[0-9]+}} # CHECK: jae{{.*}} # CHECK-NEXT: callq{{.*}}<__morestack> -prologue2 split r10 +prologue2 split r10 0x200 # CHECK: prologue2_calls_splitr11: -# CHECK-NEXT: lea{{.*}} -{{[0-9]+}}(%rsp),{{.*}}%r11 +# CHECK-NEXT: lea{{.*}} -256(%rsp),{{.*}}%r11 # CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r{{[0-9]+}} # CHECK: jae{{.*}} # CHECK-NEXT: callq{{.*}}<__morestack> -prologue2 split r11 +prologue2 split r11 0x100 # For split-stack code calling non-split-stack code, ensure prologue v1 # calls __morestack_non_split, and that any raw bytes written to the prologue @@ -95,30 +96,20 @@ # calls __morestack_non_split, that any raw bytes written to the prologue # make sense, and that the register number is preserved # CHECK: prologue2_calls_non_splitr10: -# CHECK-NEXT: lea{{.*$}} +# CHECK-NEXT: lea{{.*}} -16640(%rsp),{{.*}}%r10 # CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r10 # CHECK: jae{{.*$}} # CHECK-NEXT: callq{{.*}}<__morestack_non_split> -prologue2 non_split r10 +prologue2 non_split r10 0x100 # CHECK: prologue2_calls_non_splitr11: -# CHECK-NEXT: lea{{.*$}} +# CHECK-NEXT: lea{{.*}} -16896(%rsp),{{.*}}%r11 # CHECK: cmp{{.*}}%fs:{{[^,]*}},{{.*}}%r11 # CHECK: jae{{.*$}} # CHECK-NEXT: callq{{.*}}<__morestack_non_split> -prologue2 non_split r11 -# call foo@plt 
# for code-coverage. - - - - .global split - .type split,@function -split: - retq - - .size split,. - split +prologue2 non_split r11 0x200 .section .note.GNU-stack,"",@progbits .section .note.GNU-split-stack,"",@progbits