Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -83,6 +83,10 @@ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; + + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const override; + bool needsMoreStackNonSplit() const override; }; } // namespace @@ -656,7 +660,78 @@ } } +bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const { + // If the caller has a global entry point adjust the buffer past it to find + // the start of the split-stack prologue. + uint8_t GepShift = (StOther >> 5) & 7; + if (GepShift > 1) { + Loc += (1 << GepShift); + } + + // At the very least we expect to see a load of some split-stack data from the + // tcb, and 2 instructions that calculate the ending stack address this + // function will require. If there is not enough room for at least 3 + // instructions it can't be a split-stack prologue. + if (Loc + 12 >= End) { + return false; + } + + // First instruction must be `ld r0, -0x7000-64(r13)` + if (read32(Loc) != 0xe80d8fc0) { + return false; + } + + int16_t HighImmediate = 0, LowImmediate = 0; + // First instruction can be either an addis if the frame size is larger than + // 32K, or an addi if the size is less than 32K.
+ int32_t FirstInstr = read32(Loc + 4); + if (getPrimaryOpCode(FirstInstr) == 15) { + HighImmediate = FirstInstr & 0xFFFF; + } else if (getPrimaryOpCode(FirstInstr) == 14) { + LowImmediate = FirstInstr & 0xFFFF; + } else { + return false; + } + + // Second instruction is either an addi or a nop + uint32_t SecondInstr = read32(Loc + 8); + if (getPrimaryOpCode(SecondInstr) == 14) { + assert(!LowImmediate); + LowImmediate = SecondInstr & 0xFFFF; + } else if (SecondInstr != 0x60000000) { + return false; + } + + int32_t StackFrameSize = (HighImmediate << 16) + LowImmediate; + int32_t AdjustedStackFrameSize = + StackFrameSize - Config->SplitStackAdjustSize; + + LowImmediate = AdjustedStackFrameSize & 0xFFFF; + HighImmediate = (AdjustedStackFrameSize + 0x8000) >> 16; + if (HighImmediate) { + write32(Loc + 4, 0x3D810000 | (uint16_t)HighImmediate); + // If the low immediate is zero the second instruction will be a nop. + SecondInstr = + LowImmediate ? 0x398C0000 | (uint16_t)LowImmediate : 0x60000000; + write32(Loc + 8, SecondInstr); + } else { + // addi r12, r1, imm + write32(Loc + 4, (0x39810000) | (uint16_t)LowImmediate); + write32(Loc + 8, 0x60000000); + } + + return true; +} + +// The PPC64 implementation of split-stack doesn't need to rewrite calls to +// __morestack since the stack size of the split-stack aware caller is +// adjusted at link time.
+bool PPC64::needsMoreStackNonSplit() const { + return false; +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; -} + } Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -43,8 +43,9 @@ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; - bool adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const override; + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const override; + bool needsMoreStackNonSplit() const override; private: void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op, @@ -481,7 +482,8 @@ // B) Or a load of a stack pointer offset with an lea to r10 or r11. template <> bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { + uint8_t *End, + uint8_t StOther) const { if (Loc + 8 >= End) return false; @@ -506,12 +508,24 @@ return false; } + +template <> +bool X86_64::needsMoreStackNonSplit(void) const { + return true; +} + template <> bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { + uint8_t *End, + uint8_t StOther) const { llvm_unreachable("Target doesn't support split stacks."); } +template <> +bool X86_64::needsMoreStackNonSplit(void) const { + llvm_unreachable("Target doesn't support split stacks."); +} + } // namespace // These nonstandard PLT entries are to migtigate Spectre v2 security Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -214,6 +214,7 @@ unsigned LTOO; unsigned Optimize; unsigned ThinLTOJobs; + int32_t SplitStackAdjustSize; // The following config options do not directly correspond to any // particualr command line options. 
Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -280,6 +280,10 @@ if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64) error("--fix-cortex-a53-843419 is only supported on AArch64 targets."); + if (Args.hasArg(OPT_split_stack_adjust_size) && Config->EMachine != EM_PPC64) + error( + "--split-stack-adjust-size is only supported on the PowerPC64 target."); + if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); @@ -799,6 +803,7 @@ Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); Config->SoName = Args.getLastArgValue(OPT_soname); Config->SortSection = getSortSection(Args); + Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384); Config->Strip = getStrip(Args); Config->Sysroot = Args.getLastArgValue(OPT_sysroot); Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); @@ -868,6 +873,9 @@ if (Config->ThinLTOJobs == 0) error("--thinlto-jobs: number of threads must be > 0"); + if (Config->SplitStackAdjustSize < 0) + error("--split-stack-adjust-size: size must be >= 0"); + // Parse ELF{32,64}{LE,BE} and CPU type. 
if (auto *Arg = Args.getLastArg(OPT_m)) { StringRef S = Arg->getValue(); Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -956,7 +956,7 @@ if (Defined *F = getEnclosingFunction(Rel.Offset)) { Prologues.insert(F); if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value), - End)) + End, F->StOther)) continue; if (!getFile()->SomeNoSplitStack) error(lld::toString(this) + ": " + F->getName() + @@ -964,7 +964,9 @@ " (without -fsplit-stack), but couldn't adjust its prologue"); } } - switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls); + + if (Target->needsMoreStackNonSplit()) + switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls); } template void InputSection::writeTo(uint8_t *Buf) { Index: ELF/Options.td =================================================================== --- ELF/Options.td +++ ELF/Options.td @@ -42,6 +42,12 @@ defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"=">; +defm split_stack_adjust_size + : Eq<"split-stack-adjust-size", + "Specify adjustment to stack size when a split-stack function calls a " + "non-split-stack function">, + MetaVarName<"">; + defm library_path: Eq<"library-path", "Add a directory to the library search path">, MetaVarName<"">; Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -69,8 +69,15 @@ // The function with a prologue starting at Loc was compiled with // -fsplit-stack and it calls a function compiled without. Adjust the prologue // to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks. - virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const; + // The symbol's st_other flags are needed on PowerPC64 for determining the + // offset to the split-stack prologue.
+ virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const; + + // If a target needs to rewrite calls to __morestack to instead call + // __morestack_non_split when a split-stack enabled caller calls a + // non-split-stack callee this will return true. Otherwise returns false. + virtual bool needsMoreStackNonSplit(void) const; // Return true if we can reach Dst from Src with Relocation RelocType virtual bool inBranchRange(RelType Type, uint64_t Src, Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -132,11 +132,14 @@ return false; } -bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { +bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const { llvm_unreachable("Target doesn't support split stacks."); } +bool TargetInfo::needsMoreStackNonSplit(void) const { + llvm_unreachable("Target doesn't support split stacks."); +} bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { return true; Index: test/ELF/Inputs/ppc64-no-split-stack.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-no-split-stack.s @@ -0,0 +1,8 @@ + .abiversion 2 + .p2align 2 + .global nss_callee + .type nss_callee, @function +nss_callee: + li 3, 1 + blr + Index: test/ELF/ppc64-split-stack-adjust-size-success.s =================================================================== --- /dev/null +++ test/ELF/ppc64-split-stack-adjust-size-success.s @@ -0,0 +1,99 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 32768 +# RUN: llvm-objdump -d %t | FileCheck %s + +# RUN: ld.lld %t1.o 
%t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 4096 +# RUN: llvm-objdump -d %t | FileCheck %s -check-prefix=SMALL + +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 0 +# RUN: llvm-objdump -d %t | FileCheck %s -check-prefix=ZERO + + .p2align 2 + .global caller + .type caller, @function +caller: +.Lcaller_gep: + addis 2, 12, .TOC.-.Lcaller_gep@ha + addi 2, 2, .TOC.-.Lcaller_gep@l + .localentry caller, .-caller + ld 0, -0x7040(13) + addi 12, 1, -32 + nop + cmpld 7, 12, 0 + blt- 7, .Lcaller_alloc_more +.Lcaller_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lcaller_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcaller_body + .size caller, .-caller + +# CHECK-LABEL: caller +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -1 +# CHECK-NEXT: addi 12, 12, 32736 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+36 + +# SMALL-LABEL: caller +# SMALL: ld 0, -28736(13) +# SMALL-NEXT: addi 12, 1, -4128 +# SMALL-NEXT: nop +# SMALL-NEXT: cmpld 7, 12, 0 +# SMALL-NEXT: bt- 28, .+36 + +# ZERO-LABEL: caller +# ZERO: ld 0, -28736(13) +# ZERO-NEXT: addi 12, 1, -32 +# ZERO-NEXT: nop +# ZERO-NEXT: cmpld 7, 12, 0 +# ZERO-NEXT: bt- 28, .+36 + .p2align 2 + .global main + .type main, @function +main: +.Lmain_gep: + addis 2,12,.TOC.-.Lmain_gep@ha + addi 2,2,.TOC.-.Lmain_gep@l + .localentry main,.-main + ld 0,-0x7040(13) + addi 12,1,-32 + nop + cmpld 7,12,0 + blt- 7, .Lmain_morestack +.Lmain_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl caller + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lmain_morestack: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lmain_body + .size main,.-main + + .section .note.GNU-split-stack,"",@progbits Index: test/ELF/ppc64-split-stack-prologue-adjust-success.s =================================================================== --- /dev/null +++ 
test/ELF/ppc64-split-stack-prologue-adjust-success.s @@ -0,0 +1,220 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + + .abiversion 2 + .section ".text" + + +# A caller with a stack that is small enough that the addis instruction +# from the split-stack prologue is unneeded, and after the prologue adjustment +# the stack size still fits within 16 bits. + .p2align 2 + .global caller_small_stack + .type caller_small_stack, @function +caller_small_stack: +.Lcss_gep: + addis 2, 12, .TOC.-.Lcss_gep@ha + addi 2, 2, .TOC.-.Lcss_gep@l + .localentry caller_small_stack, .-caller_small_stack + ld 0, -0x7040(13) + addi 12, 1, -32 + nop + cmpld 7, 12, 0 + blt- 7, .Lcss_alloc_more +.Lcss_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lcss_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcss_body + .size caller_small_stack, .-caller_small_stack + +# CHECK-LABEL: caller_small_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addi 12, 1, -16416 +# CHECK-NEXT: nop +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+36 + +# A caller that has a stack size that fits within 16 bits, but the adjusted +# stack size after prologue adjustment now overflows 16 bits needing both addis +# and addi instructions.
+ .p2align 2 + .global caller_med_stack + .type caller_med_stack, @function +caller_med_stack: +.Lcms_gep: + addis 2, 12, .TOC.-.Lcms_gep@ha + addi 12, 12, .TOC.-.Lcms_gep@l + .localentry caller_med_stack, .-caller_med_stack + ld 0, -0x7040(13) + addi 12, 1, -32764 + nop + cmpld 7, 12, 0 + blt- 7, .Lcms_alloc_more +.Lcms_body: + mflr 0 + std 0, 16(1) + stdu 1, -32764(1) + bl nss_callee + addi 1, 1, 32764 + ld 0, 16(1) + mtlr 0 + blr +.Lcms_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcms_body + .size caller_med_stack, .-caller_med_stack + +# A caller with a large enough stack frame that both the addis and +# addi instructions are used in the split-stack prologue. + .p2align 2 + .global caller_large_stack + .type caller_large_stack, @function +caller_large_stack: +.Lcls_gep: + addis 2, 12, .TOC.-.Lcls_gep@ha + addi 12, 12, .TOC.-.Lcls_gep@l + .localentry caller_large_stack, .-caller_large_stack + ld 0, -0x7040(13) + addis 12, 1, -1 + addi 12, 12, -32 + cmpld 7, 12, 0 + blt- 7, .Lcls_alloc_more +.Lcls_body: + mflr 0 + std 0, 16(1) + lis 0, -1 + addi 0, 0, -32 + stdux 1, 0, 1 + bl nss_callee + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr +.Lcls_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcls_body + .size caller_large_stack, .-caller_large_stack + +# CHECK-LABEL: caller_large_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -1 +# CHECK-NEXT: addi 12, 12, -16416 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+44 + +# A caller with a stack size that is larger than 16 bits, but aligned such that +# the addi instruction is unneeded.
+ .p2align 2 + .global caller_large_aligned_stack + .type caller_large_aligned_stack, @function +caller_large_aligned_stack: +.Lclas_gep: + addis 2, 12, .TOC.-.Lclas_gep@ha + addi 12, 12, .TOC.-.Lclas_gep@l + .localentry caller_large_aligned_stack, .-caller_large_aligned_stack + ld 0, -0x7040(13) + addis 12, 1, -2 + nop + cmpld 7, 12, 0 + blt- 7, .Lclas_alloc_more +.Lclas_body: + mflr 0 + std 0, 16(1) + lis 0, -2 + stdux 1, 0, 1 + bl nss_callee + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr +.Lclas_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lclas_body + .size caller_large_aligned_stack, .-caller_large_aligned_stack + +# CHECK-LABEL: caller_large_aligned_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -2 +# CHECK-NEXT: addi 12, 12, -16384 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+40 + + .p2align 2 + .global main + .type main, @function +main: +.Lmain_gep: + addis 2, 12,.TOC.-.Lmain_gep@ha + addi 2, 2,.TOC.-.Lmain_gep@l + .localentry main,.-main + ld 0, -0x7040(13) + addi 12,1,-32 + nop + cmpld 7, 12,0 + blt- 7, .Lmain_morestack +.Lmain_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl caller_small_stack + nop + bl caller_med_stack + nop + bl caller_large_stack + nop + bl caller_large_aligned_stack + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lmain_morestack: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lmain_body + .size main,.-main +# main only calls split-stack functions or __morestack so +# there should be no adjustment of its split-stack prologue. +# CHECK-LABEL: main +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addi 12, 1, -32 +# CHECK-NEXT: nop +# CHECK-NEXT: cmpld 7, 12, 0 + + .section .note.GNU-split-stack,"",@progbits