Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -103,6 +103,67 @@ return Type == R_PPC64_TOC16 || Type == R_PPC64_TOC16_DS; } +// Find the relocation in .rela.toc with matching offset. +template +static std::pair +getRelaTocSymAndAddend(InputSectionBase *TocSec, unsigned &TocRelIdx, + uint64_t Offset) { + if (!TocSec->NumRelocations) + return {}; + ArrayRef Relas = TocSec->template relas(); + while (TocRelIdx < Relas.size() && Relas[TocRelIdx].r_offset < Offset) + ++TocRelIdx; + if (!(TocRelIdx < Relas.size() && Relas[TocRelIdx].r_offset == Offset)) + return {}; + Symbol &Sym = TocSec->getFile()->getRelocTargetSym(Relas[TocRelIdx]); + return {dyn_cast(&Sym), getAddend(Relas[TocRelIdx])}; +} + +// Loading a symbol address from .toc (e.g. "addis r,2,foo@toc@ha; +// ld r,foo@toc@l(r)") can be relaxed to compute the address using an offset +// relative to the toc pointer (e.g. "addis r,2,offset; addi r,r,offset" or +// "nop; addi r,2,offset"). +// +// This requires the symbol to be defined, non-preemptable and addressable with +// a 32-bit signed offset from the toc pointer. +bool elf::tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + unsigned &TocRelIdx, uint8_t *BufLoc) { + assert(Config->TocOptimize); + if (Rel.Addend < 0) + return false; + + // If the symbol is not the .toc section, this isn't a toc-indirection. + Defined *DefSym = dyn_cast(Rel.Sym); + if (!DefSym || !DefSym->isSection() || DefSym->Section->Name != ".toc") + return false; + + Defined *D; + int64_t Addend; + auto *TocISB = cast(DefSym->Section); + std::tie(D, Addend) = + Config->IsLE + ? getRelaTocSymAndAddend(TocISB, TocRelIdx, Rel.Addend) + : getRelaTocSymAndAddend(TocISB, TocRelIdx, Rel.Addend); + + // Only non-preemptable defined symbols can be relaxed. 
+ if (!D || D->IsPreemptible) + return false; + + // Because we only have the 2 instructions of the got-indirect access sequence + // to rewrite, we can only relax if the symbol definition is within 32 bits of + // the TOC base-pointer. + uint64_t SymVA = D->getVA(Addend); + if (!isInt<32>(SymVA - getPPC64TocBase())) + return false; + + // Safe to relax, let the target callback perform the actual relaxation. + // Note that we use the offset between the symbol's VA and the .got VA as the + // relocation value since relocateOne will perform the adjustment by the TOC + // bias. + Target->relaxGot(BufLoc, Type, SymVA - In.Got->getVA()); + return true; +} + namespace { class PPC64 final : public TargetInfo { public: @@ -121,6 +182,7 @@ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -270,6 +332,26 @@ return 2; } +void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_TOC16_HA: + relocateOne(Loc, Type, Val); + break; + case R_PPC64_TOC16_LO_DS: { + // Convert "ld reg, foo@toc@l(reg)" to "addi reg, reg, offset". Only 'ld' is + // rewritten here; any other instruction is reported as an error. 
+ uint32_t Instr = readInstrFromHalf16(Loc); + if (getPrimaryOpCode(Instr) != LD) + error("expected a 'ld' for got-indirect to toc-relative relaxing"); + writeInstrFromHalf16(Loc, (Instr & 0x03FFFFFF) | 0x38000000); + relocateOne(Loc, R_PPC64_TOC16_LO, Val); + break; + } + default: + llvm_unreachable("unexpected relocation type"); + } +} + void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: @@ -439,11 +521,12 @@ return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_HA: case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: return R_GOTREL; + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO_DS: + return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: return R_PPC_TOC; case R_PPC64_REL14: Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -38,7 +38,7 @@ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; - void relaxGot(uint8_t *Loc, uint64_t Val) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -453,7 +453,7 @@ write32le(Loc, Val); } -void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const { +void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { const uint8_t Op = Loc[-2]; const uint8_t ModRm = Loc[-1]; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -630,6 +630,7 @@ case R_GOTPLTONLY_PC: return In.GotPlt->getVA() + A - P; case 
R_GOTREL: + case R_PPC64_RELAX_TOC: return Sym.getVA(A) - In.Got->getVA(); case R_GOTPLTREL: return Sym.getVA(A) - In.GotPlt->getVA(); @@ -876,6 +877,7 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) { assert(Flags & SHF_ALLOC); const unsigned Bits = Config->Wordsize * 8; + unsigned TocRelIndex = 0; for (const Relocation &Rel : Relocations) { uint64_t Offset = Rel.Offset; @@ -893,7 +895,14 @@ switch (Expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, TargetVA); + Target->relaxGot(BufLoc, Type, TargetVA); + break; + case R_PPC64_RELAX_TOC: + // R_PPC64_RELAX_TOC relocations have been sorted by addend. TocRelIndex is + // the index into .rela.toc which gets increased while we are resolving + // R_PPC64_RELAX_TOC relocations. + if (!tryRelaxTocPPC64(Type, Rel, TocRelIndex, BufLoc)) + Target->relocateOne(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -93,6 +93,7 @@ R_PPC_CALL, R_PPC_CALL_PLT, R_PPC_TOC, + R_PPC64_RELAX_TOC, R_RISCV_PC_INDIRECT, }; Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -384,7 +384,7 @@ // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return oneof(Expr); + R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr); } // Returns true if a given relocation can be computed at link-time. 
@@ -404,7 +404,7 @@ R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT, - R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, + R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E)) return true; @@ -1080,7 +1080,7 @@ // The 4 types that relative GOTPLT are all x86 and x86-64 specific. if (oneof(Expr)) { In.GotPlt->HasGotPltOffRel = true; - } else if (oneof(Expr)) { + } else if (oneof(Expr)) { In.Got->HasGotOffRel = true; } @@ -1241,12 +1241,31 @@ for (auto I = Rels.begin(), End = Rels.end(); I != End;) scanReloc(Sec, GetOffset, I, End); - // Sort relocations by offset to binary search for R_RISCV_PCREL_HI20 + // Sort relocations by offset for more efficient searching for + // R_RISCV_PCREL_HI20. if (Config->EMachine == EM_RISCV) std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(), [](const Relocation &LHS, const Relocation &RHS) { return LHS.Offset < RHS.Offset; }); + else if (Config->EMachine == EM_PPC64) { + // Sort .toc relocations by offset and R_PPC64_RELAX_TOC relocations by + // addend for more efficient --toc-optimize processing. 
+ if (Sec.Name == ".toc") { + std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(), + [](const Relocation &LHS, const Relocation &RHS) { + return LHS.Offset < RHS.Offset; + }); + } else { + auto It = llvm::partition(Sec.Relocations, [](const Relocation &R) { + return R.Expr == R_PPC64_RELAX_TOC; + }); + std::stable_sort(Sec.Relocations.begin(), It, + [](const Relocation &LHS, const Relocation &RHS) { + return LHS.Addend < RHS.Addend; + }); + } + } } template void elf::scanRelocations(InputSectionBase &S) { Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -124,7 +124,7 @@ virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const; - virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; @@ -164,8 +164,13 @@ return getErrorPlace(Loc).Loc; } -// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is -// a global entry point (GEP) which typically is used to intiailzie the TOC +// Tries to relax a toc-indirection. If an indirection can be safely relaxed +// this function performs the relaxation and returns true. +bool tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + unsigned &TocRelIndex, uint8_t *BufLoc); + +// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first +// is a global entry point (GEP) which typically is used to initialize the TOC // pointer in general purpose register 2. The second is a local entry // point (LEP) which bypasses the TOC pointer initialization code. The // offset between GEP and LEP is encoded in a function's st_other flags. 
Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -149,7 +149,7 @@ return Expr; } -void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } Index: test/ELF/Inputs/ppc64-toc-relax-shared.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-relax-shared.s @@ -0,0 +1,7 @@ +.data + +.type shared,@object +.globl shared +shared: + .long 8 + .size shared, 4 Index: test/ELF/Inputs/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-relax.s @@ -0,0 +1,10 @@ +.data + +.type default,@object +.globl default +.globl hidden +.hidden hidden + +default: +hidden: + .long 0 Index: test/ELF/ppc64-func-entry-points.s =================================================================== --- test/ELF/ppc64-func-entry-points.s +++ test/ELF/ppc64-func-entry-points.s @@ -75,6 +75,6 @@ // CHECK: foo_external_diff: // CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 1 // CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640 -// CHECK-NEXT: 10010088: {{.*}} nop +// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 1 // CHECK: foo_external_same: // CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3 Index: test/ELF/ppc64-got-indirect.s =================================================================== --- test/ELF/ppc64-got-indirect.s +++ /dev/null @@ -1,115 +0,0 @@ -# REQUIRES: ppc - -# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o -# RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-LE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-LE -# RUN: llvm-objdump -D %t2 | FileCheck %s - -# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o -# RUN: llvm-readobj 
-relocations %t.o | FileCheck -check-prefix=RELOCS-BE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-BE -# RUN: llvm-objdump -D %t2 | FileCheck %s - -# Make sure we calculate the offset correctly for a got-indirect access to a -# global variable as described by the PPC64 ELF V2 abi. - .text - .abiversion 2 - .globl _start # -- Begin function _start - .p2align 4 - .type _start,@function -_start: # @_start -.Lfunc_begin0: -.Lfunc_gep0: - addis 2, 12, .TOC.-.Lfunc_gep0@ha - addi 2, 2, .TOC.-.Lfunc_gep0@l -.Lfunc_lep0: - .localentry _start, .Lfunc_lep0-.Lfunc_gep0 -# %bb.0: # %entry - addis 3, 2, .LC0@toc@ha - ld 3, .LC0@toc@l(3) - li 4, 0 - stw 4, -12(1) - li 0,1 - lwa 3, 0(3) - sc - .long 0 - .quad 0 -.Lfunc_end0: - .size _start, .Lfunc_end0-.Lfunc_begin0 - # -- End function - .section .toc,"aw",@progbits -.LC0: - .tc glob[TC],glob - .type glob,@object # @glob - .data - .globl glob - .p2align 2 -glob: - .long 55 # 0x37 - .size glob, 4 - -# Verify the relocations emitted for glob are through the .toc - -# RELOCS-LE: Relocations [ -# RELOCS-LE: .rela.text { -# RELOCS-LE: 0x0 R_PPC64_REL16_HA .TOC. 0x0 -# RELOCS-LE: 0x4 R_PPC64_REL16_LO .TOC. 0x4 -# RELOCS-LE: 0x8 R_PPC64_TOC16_HA .toc 0x0 -# RELOCS-LE: 0xC R_PPC64_TOC16_LO_DS .toc 0x0 -# RELOCS-LE: } -# RELOCS-LE: .rela.toc { -# RELOCS-LE: 0x0 R_PPC64_ADDR64 glob 0x0 -# RELOCS-LE: } - -# RELOCS-BE: Relocations [ -# RELOCS-BE: .rela.text { -# RELOCS-BE: 0x2 R_PPC64_REL16_HA .TOC. 0x2 -# RELOCS-BE: 0x6 R_PPC64_REL16_LO .TOC. 0x6 -# RELOCS-BE: 0xA R_PPC64_TOC16_HA .toc 0x0 -# RELOCS-BE: 0xE R_PPC64_TOC16_LO_DS .toc 0x0 -# RELOCS-BE: } -# RELOCS-BE: .rela.toc { -# RELOCS-BE: 0x0 R_PPC64_ADDR64 glob 0x0 -# RELOCS-BE: } -# RELOCS-BE:] - -# Verify that the global variable access is done through the correct -# toc entry: -# r2 = .TOC. = 0x10038000. -# r3 = r2 - 32760 = 0x10030008 -> .toc entry for glob. 
- -# CHECK: _start: -# CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 2 -# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768 -# CHECK-NEXT: 10010008: {{.*}} nop -# CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2) -# CHECK: 1001001c: {{.*}} lwa 3, 0(3) - -# CHECK-LE: Disassembly of section .got: -# CHECK-LE-NEXT: .got: -# CHECK-LE-NEXT: 10020000: 00 80 02 10 -# CHECK-LE-NEXT: 10020004: 00 00 00 00 - -# Verify that .toc comes right after .got -# CHECK-LE: Disassembly of section .toc: -# CHECK-LE: 10020008: 00 00 03 10 - -# CHECK-LE: Disassembly of section .data: -# CHECK-LE-NEXT: glob: -# CHECK-LE-NEXT: 10030000: 37 00 00 00 - -# CHECK-BE: Disassembly of section .got: -# CHECK-BE-NEXT: .got: -# CHECK-BE-NEXT: 10020000: 00 00 00 00 -# CHECK-BE-NEXT: 10020004: 10 02 80 00 - -# Verify that .toc comes right after .got -# CHECK-BE: Disassembly of section .toc: -# CHECK-BE: 10020008: 00 00 00 00 -# CHECK-BE: 1002000c: 10 03 00 00 - -# CHECK-BE: Disassembly of section .data: -# CHECK-BE-NEXT: glob: -# CHECK-BE-NEXT: 10030000: 00 00 00 37 Index: test/ELF/ppc64-relocs.s =================================================================== --- test/ELF/ppc64-relocs.s +++ test/ELF/ppc64-relocs.s @@ -1,12 +1,12 @@ # REQUIRES: ppc # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATALE # RUN: llvm-objdump -D %t2 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATABE # RUN: llvm-objdump -D %t2 | FileCheck %s @@ -63,7 +63,7 @@ # CHECK: Disassembly of section .R_PPC64_TOC16_HA: # CHECK: .FR_PPC64_TOC16_HA: -# CHECK: 10010018: {{.*}} nop +# CHECK: 10010018: {{.*}} addis 1, 2, 0 .section .R_PPC64_REL24,"ax",@progbits .globl .FR_PPC64_REL24 @@ -171,8 +171,8 @@ # 0x10000190 + 0xfeb4 = 0x10010044 
# CHECK: Disassembly of section .R_PPC64_REL32: # CHECK: .FR_PPC64_REL32: -# CHECK: 10010040: {{.*}} nop -# CHECK: 10010044: {{.*}} ld 5, -32736(2) +# CHECK: 10010040: {{.*}} addis 5, 2, 0 +# CHECK: 10010044: {{.*}} ld 5, -32736(5) # CHECK: 10010048: {{.*}} add 3, 3, 4 .section .R_PPC64_REL64, "ax",@progbits Index: test/ELF/ppc64-toc-relax-constants.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-constants.s @@ -0,0 +1,58 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unkown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefix=RELOCS %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s +# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s +# RUN: llvm-objdump -D %t | FileCheck %s + +# Test we can perform toc-optimizations on .toc sections that contain more than +# symbol addresses (constants). + +# RELOCS: .rela.text { +# RELOCS-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0 +# RELOCS-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-NEXT: 0x8 R_PPC64_TOC16_HA .toc 0x8 +# RELOCS-NEXT: 0xC R_PPC64_TOC16_LO_DS .toc 0x8 +# RELOCS-NEXT: 0x10 R_PPC64_TOC16_HA .toc 0x10 +# RELOCS-NEXT: 0x14 R_PPC64_TOC16_LO_DS .toc 0x10 +# RELOCS-NEXT: } + +# SECTIONS: .got PROGBITS 0000000010020090 +# SECTIONS: .toc PROGBITS 0000000010020090 + +# NM: 0000000010030000 D default + +# .LCONST1 is .toc[0]. 
+# .LCONST1 - (.got+0x8000) = 0x10020090 - (0x10020090+0x8000) = -32768 +# CHECK: nop +# CHECK: lwa 3, -32768(2) + addis 3, 2, .LCONST1@toc@ha + lwa 3, .LCONST1@toc@l(3) + +# .LCONST2 is .toc[1] +# .LCONST2 - (.got+0x8000) = 0x10020098 - (0x10020090+0x8000) = -32760 +# CHECK: nop +# CHECK: ld 4, -32760(2) + addis 4, 2, .LCONST2@toc@ha + ld 4, .LCONST2@toc@l(4) + +# address(default) - (.got+0x8000) = 0x10030000 - (0x10020090+0x8000) = 32624 +# CHECK: nop +# CHECK: addi 5, 2, 32624 +# CHECK: lwa 5, 0(5) + addis 5, 2, .Ldefault@toc@ha + ld 5, .Ldefault@toc@l(5) + lwa 5, 0(5) + +.section .toc,"aw",@progbits +.LCONST1: + .quad 11 +.LCONST2: + .quad 22 +.Ldefault: + .tc default[TC],default Index: test/ELF/ppc64-toc-relax-jumptable.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-jumptable.s @@ -0,0 +1,77 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,LE %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,BE %s + + +# Verify that the load from the .toc section was relaxed to an +# add of an offset to the TOC base-pointer (calculating the address +# of the jump table rather than loading the address from the .toc). 
+ +# CHECK: Disassembly of section .rodata: +# CHECK-NEXT: .rodata: +# CHECK-NEXT: 100001c8 + +# CHECK-LABEL: _start +# CHECK: clrldi 3, 3, 62 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 4, 4, -32312 +# CHECK-NEXT: sldi 3, 3, 2 + +# LE: Disassembly of section .toc: +# LE-NEXT: .toc: +# LE-NEXT: 10020008: c8 01 00 10 +# LE-NEXT: 1002000c: 00 00 00 00 + +# BE: Disassembly of section .toc: +# BE-NEXT: .toc: +# BE-NEXT: 10020008: 00 00 00 00 +# BE-NEXT: 1002000c: 10 00 01 c8 + + .text + .global _start + .type _start, @function +_start: +.Lstart_gep: + addis 2, 12, .TOC.-.Lstart_gep@ha + addi 2, 2, .TOC.-.Lstart_gep@l +.Lstart_lep: + .localentry _start, .Lstart_lep-.Lstart_gep + rldicl 3, 3, 0, 62 + addis 4, 2, .LJTI_TE@toc@ha + ld 4, .LJTI_TE@toc@l(4) + sldi 3, 3, 2 + lwax 3, 3, 4 + add 3, 3, 4 + mtctr 3 + bctr + +.LBB1: + li 3, 0 + blr +.LBB2: + li 3, 10 + blr +.LBB3: + li 3, 55 + blr +.LBB4: + li 3, 255 + blr + + .section .rodata,"a",@progbits + .p2align 2 +.LJT: + .long .LBB1-.LJT + .long .LBB2-.LJT + .long .LBB3-.LJT + .long .LBB4-.LJT + +.section .toc,"aw",@progbits +# TOC entry for the jumptable address. 
+.LJTI_TE: + .tc .LJT[TC],.LJT Index: test/ELF/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax.s @@ -0,0 +1,86 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-LE %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s + +# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-BE %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s + +# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s + +# RELOCS-LE: .rela.text { +# RELOCS-LE-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0 +# RELOCS-LE-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-LE: 0x18 R_PPC64_TOC16_HA .toc 0x10 +# RELOCS-LE-NEXT: 0x1C R_PPC64_TOC16_LO_DS .toc 0x10 +# RELOCS-LE-NEXT: } +# RELOCS-LE: .rela.toc { +# RELOCS-LE-NEXT: 0x0 R_PPC64_ADDR64 hidden 0x0 +# RELOCS-LE: 0x10 R_PPC64_ADDR64 default 0x0 +# RELOCS-LE-NEXT: } + +# RELOCS-BE: .rela.text { +# RELOCS-BE-NEXT: 0x2 
R_PPC64_TOC16_HA .toc 0x0 +# RELOCS-BE-NEXT: 0x6 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-BE: 0x1A R_PPC64_TOC16_HA .toc 0x10 +# RELOCS-BE-NEXT: 0x1E R_PPC64_TOC16_LO_DS .toc 0x10 +# RELOCS-BE-NEXT: } +# RELOCS-BE: .rela.toc { +# RELOCS-BE-NEXT: 0x0 R_PPC64_ADDR64 hidden 0x0 +# RELOCS-BE: 0x10 R_PPC64_ADDR64 default 0x0 +# RELOCS-BE-NEXT: } + +# NM: 0000000010030000 D default +# NM: 0000000010030000 d hidden + +# `hidden` is non-preemptable. It is relaxed. +# address(hidden) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576 +# COMMON: nop +# COMMON: addi 3, 2, 32576 +# COMMON: lwa 3, 0(3) + addis 3, 2, .Lhidden@toc@ha + ld 3, .Lhidden@toc@l(3) + lwa 3, 0(3) + +# `shared` is not defined in an object file. The ld instruction cannot be relaxed. +# The first addis can still be relaxed to nop, though. +# COMMON: nop +# COMMON: ld 4, -32760(2) +# COMMON: lwa 4, 0(4) + addis 4, 2, .Lshared@toc@ha + ld 4, .Lshared@toc@l(4) + lwa 4, 0(4) + +# `default` has default visibility. It is non-preemptable when producing an executable. +# address(default) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576 +# EXE: nop +# EXE: addi 5, 2, 32576 +# EXE: lwa 5, 0(5) + +# SHARED: nop +# SHARED: ld 5, -32752(2) +# SHARED: lwa 5, 0(5) + addis 5, 2, .Ldefault@toc@ha + ld 5, .Ldefault@toc@l(5) + lwa 5, 0(5) + +.section .toc,"aw",@progbits +.Lhidden: + .tc hidden[TC], hidden +.Lshared: + .tc shared[TC], shared +.Ldefault: + .tc default[TC], default