Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -103,6 +103,78 @@ return Type == R_PPC64_TOC16 || Type == R_PPC64_TOC16_DS; } +// Find the relocation in .rela.toc with matching offset. +template +static std::pair +getRelaTocSymAndAddend(InputSectionBase *TocSec, uint64_t Offset) { + ArrayRef Relas = TocSec->template relas(); + + // .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by + // r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the + // relocation index in most cases. + // + // In rare cases a TOC entry may store a constant that doesn't need an + // R_PPC64_ADDR64, the corresponding r_offset is therefore missing. Offset / 8 + // points to a relocation with larger r_offset. Do a linear probe then. + // Constants are extremely uncommon in .toc and the extra array accesses can + // be seen as a small constant. + uint64_t Index = Offset / 8; + if (Index >= Relas.size()) + return {}; + for (;;) { + if (Relas[Index].r_offset == Offset) { + Symbol &Sym = TocSec->getFile()->getRelocTargetSym(Relas[Index]); + return {dyn_cast(&Sym), getAddend(Relas[Index])}; + } + if (Relas[Index].r_offset < Offset || Index == 0) + break; + --Index; + } + return {}; +} + +// Loading a symbol address from .toc (e.g. "addis r,2,foo@toc@ha; +// ld r,foo@toc@l(r)") can be relaxed to compute the address using an offset +// relative to the toc pointer (e.g. "addis r,2,offset; addi r,r,offset" or +// "nop; addi r,2,offset"). +// +// This requires the symbol to be defined, non-preemptable and addressable with +// a 32-bit signed offset from the toc pointer. +bool elf::tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + uint8_t *BufLoc) { + assert(Config->TocOptimize); + if (Rel.Addend < 0) + return false; + + // If the symbol is not the .toc section, this isn't a toc-indirection. 
+ Defined *DefSym = dyn_cast(Rel.Sym); + if (!DefSym || !DefSym->isSection() || DefSym->Section->Name != ".toc") + return false; + + Defined *D; + int64_t Addend; + auto *TocISB = cast(DefSym->Section); + std::tie(D, Addend) = + Config->IsLE ? getRelaTocSymAndAddend(TocISB, Rel.Addend) + : getRelaTocSymAndAddend(TocISB, Rel.Addend); + + // Only non-preemptable defined symbols can be relaxed. + if (!D || D->IsPreemptible) + return false; + + // Because we only have the 2 instructions of the got-indirect access sequence + // to rewrite, we can only relax if the symbol definition is within 32 bits of + // the TOC base-pointer. + uint64_t SymVA = D->getVA(Addend); + if (!isInt<32>(SymVA - getPPC64TocBase())) + return false; + + // Note that we use the offset relative to .got, relocateOne will perform the + // adjustment by the TOC bias. + Target->relaxGot(BufLoc, Type, SymVA - In.Got->getVA()); + return true; +} + namespace { class PPC64 final : public TargetInfo { public: @@ -121,6 +193,7 @@ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -270,6 +343,26 @@ return 2; } +void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_TOC16_HA: + relocateOne(Loc, Type, Val); + break; + case R_PPC64_TOC16_LO_DS: { + // Convert "ld reg, foo@toc@l(reg)" to "addi reg, reg, offset_l" or "addi + // reg, 2, offset". 
+ uint32_t Instr = readInstrFromHalf16(Loc); + if (getPrimaryOpCode(Instr) != LD) + error("expected a 'ld' for got-indirect to toc-relative relaxing"); + writeInstrFromHalf16(Loc, (Instr & 0x03FFFFFF) | 0x38000000); + relocateOne(Loc, R_PPC64_TOC16_LO, Val); + break; + } + default: + llvm_unreachable("unexpected relocation type"); + } +} + void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: @@ -439,11 +532,12 @@ return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_HA: case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: return R_GOTREL; + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO_DS: + return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: return R_PPC_TOC; case R_PPC64_REL14: Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -38,7 +38,7 @@ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; - void relaxGot(uint8_t *Loc, uint64_t Val) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -453,7 +453,7 @@ write32le(Loc, Val); } -void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const { +void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { const uint8_t Op = Loc[-2]; const uint8_t ModRm = Loc[-1]; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -69,8 +69,10 @@ if (SectionKind == SectionBase::Merge && RawData.size() > 
UINT32_MAX) error(toString(this) + ": section too large"); + // Spurious false for InputSection::Discarded as Config isn't available. + AreRelocsRela = Config && Config->IsRela; + NumRelocations = 0; - AreRelocsRela = false; Debug = Name.startswith(".debug") || Name.startswith(".zdebug"); // The ELF spec states that a value of 0 means the section has @@ -631,6 +633,7 @@ case R_GOTPLTONLY_PC: return In.GotPlt->getVA() + A - P; case R_GOTREL: + case R_PPC64_RELAX_TOC: return Sym.getVA(A) - In.Got->getVA(); case R_GOTPLTREL: return Sym.getVA(A) - In.GotPlt->getVA(); @@ -894,7 +897,11 @@ switch (Expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, TargetVA); + Target->relaxGot(BufLoc, Type, TargetVA); + break; + case R_PPC64_RELAX_TOC: + if (!tryRelaxTocPPC64(Type, Rel, BufLoc)) + Target->relocateOne(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -93,6 +93,7 @@ R_PPC_CALL, R_PPC_CALL_PLT, R_PPC_TOC, + R_PPC64_RELAX_TOC, R_RISCV_PC_INDIRECT, }; Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -383,7 +383,7 @@ // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return oneof(Expr); + R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr); } // Returns true if a given relocation can be computed at link-time. 
@@ -403,7 +403,7 @@ R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT, - R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, + R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E)) return true; @@ -1079,7 +1079,7 @@ // The 4 types that relative GOTPLT are all x86 and x86-64 specific. if (oneof(Expr)) { In.GotPlt->HasGotPltOffRel = true; - } else if (oneof(Expr)) { + } else if (oneof(Expr)) { In.Got->HasGotOffRel = true; } @@ -1240,8 +1240,10 @@ for (auto I = Rels.begin(), End = Rels.end(); I != End;) scanReloc(Sec, GetOffset, I, End); - // Sort relocations by offset to binary search for R_RISCV_PCREL_HI20 - if (Config->EMachine == EM_RISCV) + // Sort relocations by offset for more efficient searching for + // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. + if (Config->EMachine == EM_RISCV || + (Config->EMachine == EM_PPC64 && Sec.Name == ".toc")) llvm::stable_sort(Sec.Relocations, [](const Relocation &LHS, const Relocation &RHS) { return LHS.Offset < RHS.Offset; Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -124,7 +124,7 @@ virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const; - virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; @@ -164,8 +164,12 @@ return getErrorPlace(Loc).Loc; } -// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is -// a global entry point (GEP) which typically is used to intiailzie the TOC +// Tries to relax a toc-indirection. 
If an indirection can be safely relaxed +// this function performs the relaxation and returns true. +bool tryRelaxTocPPC64(RelType Type, const Relocation &Rel, uint8_t *BufLoc); + +// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first +// is a global entry point (GEP) which typically is used to initialize the TOC // pointer in general purpose register 2. The second is a local entry // point (LEP) which bypasses the TOC pointer initialization code. The // offset between GEP and LEP is encoded in a function's st_other flags. Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -149,7 +149,7 @@ return Expr; } -void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } Index: test/ELF/Inputs/ppc64-toc-relax-shared.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-relax-shared.s @@ -0,0 +1,7 @@ +.data + +.type shared,@object +.globl shared +shared: + .long 8 + .size shared, 4 Index: test/ELF/Inputs/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-relax.s @@ -0,0 +1,15 @@ +.data + +.globl default, hidden +.hidden hidden + +default: +hidden: + .long 0 + +.space 65532 + +.globl hidden2 +.hidden hidden2 +hidden2: + .long 0 Index: test/ELF/ppc64-func-entry-points.s =================================================================== --- test/ELF/ppc64-func-entry-points.s +++ test/ELF/ppc64-func-entry-points.s @@ -75,6 +75,6 @@ // CHECK: foo_external_diff: // CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 1 // CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640 -// CHECK-NEXT: 10010088: {{.*}} nop +// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 1 // CHECK: foo_external_same: // CHECK-NEXT: 100100b0: {{.*}} 
add 3, 4, 3 Index: test/ELF/ppc64-got-indirect.s =================================================================== --- test/ELF/ppc64-got-indirect.s +++ /dev/null @@ -1,115 +0,0 @@ -# REQUIRES: ppc - -# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o -# RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-LE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-LE -# RUN: llvm-objdump -D %t2 | FileCheck %s - -# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o -# RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-BE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-BE -# RUN: llvm-objdump -D %t2 | FileCheck %s - -# Make sure we calculate the offset correctly for a got-indirect access to a -# global variable as described by the PPC64 ELF V2 abi. - .text - .abiversion 2 - .globl _start # -- Begin function _start - .p2align 4 - .type _start,@function -_start: # @_start -.Lfunc_begin0: -.Lfunc_gep0: - addis 2, 12, .TOC.-.Lfunc_gep0@ha - addi 2, 2, .TOC.-.Lfunc_gep0@l -.Lfunc_lep0: - .localentry _start, .Lfunc_lep0-.Lfunc_gep0 -# %bb.0: # %entry - addis 3, 2, .LC0@toc@ha - ld 3, .LC0@toc@l(3) - li 4, 0 - stw 4, -12(1) - li 0,1 - lwa 3, 0(3) - sc - .long 0 - .quad 0 -.Lfunc_end0: - .size _start, .Lfunc_end0-.Lfunc_begin0 - # -- End function - .section .toc,"aw",@progbits -.LC0: - .tc glob[TC],glob - .type glob,@object # @glob - .data - .globl glob - .p2align 2 -glob: - .long 55 # 0x37 - .size glob, 4 - -# Verify the relocations emitted for glob are through the .toc - -# RELOCS-LE: Relocations [ -# RELOCS-LE: .rela.text { -# RELOCS-LE: 0x0 R_PPC64_REL16_HA .TOC. 0x0 -# RELOCS-LE: 0x4 R_PPC64_REL16_LO .TOC. 
0x4 -# RELOCS-LE: 0x8 R_PPC64_TOC16_HA .toc 0x0 -# RELOCS-LE: 0xC R_PPC64_TOC16_LO_DS .toc 0x0 -# RELOCS-LE: } -# RELOCS-LE: .rela.toc { -# RELOCS-LE: 0x0 R_PPC64_ADDR64 glob 0x0 -# RELOCS-LE: } - -# RELOCS-BE: Relocations [ -# RELOCS-BE: .rela.text { -# RELOCS-BE: 0x2 R_PPC64_REL16_HA .TOC. 0x2 -# RELOCS-BE: 0x6 R_PPC64_REL16_LO .TOC. 0x6 -# RELOCS-BE: 0xA R_PPC64_TOC16_HA .toc 0x0 -# RELOCS-BE: 0xE R_PPC64_TOC16_LO_DS .toc 0x0 -# RELOCS-BE: } -# RELOCS-BE: .rela.toc { -# RELOCS-BE: 0x0 R_PPC64_ADDR64 glob 0x0 -# RELOCS-BE: } -# RELOCS-BE:] - -# Verify that the global variable access is done through the correct -# toc entry: -# r2 = .TOC. = 0x10038000. -# r3 = r2 - 32760 = 0x10030008 -> .toc entry for glob. - -# CHECK: _start: -# CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 2 -# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768 -# CHECK-NEXT: 10010008: {{.*}} nop -# CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2) -# CHECK: 1001001c: {{.*}} lwa 3, 0(3) - -# CHECK-LE: Disassembly of section .got: -# CHECK-LE-NEXT: .got: -# CHECK-LE-NEXT: 10020000: 00 80 02 10 -# CHECK-LE-NEXT: 10020004: 00 00 00 00 - -# Verify that .toc comes right after .got -# CHECK-LE: Disassembly of section .toc: -# CHECK-LE: 10020008: 00 00 03 10 - -# CHECK-LE: Disassembly of section .data: -# CHECK-LE-NEXT: glob: -# CHECK-LE-NEXT: 10030000: 37 00 00 00 - -# CHECK-BE: Disassembly of section .got: -# CHECK-BE-NEXT: .got: -# CHECK-BE-NEXT: 10020000: 00 00 00 00 -# CHECK-BE-NEXT: 10020004: 10 02 80 00 - -# Verify that .toc comes right after .got -# CHECK-BE: Disassembly of section .toc: -# CHECK-BE: 10020008: 00 00 00 00 -# CHECK-BE: 1002000c: 10 03 00 00 - -# CHECK-BE: Disassembly of section .data: -# CHECK-BE-NEXT: glob: -# CHECK-BE-NEXT: 10030000: 00 00 00 37 Index: test/ELF/ppc64-relocs.s =================================================================== --- test/ELF/ppc64-relocs.s +++ test/ELF/ppc64-relocs.s @@ -1,14 +1,14 @@ # REQUIRES: ppc -# RUN: llvm-mc -filetype=obj 
-triple=powerpc64le-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 -# RUN: llvm-readelf -x .rodata -x .eh_frame %t2 | FileCheck %s --check-prefix=DATALE -# RUN: llvm-objdump -d --no-show-raw-insn %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld --no-toc-optimize %t.o -o %t +# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATALE +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 -# RUN: llvm-readelf -x .rodata -x .eh_frame %t2 | FileCheck %s --check-prefix=DATABE -# RUN: llvm-objdump -d --no-show-raw-insn %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: ld.lld --no-toc-optimize %t.o -o %t +# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATABE +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s .text .global _start @@ -63,7 +63,7 @@ # CHECK: Disassembly of section .R_PPC64_TOC16_HA: # CHECK: .FR_PPC64_TOC16_HA: -# CHECK: 10010018: nop +# CHECK: 10010018: addis 1, 2, 0 .section .R_PPC64_REL24,"ax",@progbits .globl .FR_PPC64_REL24 @@ -169,8 +169,8 @@ # 0x10000190 + 0xfeb4 = 0x10010044 # CHECK: Disassembly of section .R_PPC64_REL32: # CHECK: .FR_PPC64_REL32: -# CHECK: 10010040: nop -# CHECK: 10010044: ld 5, -32736(2) +# CHECK: 10010040: addis 5, 2, 0 +# CHECK: 10010044: ld 5, -32736(5) # CHECK: 10010048: add 3, 3, 4 .section .R_PPC64_REL64, "ax",@progbits Index: test/ELF/ppc64-toc-relax-constants.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-constants.s @@ -0,0 +1,60 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unkown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc 
-filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefix=RELOCS %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s +# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s +# RUN: llvm-objdump -D %t | FileCheck %s + +# Test we can perform toc-optimizations on .toc sections that contain more than +# symbol addresses (constants). + +# RELOCS: .rela.text { +# RELOCS-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0 +# RELOCS-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-NEXT: 0x8 R_PPC64_TOC16_HA .toc 0x8 +# RELOCS-NEXT: 0xC R_PPC64_TOC16_LO_DS .toc 0x8 +# RELOCS-NEXT: 0x10 R_PPC64_TOC16_HA .toc 0x10 +# RELOCS-NEXT: 0x14 R_PPC64_TOC16_LO_DS .toc 0x10 +# RELOCS-NEXT: } + +# SECTIONS: .got PROGBITS 0000000010020090 +# SECTIONS: .toc PROGBITS 0000000010020090 + +# NM: 0000000010030000 D default + +# .LCONST1 is .toc[0]. +# .LCONST1 - (.got+0x8000) = 0x10020090 - (0x10020090+0x8000) = -32768 +# CHECK: nop +# CHECK: lwa 3, -32768(2) + addis 3, 2, .LCONST1@toc@ha + lwa 3, .LCONST1@toc@l(3) + +# .LCONST2 is .toc[1] +# .LCONST2 - (.got+0x8000) = 0x10020098 - (0x10020090+0x8000) = -32760 +# CHECK: nop +# CHECK: ld 4, -32760(2) + addis 4, 2, .LCONST2@toc@ha + ld 4, .LCONST2@toc@l(4) + +# .Ldefault is .toc[2]. `default` is not preemptable when producing an executable. 
+# After toc-indirection to toc-relative relaxation, it is loaded using an +# offset relative to r2: 8*2 - 0x8000 = -32752 +# CHECK: nop +# CHECK: ld 5, -32752(2) +# CHECK: lwa 5, 0(5) + addis 5, 2, .Ldefault@toc@ha + ld 5, .Ldefault@toc@l(5) + lwa 5, 0(5) + +.section .toc,"aw",@progbits +.LCONST1: + .quad 11 +.LCONST2: + .quad 22 +.Ldefault: + .tc default[TC],default Index: test/ELF/ppc64-toc-relax-jumptable.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-jumptable.s @@ -0,0 +1,73 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s +# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-LE %s +# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s +# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-BE %s +# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s + +# .LJT is a local symbol (non-preemptable). 
The toc-indirection load of its +# address can be relaxed to calculate the address relative to .toc + +# SECTIONS: .rodata PROGBITS 00000000100001c8 + +# HEX-LE: section '.toc': +# HEX-LE-NEXT: 10020008 c8010010 00000000 + +# HEX-BE: section '.toc': +# HEX-BE-NEXT: 10020008 00000000 100001c8 + +# CHECK-LABEL: _start +# CHECK: clrldi 3, 3, 62 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 4, 4, -32312 +# CHECK-NEXT: sldi 3, 3, 2 + + .text + .global _start + .type _start, @function +_start: +.Lstart_gep: + addis 2, 12, .TOC.-.Lstart_gep@ha + addi 2, 2, .TOC.-.Lstart_gep@l +.Lstart_lep: + .localentry _start, .Lstart_lep-.Lstart_gep + rldicl 3, 3, 0, 62 + addis 4, 2, .LJTI_TE@toc@ha + ld 4, .LJTI_TE@toc@l(4) + sldi 3, 3, 2 + lwax 3, 3, 4 + add 3, 3, 4 + mtctr 3 + bctr + +.LBB1: + li 3, 0 + blr +.LBB2: + li 3, 10 + blr +.LBB3: + li 3, 55 + blr +.LBB4: + li 3, 255 + blr + + .section .rodata,"a",@progbits + .p2align 2 +.LJT: + .long .LBB1-.LJT + .long .LBB2-.LJT + .long .LBB3-.LJT + .long .LBB4-.LJT + +.section .toc,"aw",@progbits +# TOC entry for the jumptable address. 
+.LJTI_TE: + .tc .LJT[TC],.LJT Index: test/ELF/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax.s @@ -0,0 +1,99 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-LE,RELOCS %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s + +# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax-shared.s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-toc-relax.s -o %t2.o +# RUN: llvm-readobj -r %t1.o | FileCheck --check-prefixes=RELOCS-BE,RELOCS %s +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=COMMON,EXE %s + +# RUN: ld.lld -shared %t1.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefixes=COMMON,SHARED %s + +# RELOCS-LE: .rela.text { +# RELOCS-LE-NEXT: 0x0 R_PPC64_TOC16_HA .toc 0x0 +# RELOCS-LE-NEXT: 0x4 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-LE-NEXT: 0xC R_PPC64_TOC16_HA .toc 0x8 +# RELOCS-LE-NEXT: 0x10 R_PPC64_TOC16_LO_DS .toc 0x8 +# RELOCS-LE: 0x24 R_PPC64_TOC16_HA .toc 0x18 +# RELOCS-LE-NEXT: 0x28 R_PPC64_TOC16_LO_DS .toc 0x18 +# RELOCS-LE-NEXT: } + +# RELOCS-BE: .rela.text { +# RELOCS-BE-NEXT: 0x2 R_PPC64_TOC16_HA .toc 0x0 +# 
RELOCS-BE-NEXT: 0x6 R_PPC64_TOC16_LO_DS .toc 0x0 +# RELOCS-BE-NEXT: 0xE R_PPC64_TOC16_HA .toc 0x8 +# RELOCS-BE-NEXT: 0x12 R_PPC64_TOC16_LO_DS .toc 0x8 +# RELOCS-BE: 0x26 R_PPC64_TOC16_HA .toc 0x18 +# RELOCS-BE-NEXT: 0x2A R_PPC64_TOC16_LO_DS .toc 0x18 +# RELOCS-BE-NEXT: } + +# RELOCS: .rela.toc { +# RELOCS-NEXT: 0x0 R_PPC64_ADDR64 hidden 0x0 +# RELOCS-NEXT: 0x8 R_PPC64_ADDR64 hidden2 0x0 +# RELOCS: 0x18 R_PPC64_ADDR64 default 0x0 +# RELOCS-NEXT: } + +# NM-DAG: 0000000010030000 D default +# NM-DAG: 0000000010030000 d hidden +# NM-DAG: 0000000010040000 d hidden2 + +# `hidden` is non-preemptable. It is relaxed. +# address(hidden) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576 +# COMMON: nop +# COMMON: addi 3, 2, 32576 +# COMMON: lwa 3, 0(3) + addis 3, 2, .Lhidden@toc@ha + ld 3, .Lhidden@toc@l(3) + lwa 3, 0(3) + +# address(hidden2) - (.got+0x8000) = 0x10040000 - (0x100200c0+0x8000) = (1<<16)+32576 +# COMMON: addis 3, 2, 1 +# COMMON: addi 3, 3, 32576 +# COMMON: lwa 3, 0(3) + addis 3, 2, .Lhidden2@toc@ha + ld 3, .Lhidden2@toc@l(3) + lwa 3, 0(3) + +# `shared` is not defined in an object file. The ld instruction cannot be relaxed. +# The first addis can still be relaxed to nop, though. +# COMMON: nop +# COMMON: ld 4, -32752(2) +# COMMON: lwa 4, 0(4) + addis 4, 2, .Lshared@toc@ha + ld 4, .Lshared@toc@l(4) + lwa 4, 0(4) + +# `default` has default visibility. It is non-preemptable when producing an executable. +# address(default) - (.got+0x8000) = 0x10030000 - (0x100200c0+0x8000) = 32576 +# EXE: nop +# EXE: addi 5, 2, 32576 +# EXE: lwa 5, 0(5) + +# SHARED: nop +# SHARED: ld 5, -32744(2) +# SHARED: lwa 5, 0(5) + addis 5, 2, .Ldefault@toc@ha + ld 5, .Ldefault@toc@l(5) + lwa 5, 0(5) + +.section .toc,"aw",@progbits +.Lhidden: + .tc hidden[TC], hidden +.Lhidden2: + .tc hidden2[TC], hidden2 +.Lshared: + .tc shared[TC], shared +.Ldefault: + .tc default[TC], default