Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -103,6 +103,64 @@ return Type == R_PPC64_TOC16 || Type == R_PPC64_TOC16_DS; } +// Find the relocation in .rela.toc with matching offset. +template +static std::pair +getRelaTocSymAndAddend(InputSectionBase *TocSec, unsigned &TocRelIdx, + uint64_t Offset) { + ArrayRef Relas = TocSec->template relas(); + while (TocRelIdx < Relas.size() && Relas[TocRelIdx].r_offset < Offset) + ++TocRelIdx; + if (!(TocRelIdx < Relas.size() && Relas[TocRelIdx].r_offset == Offset)) + return {}; + Symbol &Sym = TocSec->getFile()->getRelocTargetSym(Relas[TocRelIdx]); + return {dyn_cast_or_null(&Sym), getAddend(Relas[TocRelIdx])}; +} + +// Loading a symbol address from .toc "addis r,2,foo@toc@ha; ld r,foo@toc@l(r)" +// can be relaxed to compute the address using an offset relative to the toc +// pointer "addis r,2,offset; addi r,r,offset" or "nop; addi r,2,offset". +// +// This requires the symbol to be defined, non-preemptable and addressable with +// a 32-bit signed offset from the toc pointer. +bool elf::tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + unsigned &TocRelIdx, uint8_t *BufLoc) { + assert(Config->TocOptimize); + if (Rel.Addend < 0) + return false; + + // If the symbol is not the .toc section, this isn't a toc-indirection. + Defined *DefSym = dyn_cast(Rel.Sym); + if (!DefSym || !DefSym->isSection() || DefSym->Section->Name != ".toc") + return false; + + Defined *D; + int64_t Addend; + auto *TocISB = cast(DefSym->Section); + std::tie(D, Addend) = + Config->IsLE + ? getRelaTocSymAndAddend(TocISB, TocRelIdx, Rel.Addend) + : getRelaTocSymAndAddend(TocISB, TocRelIdx, Rel.Addend); + + // Only non-preemptable defined symbols can be relaxed. 
+ if (!D || D->IsPreemptible) + return false; + + // Because we only have the 2 instructions of the got-indirect access sequence + // to rewrite, we can only relax if the symbol definition is within 32 bits of + // the TOC base-pointer. + uint64_t SymVA = D->getVA(Addend); + if (!isInt<32>(SymVA - getPPC64TocBase())) + return false; + + // Safe to relax, let the target callback perform the actual relaxation. + // Note that we use the offset between the symbols VA and the .got VA as the + // relocation value since relocateOne will perform the adjustment by the TOC + // bias. + Target->relaxGot(BufLoc, Type, SymVA - In.Got->getVA()); + return true; +} + namespace { class PPC64 final : public TargetInfo { public: @@ -121,6 +179,7 @@ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -270,6 +329,25 @@ return 2; } +void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_TOC16_HA: + relocateOne(Loc, Type, Val); + break; + case R_PPC64_TOC16_LO_DS: { + // Convert "ld reg, foo@toc@l(reg)" to "addi reg, reg, offset". + uint32_t Instr = readInstrFromHalf16(Loc); + if (getPrimaryOpCode(Instr) != LD) + error("expected a 'ld' for got-indirect to toc-relative relaxing"); + writeInstrFromHalf16(Loc, (Instr & 0x03FFFFFF) | 0x38000000); + relocateOne(Loc, R_PPC64_TOC16_LO, Val); + break; + } + default: + llvm_unreachable("unexpected relocation type"); + } +} + void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. 
// The general dynamic code sequence for a global `x` will look like: @@ -439,11 +517,12 @@ return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_HA: case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: return R_GOTREL; + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO_DS: + return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: return R_PPC_TOC; case R_PPC64_REL14: Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -38,7 +38,7 @@ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; - void relaxGot(uint8_t *Loc, uint64_t Val) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -453,7 +453,7 @@ write32le(Loc, Val); } -void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const { +void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { const uint8_t Op = Loc[-2]; const uint8_t ModRm = Loc[-1]; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -630,6 +630,7 @@ case R_GOTPLTONLY_PC: return In.GotPlt->getVA() + A - P; case R_GOTREL: + case R_PPC64_RELAX_TOC: return Sym.getVA(A) - In.Got->getVA(); case R_GOTPLTREL: return Sym.getVA(A) - In.GotPlt->getVA(); @@ -876,6 +877,7 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) { assert(Flags & SHF_ALLOC); const unsigned Bits = Config->Wordsize * 8; + unsigned TocRelIndex = 0; for (const Relocation &Rel : Relocations) { uint64_t Offset = Rel.Offset; @@ -893,7 +895,14 @@ switch (Expr) { case R_RELAX_GOT_PC: case 
R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, TargetVA); + Target->relaxGot(BufLoc, Type, TargetVA); + break; + case R_PPC64_RELAX_TOC: + // R_PPC64_RELAX_TOC relocations have been sorted by addend. TocRelIndex + // is the index into .rela.toc which gets increased while we are resolving + // R_PPC64_RELAX_TOC relocations. + if (!tryRelaxTocPPC64(Type, Rel, TocRelIndex, BufLoc)) + Target->relocateOne(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -93,6 +93,7 @@ R_PPC_CALL, R_PPC_CALL_PLT, R_PPC_TOC, + R_PPC64_RELAX_TOC, R_RISCV_PC_INDIRECT, }; Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -384,7 +384,7 @@ // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return oneof(Expr); + R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr); } // Returns true if a given relocation can be computed at link-time. @@ -404,7 +404,7 @@ R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT, - R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, + R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E)) return true; @@ -1080,7 +1080,7 @@ // The 4 types that relative GOTPLT are all x86 and x86-64 specific. 
if (oneof(Expr)) { In.GotPlt->HasGotPltOffRel = true; - } else if (oneof(Expr)) { + } else if (oneof(Expr)) { In.Got->HasGotOffRel = true; } @@ -1241,12 +1241,31 @@ for (auto I = Rels.begin(), End = Rels.end(); I != End;) scanReloc(Sec, GetOffset, I, End); - // Sort relocations by offset to binary search for R_RISCV_PCREL_HI20 + // Sort relocations by offset for more efficient searching for + // R_RISCV_PCREL_HI20. if (Config->EMachine == EM_RISCV) std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(), [](const Relocation &LHS, const Relocation &RHS) { return LHS.Offset < RHS.Offset; }); + else if (Config->EMachine == EM_PPC64) { + // Sort .toc relocations by offset and R_PPC64_RELAX_TOC relocations by + // addend for more efficient --toc-optimize processing. + if (Sec.Name == ".toc") { + std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(), + [](const Relocation &LHS, const Relocation &RHS) { + return LHS.Offset < RHS.Offset; + }); + } else { + auto It = llvm::partition(Sec.Relocations, [](const Relocation &R) { + return R.Expr == R_PPC64_RELAX_TOC; + }); + std::stable_sort(Sec.Relocations.begin(), It, + [](const Relocation &LHS, const Relocation &RHS) { + return LHS.Addend < RHS.Addend; + }); + } + } } template void elf::scanRelocations(InputSectionBase &S) { Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -124,7 +124,7 @@ virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const; - virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; @@ -164,8 +164,13 @@ return getErrorPlace(Loc).Loc; } -// In the PowerPC64 Elf V2 abi a function can 
have 2 entry points. The first is -// a global entry point (GEP) which typically is used to intiailzie the TOC +// Tries to relax a toc-indirection. If an indirection can be safely relaxed +// this function performs the relaxation and returns true. +bool tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + unsigned &TocRelIndex, uint8_t *BufLoc); + +// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first +// is a global entry point (GEP) which typically is used to initialize the TOC // pointer in general purpose register 2. The second is a local entry // point (LEP) which bypasses the TOC pointer initialization code. The // offset between GEP and LEP is encoded in a function's st_other flags. Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -149,7 +149,7 @@ return Expr; } -void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } Index: test/ELF/Inputs/ppc64-global.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-global.s @@ -0,0 +1,8 @@ + .type glob,@object + .data + .global glob + .p2align 2 +glob: + .long 55 + .size glob, 4 + Index: test/ELF/Inputs/ppc64-local.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-local.s @@ -0,0 +1,15 @@ + .data + + .type local_sym,@object + .hidden local_sym + .global local_sym + .p2align 2 +local_sym: + .long 55 + .size local_sym, 4 + + .type defualt_sym,@object + .global default_sym +default_sym: + .long 0 + .size default_sym, 4 Index: test/ELF/Inputs/ppc64-toc-opt-defs.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-opt-defs.s @@ -0,0 +1,45 @@ + .data + + .type a,@object + .globl a + .p2align 2 +a: + 
.long 1 + .size a, 4 + + .type b,@object + .globl b +b: + .long 2 + .size b, 4 + + .type c,@object + .globl c +c: + .long 3 + .size c, 4 + + .type d,@object + .globl d +d: + .long 4 + .size d, 4 + + .type e,@object + .globl e +e: + .long 5 + .size e, 4 + + .type f,@object + .globl f +f: + .long 6 + .size f, 4 + + .type g,@object + .globl g + .p2align 2 +g: + .long 7 + .size g, 4 Index: test/ELF/Inputs/ppc64-toc-opt-shared.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-opt-shared.s @@ -0,0 +1,16 @@ + .data + + .type ext_1,@object + .global ext_1 + .p2align 2 +ext_1: + .long 8 + .size ext_1, 4 + + .type ext_2,@object + .global ext_2 + .p2align 2 +ext_2: + .long 8 + .size ext_2, 4 + Index: test/ELF/ppc64-func-entry-points.s =================================================================== --- test/ELF/ppc64-func-entry-points.s +++ test/ELF/ppc64-func-entry-points.s @@ -75,6 +75,6 @@ // CHECK: foo_external_diff: // CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 1 // CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640 -// CHECK-NEXT: 10010088: {{.*}} nop +// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 1 // CHECK: foo_external_same: // CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3 Index: test/ELF/ppc64-got-indirect.s =================================================================== --- test/ELF/ppc64-got-indirect.s +++ test/ELF/ppc64-got-indirect.s @@ -2,15 +2,17 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o # RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-LE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-LE -# RUN: llvm-objdump -D %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-global.s -o %t1.o +# RUN: ld.lld -shared -o %t1.so %t1.o +# RUN: ld.lld %t.o %t1.so -o %t2 +# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefixes=CHECK,CHECK-LE # RUN: llvm-mc -filetype=obj 
-triple=powerpc64-unknown-linux %s -o %t.o # RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-BE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-BE -# RUN: llvm-objdump -D %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-global.s -o %t1.o +# RUN: ld.lld -shared -o %t1.so %t1.o +# RUN: ld.lld %t.o %t1.so -o %t2 +# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefixes=CHECK,CHECK-BE # Make sure we calculate the offset correctly for a got-indirect access to a # global variable as described by the PPC64 ELF V2 abi. @@ -42,13 +44,6 @@ .section .toc,"aw",@progbits .LC0: .tc glob[TC],glob - .type glob,@object # @glob - .data - .globl glob - .p2align 2 -glob: - .long 55 # 0x37 - .size glob, 4 # Verify the relocations emitted for glob are through the .toc @@ -82,34 +77,34 @@ # CHECK: _start: # CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 2 -# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768 +# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32576 # CHECK-NEXT: 10010008: {{.*}} nop # CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2) # CHECK: 1001001c: {{.*}} lwa 3, 0(3) # CHECK-LE: Disassembly of section .got: # CHECK-LE-NEXT: .got: -# CHECK-LE-NEXT: 10020000: 00 80 02 10 -# CHECK-LE-NEXT: 10020004: 00 00 00 00 +# CHECK-LE-NEXT: 100200c0: c0 80 02 10 +# CHECK-LE-NEXT: 100200c4: 00 00 00 00 # Verify that .toc comes right after .got # CHECK-LE: Disassembly of section .toc: -# CHECK-LE: 10020008: 00 00 03 10 -# CHECK-LE: Disassembly of section .data: -# CHECK-LE-NEXT: glob: -# CHECK-LE-NEXT: 10030000: 37 00 00 00 +# XCHECK-LE: Disassembly of section .data: +# XCHECK-LE-NEXT: glob: +# XCHECK-LE-NEXT: 10030000: 37 00 00 00 # CHECK-BE: Disassembly of section .got: -# CHECK-BE-NEXT: .got: -# CHECK-BE-NEXT: 10020000: 00 00 00 00 -# CHECK-BE-NEXT: 10020004: 10 02 80 00 +# CHECK-BE-NEXT: 00000000100200c0 .got: +# CHECK-BE-NEXT: 100200c0: 00 00 00 00 +# CHECK-BE-NEXT: 100200c4: 10 02 80 
c0 # Verify that .toc comes right after .got # CHECK-BE: Disassembly of section .toc: -# CHECK-BE: 10020008: 00 00 00 00 -# CHECK-BE: 1002000c: 10 03 00 00 +# CHECK-BE: 00000000100200c8 .toc: +# XCHECK-BE: 10020008: 00 00 00 00 +# XCHECK-BE: 1002000c: 10 03 00 00 -# CHECK-BE: Disassembly of section .data: -# CHECK-BE-NEXT: glob: -# CHECK-BE-NEXT: 10030000: 00 00 00 37 +# XCHECK-BE: Disassembly of section .data: +# XCHECK-BE-NEXT: glob: +# XCHECK-BE-NEXT: 10030000: 00 00 00 37 Index: test/ELF/ppc64-relocs.s =================================================================== --- test/ELF/ppc64-relocs.s +++ test/ELF/ppc64-relocs.s @@ -1,12 +1,12 @@ # REQUIRES: ppc # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATALE # RUN: llvm-objdump -D %t2 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATABE # RUN: llvm-objdump -D %t2 | FileCheck %s @@ -63,7 +63,7 @@ # CHECK: Disassembly of section .R_PPC64_TOC16_HA: # CHECK: .FR_PPC64_TOC16_HA: -# CHECK: 10010018: {{.*}} nop +# CHECK: 10010018: {{.*}} addis 1, 2, 0 .section .R_PPC64_REL24,"ax",@progbits .globl .FR_PPC64_REL24 @@ -171,8 +171,8 @@ # 0x10000190 + 0xfeb4 = 0x10010044 # CHECK: Disassembly of section .R_PPC64_REL32: # CHECK: .FR_PPC64_REL32: -# CHECK: 10010040: {{.*}} nop -# CHECK: 10010044: {{.*}} ld 5, -32736(2) +# CHECK: 10010040: {{.*}} addis 5, 2, 0 +# CHECK: 10010044: {{.*}} ld 5, -32736(5) # CHECK: 10010048: {{.*}} add 3, 3, 4 .section .R_PPC64_REL64, "ax",@progbits Index: test/ELF/ppc64-toc-relax-jumptable.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-jumptable.s @@ -0,0 +1,77 @@ +# RUN: llvm-mc -filetype=obj 
-triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,LE %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,BE %s + + +# Verify that the load from the .toc section was relaxed to an +# add of an offset to the TOC base-pointer (calculating the address +# of the jump table rather than loading the address from the .toc). + +# CHECK: Disassembly of section .rodata: +# CHECK-NEXT: .rodata: +# CHECK-NEXT: 100001c8 + +# CHECK-LABEL: _start +# CHECK: clrldi 3, 3, 62 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 4, 4, -32312 +# CHECK-NEXT: sldi 3, 3, 2 + +# LE: Disassembly of section .toc: +# LE-NEXT: .toc: +# LE-NEXT: 10020008: c8 01 00 10 +# LE-NEXT: 1002000c: 00 00 00 00 + +# BE: Disassembly of section .toc: +# BE-NEXT: .toc: +# BE-NEXT: 10020008: 00 00 00 00 +# BE-NEXT: 1002000c: 10 00 01 c8 + + .text + .global _start + .type _start, @function +_start: +.Lstart_gep: + addis 2, 12, .TOC.-.Lstart_gep@ha + addi 2, 2, .TOC.-.Lstart_gep@l +.Lstart_lep: + .localentry _start, .Lstart_lep-.Lstart_gep + rldicl 3, 3, 0, 62 + addis 4, 2, .LJTI_TE@toc@ha + ld 4, .LJTI_TE@toc@l(4) + sldi 3, 3, 2 + lwax 3, 3, 4 + add 3, 3, 4 + mtctr 3 + bctr + +.LBB1: + li 3, 0 + blr +.LBB2: + li 3, 10 + blr +.LBB3: + li 3, 55 + blr +.LBB4: + li 3, 255 + blr + + .section .rodata,"a",@progbits + .p2align 2 +.LJT: + .long .LBB1-.LJT + .long .LBB2-.LJT + .long .LBB3-.LJT + .long .LBB4-.LJT + +.section .toc,"aw",@progbits +# TOC entry for the jumptable address. 
+.LJTI_TE: + .tc .LJT[TC],.LJT + Index: test/ELF/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax.s @@ -0,0 +1,116 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-global.s -o %t1.shared.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-local.s -o %t2.o +# RUN: ld.lld -shared %t1.shared.o -o %t.so +# RUN: ld.lld %t.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s + +# RUN: ld.lld -shared %t.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefix=SHARED %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-global.s -o %t1.shared.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-local.s -o %t2.o +# RUN: ld.lld -shared %t1.shared.o -o %t.so +# RUN: ld.lld %t.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s + +# RUN: ld.lld -shared %t.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d --no-show-raw-insn %t2.so | FileCheck --check-prefix=SHARED %s + + .abiversion 2 + .text + + .global should_relax + .type should_relax,@function + + .global can_not_relax + .type can_not_relax,@function + + .global maybe_relax + .type maybe_relax,@function + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition will get linked in from another object file. +# The symbol is hidden making it locally defined and so relaxable from a +# got-indirect access to a toc-relative access in both shared objects and +# executables. 
+should_relax: +.Lshould_relax_gep: + addis 2, 12, .TOC.-.Lshould_relax_gep@ha + addi 2, 2, .TOC.-.Lshould_relax_gep@l +.Lshould_relax_lep: + .localentry should_relax, .Lshould_relax_lep-.Lshould_relax_gep + addis 3, 2, .LTE1@toc@ha + ld 3, .LTE1@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: should_relax +# CHECK: 10010008: nop +# CHECK-NEXT: 1001000c: addi 3, 2, 32576 +# CHECK-NEXT: 10010010: lwa 3, 0(3) + +# SHARED-LABEL: should_relax: +# SHARED-NEXT: 10000: +# SHARED: 10008: nop +# SHARED-NEXT: 1000c: addi 3, 2, 32576 +# SHARED-NEXT: 10010: lwa 3, 0(3) + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition will get linked in from a shared-object making +# it unrelaxable. +can_not_relax: +.Lcan_not_relax_gep: + addis 2, 12, .TOC.-.Lcan_not_relax_gep@ha + addi 2, 2, .TOC.-.Lcan_not_relax_gep@l +.Lcan_not_relax_lep: + .localentry can_not_relax, .Lcan_not_relax_lep-.Lcan_not_relax_gep + addis 3, 2, .LTE2@toc@ha + ld 3, .LTE2@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: 0000000010010018 can_not_relax: +# CHECK: 10010020: nop +# CHECK-NEXT: 10010024: ld 3, -32752(2) +# CHECK-NEXT: 10010028: lwa 3, 0(3) + +# SHARED-LABEL: 0000000000010018 can_not_relax: +# SHARED: 10020: nop +# SHARED-NEXT: 10024: ld 3, -32752(2) +# SHARED-NEXT: 10028: lwa 3, 0(3) + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition with default visibility gets linked in locally. +# It should be relaxed when linking an executable, and not relaxed if linking a +# shared object. 
+maybe_relax: +.Lmaybe_relax_gep: + addis 2, 12, .TOC.-.Lmaybe_relax_gep@ha + addi 2, 2, .TOC.-.Lmaybe_relax_gep@l +.Lmaybe_relax_lep: + .localentry can_not_relax, .Lcan_not_relax_lep-.Lcan_not_relax_gep + addis 3, 2, .LTE3@toc@ha + ld 3, .LTE3@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: 0000000010010030 maybe_relax: +# CHECK: 10010038: nop +# CHECK-NEXT: 1001003c: addi 3, 2, 32580 +# CHECK-NEXT: 10010040: lwa 3, 0(3) + +# SHARED-LABEL: 0000000000010030 maybe_relax: +# SHARED: 10038: nop +# SHARED-NEXT: 1003c: ld 3, -32744(2) +# SHARED-NEXT: 10040: lwa 3, 0(3) + + .section .toc,"aw",@progbits +.LTE1: + .tc local_sym[TC], local_sym +.LTE2: + .tc glob[TC], glob +.LTE3: + .tc default_sym[TC], default_sym Index: test/ELF/ppc64-tocopt-constants-in-toc.s =================================================================== --- /dev/null +++ test/ELF/ppc64-tocopt-constants-in-toc.s @@ -0,0 +1,148 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-opt-defs.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unkown-linux %p/Inputs/ppc64-toc-opt-shared.s -o %t3.o +# RUN: ld.lld -shared %t3.o -o %t.so +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -D %t | FileCheck %s + +# Test to ensure that we can still perform toc-optimizations on .toc sections +# that contain more than just symbol addresses. 
+ + .text + .global test + .p2align 4 + .type test,@function +test: +.Lgep: + addis 2, 12, .TOC.-.Lgep@ha + addi 2, 2, .TOC.-.Lgep@l +.Llep: + .localentry test, .Llep-.Lgep + + addis 3, 2, .LA@toc@ha + ld 3, .LA@toc@l(3) + lwa 0, 0(3) + + addis 4, 2, .LB@toc@ha + ld 4, .LB@toc@l(4) + lwa 4, 0(4) + + addis 5, 2, .LC@toc@ha + ld 5, .LC@toc@l(5) + lwa 5, 0(5) + + addis 6, 2, .LCONST1@toc@ha + lwa 6, .LCONST1@toc@l(6) + + addis 7, 2, .LD@toc@ha + ld 7, .LD@toc@l(7) + lwa 7, 0(7) + + addis 8, 2, .LE@toc@ha + ld 8, .LE@toc@l(8) + lwa 8, 0(8) + + addis 9, 2, .LCONST2@toc@ha + ld 9, .LCONST2@toc@l(9) + + addis 10, 2, .LEXT1@toc@ha + ld 10, .LEXT1@toc@l(10) + lwa 10, 0(10) + + addis 11, 2, .LF@toc@ha + ld 11, .LF@toc@l(11) + lwa 11, 0(11) + + addis 12, 2, .LEXT2@toc@ha + ld 12, .LEXT2@toc@l(12) + lwa 12, 0(12) + + addis 3, 2, .LG@toc@ha + ld 3, .LG@toc@l(3) + lwa 3, 0(3) + + blr + + .section .toc,"aw",@progbits +.LA: + .tc a[TC],a +.LB: + .tc b[TC],b +.LC: + .tc c[TC],c +.LCONST1: + .quad 1 +.LD: + .tc d[TC],d +.LE: + .tc e[TC],e +.LCONST2: + .quad 22 +.LEXT1: + .tc ext_1[TC],ext_1 +.LF: + .tc f[TC],f +.LEXT2: + .tc ext_2[TC],ext_2 +.LG: + .tc g[TC],g + +# a is at address 10030000 in the .data section, TOC-pointer points to 100280c0 +# offset from the TOC-pointer to a is 32576, which fits in a signed 16-bit value +# CHECK: nop +# CHECK: addi 3, 2, 32576 +# CHECK: lwa 0, 0(3) +# CHECK: nop +# CHECK: addi 4, 2, 32580 +# CHECK: lwa 4, 0(4) +# CHECK: nop +# CHECK: addi 5, 2, 32584 +# CHECK: lwa 5, 0(5) + +# The first constant stored in the toc is at address 100200e0 (.toc[3]). +# The offset from the TOC-pointer to .toc[3] is -32736 which fits in 16 bits of +# displacement. The high-adjusted part of the access is nop-ed out and the low +# part of the access is rewritten to depend on r2. 
+# CHECK: nop +# CHECK: lwa 6, -32736(2) + +# CHECK: nop +# CHECK: addi 7, 2, 32588 +# CHECK: lwa 7, 0(7) +# CHECK: nop +# CHECK: addi 8, 2, 32592 +# CHECK: lwa 8, 0(8) +# CHECK: nop +# CHECK: ld 9, -32712(2) + +# Access of an externally defined symbol, so it can't be relaxed to a +# toc-relative access. +# CHECK: nop +# CHECK: ld 10, -32704(2) +# CHECK: lwa 10, 0(10) + +# CHECK: nop +# CHECK: addi 11, 2, 32596 +# CHECK: lwa 11, 0(11) +# CHECK: nop +# CHECK: 12, -32688(2) +# CHECK: lwa 12, 0(12) +# CHECK: nop +# CHECK: addi 3, 2, 32600 +# CHECK: lwa 3, 0(3) + +# CHECK: 00000000100200c0 .got: +# CHECK: 100200c0: c0 80 02 10 +# CHECK: 100200c4: 00 00 00 00 +# CHECK: 00000000100200c8 .toc: + +# CHECK: Disassembly of section .data: +# CHECK: 0000000010030000 a: +# CHECK: 0000000010030004 b: +# CHECK: 0000000010030008 c: +# CHECK: 000000001003000c d: +# CHECK: 0000000010030010 e: +# CHECK: 0000000010030014 f: +# CHECK: 0000000010030018 g: