Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -107,6 +107,103 @@ Type == R_PPC64_TOC16_DS; } +// Searches through the relocations in TocSec looking for the one with +// matching Offset. +template +static std::pair getSymAndAddend(InputSectionBase *TocSec, + uint64_t Offset) { + typedef typename ELFT::Rela RelaTy; + ArrayRef Relas = TocSec->template relas(); + if (Relas.empty()) + return {}; + + // Extracts the Symbol and addend from a relocation. + auto SymAndAddend = [&](RelaTy Rela) -> std::pair { + Symbol &Sym = TocSec->getFile()->getRelocTargetSym(Rela); + return {dyn_cast_or_null(&Sym), getAddend(Rela)}; + }; + + // The relocations are sorted by offset, and while the most common case is + // that every .toc entry will have a relocation, it is not guaranteed. For + // example, objects compiled with small code model are likely to have + // constants placed in the .toc section. Start looking by mapping the offset + // directly into its corresponding toc entry. + size_t Index = std::min(Offset / Target->GotEntrySize, Relas.size() - 1); + if (Offset == Relas[Index].r_offset) + return SymAndAddend(Relas[Index]); + + // If Offset is greater than r_offset on the relocation we looked up we must + // have fewer relocations than toc entries and Relas[Index] is the last reloc + // in the array. In this case we have nothing to relax to. + if (Offset > Relas[Index].r_offset) + return {}; + + // Walk back looking for a relocation that relocates .toc + Offset. + for (auto R : llvm::reverse(Relas.slice(0, Index))) { + if (R.r_offset == Offset) + return SymAndAddend(R); + if (R.r_offset < Offset) + return {}; + } + + // No relocation for the toc entry .toc + Offset. + return {}; +} + +// Determines the target of a toc-indirection to relax to. Returns the symbol + +// addend pair if there is a symbol to relax to.
+static std::pair relaxTo(Relocation Rel) { + // If the symbol the relocation refers to is not the .toc section + // this can't be a toc-indirection. + Defined *DefSym = dyn_cast(Rel.Sym); + if (!DefSym || !DefSym->isSection() || DefSym->Section->Name != ".toc") + return {}; + + if (Rel.Addend < 0) + return {}; + + auto *TocISB = cast(DefSym->Section); + // Get the symbol + addend that the toc-indirection refers to. + return Config->IsLE ? getSymAndAddend(TocISB, Rel.Addend) + : getSymAndAddend(TocISB, Rel.Addend); +} + +// Attempts to relax loading a symbol's address from the .toc section into +// calculating the symbol's address using an offset relative to the toc pointer. +// To be able to relax, the target symbol must be defined, not preemptible and +// addressable with a 32-bit signed offset from the toc pointer. +bool elf::tryRelaxTocPPC64(RelType Type, const Relocation &Rel, RelExpr Expr, + uint8_t *BufLoc) { + assert(Expr == R_PPC64_RELAX_TOC && + "unexpected RelExpr for got-indirect to toc-relative relaxation"); + assert(Config->TocOptimize && + "tryRelaxTocPPC64 should only be called when --toc-optimize is enabled"); + + Defined *D; + int64_t Addend; + std::tie(D, Addend) = relaxTo(Rel); + + // If there is no defined symbol or if the defined symbol is preemptible then + // the relocation cannot be relaxed. + if (!D || D->IsPreemptible) + return false; + + uint64_t SymVA = D->getVA(Addend); + // Because we only have the 2 instructions of the got-indirect access sequence + // to rewrite, we can only relax if the symbol definition is within 32 bits of + // the TOC base-pointer. For medium code model this is guaranteed but in the + // large code model parts of the data segment will be too far away to relax. + if (!isInt<32>(SymVA - getPPC64TocBase())) + return false; + + // Safe to relax, let the target callback perform the actual relaxation.
+ // Note that we use the offset between the symbol's VA and the .got VA as the + // relocation value since relocateOne will perform the adjustment by the TOC + // bias. + Target->relaxGot(BufLoc, Type, SymVA - In.Got->getVA()); + return true; +} + namespace { class PPC64 final : public TargetInfo { public: @@ -124,6 +221,7 @@ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -260,6 +358,26 @@ return 2; } +void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_TOC16_HA: + relocateOne(Loc, Type, Val); + break; + case R_PPC64_TOC16_LO_DS: { + // Rewrite the instruction from a load to an addi. + uint32_t Instr = readInstrFromHalf16(Loc); + if (getPrimaryOpCode(Instr) != LD) + error("expected a 'ld' for got-indirect to toc-relative relaxing"); + writeInstrFromHalf16(Loc, (Instr & 0x03FFFFFF) | 0x38000000); + relocateOne(Loc, R_PPC64_TOC16_LO, Val); + break; + } + default: + error( + "unexpected relocation type for got-indirect to toc-relative relaxing"); + } +} + void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: @@ -429,11 +547,12 @@ return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_HA: case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: return R_GOTREL; + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO_DS: + return Config->TocOptimize ?
R_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: return R_PPC_TOC; case R_PPC64_REL14: Index: ELF/Arch/X86_64.cpp =================================================================== --- ELF/Arch/X86_64.cpp +++ ELF/Arch/X86_64.cpp @@ -37,7 +37,7 @@ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; - void relaxGot(uint8_t *Loc, uint64_t Val) const override; + void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -454,7 +454,7 @@ } template -void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const { +void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { const uint8_t Op = Loc[-2]; const uint8_t ModRm = Loc[-1]; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -619,6 +619,7 @@ case R_GOTONLY_PC_FROM_END: return In.Got->getVA() + A - P + In.Got->getSize(); case R_GOTREL: + case R_PPC64_RELAX_TOC: return Sym.getVA(A) - In.Got->getVA(); case R_GOTREL_FROM_END: return Sym.getVA(A) - In.Got->getVA() - In.Got->getSize(); @@ -887,7 +888,13 @@ switch (Expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - Target->relaxGot(BufLoc, TargetVA); + Target->relaxGot(BufLoc, Type, TargetVA); + break; + case R_PPC64_RELAX_TOC: + // If the relocation is relaxable tryRelaxTocPPC64 performs the relaxation + // and return true. Otherwise relocate to the original .toc entry.
+ if (!tryRelaxTocPPC64(Type, Rel, Expr, BufLoc)) + Target->relocateOne(BufLoc, Type, TargetVA); break; case R_RELAX_TLS_IE_TO_LE: Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -71,6 +71,7 @@ R_PPC_CALL, R_PPC_CALL_PLT, R_PPC_TOC, + R_PPC64_RELAX_TOC, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -354,8 +354,9 @@ // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return isRelExprOneOf(Expr); + R_PPC_CALL, R_PPC_CALL_PLT, R_PPC64_RELAX_TOC, + R_AARCH64_PAGE_PC, R_AARCH64_PLT_PAGE_PC, + R_RELAX_GOT_PC>(Expr); } // Returns true if a given relocation can be computed at link-time. @@ -376,8 +377,8 @@ R_AARCH64_GOT_PAGE_PC, R_AARCH64_GOT_PAGE_PC_PLT, R_GOT_PC, R_GOTONLY_PC, R_GOTONLY_PC_FROM_END, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOT_FROM_END, R_TLSGD_PC, R_PPC_CALL_PLT, - R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, - R_TLSLD_HINT, R_TLSIE_HINT>(E)) + R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, + R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E)) return true; // These never do, except if the entire file is position dependent or if @@ -1034,7 +1035,7 @@ // This relocation does not require got entry, but it is relative to got and // needs it to be created. Here we request for that. if (isRelExprOneOf(Expr)) + R_GOTREL_FROM_END, R_PPC_TOC, R_PPC64_RELAX_TOC>(Expr)) In.Got->HasGotOffRel = true; // Read an addend. 
@@ -1086,8 +1087,10 @@ for (auto I = Rels.begin(), End = Rels.end(); I != End;) scanReloc(Sec, GetOffset, I, End); - // Sort relocations by offset to binary search for R_RISCV_PCREL_HI20 - if (Config->EMachine == EM_RISCV) + // Sort relocations by offset for more efficient searching for + // R_RISCV_PCREL_HI20 and targets of toc-indirections on PPC64. + if (Config->EMachine == EM_RISCV || + (Config->EMachine == EM_PPC64 && Sec.Name == ".toc")) std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(), RelocationOffsetComparator{}); } Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -126,7 +126,7 @@ virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const; - virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; + virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const; @@ -167,8 +167,13 @@ return getErrorPlace(Loc).Loc; } -// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is -// a global entry point (GEP) which typically is used to intiailzie the TOC +// Tries to relax a toc-indirection. If an indirection can be safely relaxed +// this function performs the relaxation and returns true. +bool tryRelaxTocPPC64(RelType Type, const Relocation &Rel, + RelExpr Expr, uint8_t *BufLoc); + +// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first +// is a global entry point (GEP) which typically is used to intiailzie the TOC // pointer in general purpose register 2. The second is a local entry // point (LEP) which bypasses the TOC pointer initialization code. The // offset between GEP and LEP is encoded in a function's st_other flags. 
Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -151,7 +151,7 @@ return Expr; } -void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { +void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } Index: test/ELF/Inputs/ppc64-global.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-global.s @@ -0,0 +1,8 @@ + .type glob,@object + .data + .global glob + .p2align 2 +glob: + .long 55 + .size glob, 4 + Index: test/ELF/Inputs/ppc64-local.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-local.s @@ -0,0 +1,15 @@ + .data + + .type local_sym,@object + .hidden local_sym + .global local_sym + .p2align 2 +local_sym: + .long 55 + .size local_sym, 4 + + .type default_sym,@object + .global default_sym +default_sym: + .long 0 + .size default_sym, 4 Index: test/ELF/Inputs/ppc64-toc-opt-defs.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-opt-defs.s @@ -0,0 +1,45 @@ + .data + + .type a,@object + .globl a + .p2align 2 +a: + .long 1 + .size a, 4 + + .type b,@object + .globl b +b: + .long 2 + .size b, 4 + + .type c,@object + .globl c +c: + .long 3 + .size c, 4 + + .type d,@object + .globl d +d: + .long 4 + .size d, 4 + + .type e,@object + .globl e +e: + .long 5 + .size e, 4 + + .type f,@object + .globl f +f: + .long 6 + .size f, 4 + + .type g,@object + .globl g + .p2align 2 +g: + .long 7 + .size g, 4 Index: test/ELF/Inputs/ppc64-toc-opt-shared.s =================================================================== --- /dev/null +++ test/ELF/Inputs/ppc64-toc-opt-shared.s @@ -0,0 +1,16 @@ + .data + + .type ext_1,@object + .global ext_1 + .p2align 2 +ext_1: + .long 8 + .size ext_1, 4 + + .type ext_2,@object + .global ext_2 + .p2align 2
+ext_2: + .long 8 + .size ext_2, 4 + Index: test/ELF/ppc64-func-entry-points.s =================================================================== --- test/ELF/ppc64-func-entry-points.s +++ test/ELF/ppc64-func-entry-points.s @@ -75,6 +75,6 @@ // CHECK: foo_external_diff: // CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 2 // CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640 -// CHECK-NEXT: 10010088: {{.*}} nop +// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, -1 // CHECK: foo_external_same: // CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3 Index: test/ELF/ppc64-got-indirect.s =================================================================== --- test/ELF/ppc64-got-indirect.s +++ test/ELF/ppc64-got-indirect.s @@ -2,15 +2,17 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o # RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-LE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-LE -# RUN: llvm-objdump -D %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-global.s -o %t1.o +# RUN: ld.lld -shared -o %t1.so %t1.o +# RUN: ld.lld %t.o %t1.so -o %t2 +# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefixes=CHECK,CHECK-LE # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o # RUN: llvm-readobj -relocations %t.o | FileCheck -check-prefix=RELOCS-BE %s -# RUN: ld.lld %t.o -o %t2 -# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=CHECK-BE -# RUN: llvm-objdump -D %t2 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-global.s -o %t1.o +# RUN: ld.lld -shared -o %t1.so %t1.o +# RUN: ld.lld %t.o %t1.so -o %t2 +# RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefixes=CHECK,CHECK-BE # Make sure we calculate the offset correctly for a got-indirect access to a # global variable as described by the PPC64 ELF V2 abi. 
@@ -42,13 +44,6 @@ .section .toc,"aw",@progbits .LC0: .tc glob[TC],glob - .type glob,@object # @glob - .data - .globl glob - .p2align 2 -glob: - .long 55 # 0x37 - .size glob, 4 # Verify the relocations emitted for glob are through the .toc @@ -77,39 +72,30 @@ # Verify that the global variable access is done through the correct # toc entry: -# r2 = .TOC. = 0x10038000. -# r3 = r2 - 32760 = 0x10030008 -> .toc entry for glob. +# r2 = .TOC. = 0x100280c0 +# r3 = *(r2 - 32760) --> (r2 - 32760) = 0x100280c8 -> .toc entry for glob. # CHECK: _start: -# CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 3 -# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768 +# CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 2 +# CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32576 # CHECK-NEXT: 10010008: {{.*}} nop # CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2) # CHECK: 1001001c: {{.*}} lwa 3, 0(3) -# CHECK-LE: Disassembly of section .data: -# CHECK-LE-NEXT: glob: -# CHECK-LE-NEXT: 10020000: 37 00 00 00 - # CHECK-LE: Disassembly of section .got: # CHECK-LE-NEXT: .got: -# CHECK-LE-NEXT: 10030000: 00 80 03 10 -# CHECK-LE-NEXT: 10030004: 00 00 00 00 +# CHECK-LE-NEXT: 100200c0: c0 80 02 10 +# CHECK-LE-NEXT: 100200c4: 00 00 00 00 # Verify that .toc comes right after .got # CHECK-LE: Disassembly of section .toc: -# CHECK-LE: 10030008: 00 00 02 10 - -# CHECK-BE: Disassembly of section .data: -# CHECK-BE-NEXT: glob: -# CHECK-BE-NEXT: 10020000: 00 00 00 37 +# CHECK-LE: 100200c8 .toc # CHECK-BE: Disassembly of section .got: # CHECK-BE-NEXT: .got: -# CHECK-BE-NEXT: 10030000: 00 00 00 00 -# CHECK-BE-NEXT: 10030004: 10 03 80 00 +# CHECK-BE-NEXT: 100200c0: 00 00 00 00 +# CHECK-BE-NEXT: 100200c4: 10 02 80 c0 # Verify that .toc comes right after .got # CHECK-BE: Disassembly of section .toc: -# CHECK-BE: 10030008: 00 00 00 00 -# CHECK-BE: 1003000c: 10 02 00 00 +# CHECK-BE: 100200c8 .toc Index: test/ELF/ppc64-relocs.s =================================================================== --- test/ELF/ppc64-relocs.s +++ 
test/ELF/ppc64-relocs.s @@ -1,12 +1,12 @@ # REQUIRES: ppc # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATALE # RUN: llvm-objdump -D %t2 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t -# RUN: ld.lld %t -o %t2 +# RUN: ld.lld --no-toc-optimize %t -o %t2 # RUN: llvm-objdump -D %t2 | FileCheck %s --check-prefix=DATABE # RUN: llvm-objdump -D %t2 | FileCheck %s @@ -63,7 +63,7 @@ # CHECK: Disassembly of section .R_PPC64_TOC16_HA: # CHECK: .FR_PPC64_TOC16_HA: -# CHECK: 10010018: {{.*}} nop +# CHECK: 10010018: {{.*}} addis 1, 2, 0 .section .R_PPC64_REL24,"ax",@progbits .globl .FR_PPC64_REL24 @@ -171,8 +171,8 @@ # 0x10000190 + 0xfeb4 = 0x10010044 # CHECK: Disassembly of section .R_PPC64_REL32: # CHECK: .FR_PPC64_REL32: -# CHECK: 10010040: {{.*}} nop -# CHECK: 10010044: {{.*}} ld 5, -32736(2) +# CHECK: 10010040: {{.*}} addis 5, 2, 0 +# CHECK: 10010044: {{.*}} ld 5, -32736(5) # CHECK: 10010048: {{.*}} add 3, 3, 4 .section .R_PPC64_REL64, "ax",@progbits Index: test/ELF/ppc64-toc-relax-jumptable.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax-jumptable.s @@ -0,0 +1,78 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,LE %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -D %t | FileCheck --check-prefixes=CHECK,BE %s + + +# Verify that the load from the .toc section was relaxed to an +# add of an offset to the TOC base-pointer (calculating the address +# of the jump table rather than loading the address from the .toc).
+ +# CHECK: Disassembly of section .rodata: +# CHECK-NEXT: .rodata: +# CHECK-NEXT: 10000190 + +# CHECK-LABEL: _start +# CHECK: clrldi 3, 3, 62 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 4, 4, -32368 +# CHECK-NEXT: sldi 3, 3, 2 + +# LE: Disassembly of section .toc: +# LE-NEXT: .toc: +# LE-NEXT: 10020008: 90 01 00 10 +# LE-NEXT: 1002000c: 00 00 00 00 + +# BE: Disassembly of section .toc: +# BE-NEXT: .toc: +# BE-NEXT: 10020008: 00 00 00 00 +# BE-NEXT: 1002000c: 10 00 01 90 + + .text + .global _start + .type _start, @function +_start: +.Lstart_gep: + addis 2, 12, .TOC.-.Lstart_gep@ha + addi 2, 2, .TOC.-.Lstart_gep@l +.Lstart_lep: + .localentry _start, .Lstart_lep-.Lstart_gep + rldicl 3, 3, 0, 62 + addis 4, 2, .LJTI_TE@toc@ha + ld 4, .LJTI_TE@toc@l(4) + sldi 3, 3, 2 + lwax 3, 3, 4 + add 3, 3, 4 + mtctr 3 + bctr + +.LBB1: + li 3, 0 + blr +.LBB2: + li 3, 10 + blr +.LBB3: + li 3, 55 + blr +.LBB4: + li 3, 255 + blr + + .section .rodata,"a",@progbits + .p2align 2 +.LJT: + .long .LBB1-.LJT + .long .LBB2-.LJT + .long .LBB3-.LJT + .long .LBB4-.LJT + +.section .toc,"aw",@progbits +# TOC entry for the jumptable address. 
+.LJTI_TE: + .tc .LJT[TC],.LJT + Index: test/ELF/ppc64-toc-relax.s =================================================================== --- /dev/null +++ test/ELF/ppc64-toc-relax.s @@ -0,0 +1,121 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-global.s -o %t1.shared.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-local.s -o %t2.o +# RUN: ld.lld -shared %t1.shared.o -o %t.so +# RUN: ld.lld %t.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# RUN: ld.lld -shared %t.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d %t2.so | FileCheck --check-prefix=SHARED %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-global.s -o %t1.shared.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-local.s -o %t2.o +# RUN: ld.lld -shared %t1.shared.o -o %t.so +# RUN: ld.lld %t.o %t2.o %t.so -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# RUN: ld.lld -shared %t.o %t2.o %t.so -o %t2.so +# RUN: llvm-objdump -d %t2.so | FileCheck --check-prefix=SHARED %s + + .abiversion 2 + .text + + .global should_relax + .type should_relax,@function + + .global can_not_relax + .type can_not_relax,@function + + .global maybe_relax + .type maybe_relax,@function + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition will get linked in from another object file. +# The symbol is hidden making it locally defined and so relaxable from a +# got-indirect access to a toc-relative access in both shared objects and +# executables.
+should_relax: +.Lshould_relax_gep: + addis 2, 12, .TOC.-.Lshould_relax_gep@ha + addi 2, 2, .TOC.-.Lshould_relax_gep@l +.Lshould_relax_lep: + .localentry should_relax, .Lshould_relax_lep-.Lshould_relax_gep + addis 3, 2, .LTE1@toc@ha + ld 3, .LTE1@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: should_relax +# CHECK-NEXT: 10010000: +# CHECK: 10010008: {{.*}} addis 3, 2, -2 +# CHECK-NEXT: 1001000c: {{.*}} addi 3, 3, 32576 +# CHECK-NEXT: 10010010: {{.*}} lwa 3, 0(3) + +# SHARED-LABEL: should_relax: +# SHARED-NEXT: 10000: +# SHARED: 10008: {{.*}} addis 3, 2, -2 +# SHARED-NEXT: 1000c: {{.*}} addi 3, 3, 32576 +# SHARED-NEXT: 10010: {{.*}} lwa 3, 0(3) + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition will get linked in from a shared-object making +# it unrelaxable. +can_not_relax: +.Lcan_not_relax_gep: + addis 2, 12, .TOC.-.Lcan_not_relax_gep@ha + addi 2, 2, .TOC.-.Lcan_not_relax_gep@l +.Lcan_not_relax_lep: + .localentry can_not_relax, .Lcan_not_relax_lep-.Lcan_not_relax_gep + addis 3, 2, .LTE2@toc@ha + ld 3, .LTE2@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: can_not_relax: +# CHECK-NEXT: 10010018: +# CHECK: 10010020: {{.*}} nop +# CHECK-NEXT: 10010024: {{.*}} ld 3, -32752(2) +# CHECK-NEXT: 10010028: {{.*}} lwa 3, 0(3) + +# SHARED-LABEL: can_not_relax: +# SHARED-NEXT: 10018: +# SHARED: 10020: {{.*}} nop +# SHARED-NEXT: 10024: {{.*}} ld 3, -32752(2) +# SHARED-NEXT: 10028: {{.*}} lwa 3, 0(3) + +# Compiler emits a .toc entry for the symbol because it is not defined in this +# compilation unit. A definition with default visibility gets linked in locally. +# It should be relaxed when linking an executable, and not relaxed if linking a +# shared object.
+maybe_relax: +.Lmaybe_relax_gep: + addis 2, 12, .TOC.-.Lmaybe_relax_gep@ha + addi 2, 2, .TOC.-.Lmaybe_relax_gep@l +.Lmaybe_relax_lep: + .localentry maybe_relax, .Lmaybe_relax_lep-.Lmaybe_relax_gep + addis 3, 2, .LTE3@toc@ha + ld 3, .LTE3@toc@l(3) + lwa 3, 0(3) + blr + +# CHECK-LABEL: maybe_relax: +# CHECK-NEXT: 10010030: +# CHECK: 10010038: {{.*}} addis 3, 2, -2 +# CHECK-NEXT: 1001003c: {{.*}} addi 3, 3, 32580 +# CHECK-NEXT: 10010040: {{.*}} lwa 3, 0(3) + +# SHARED-LABEL: maybe_relax: +# SHARED-NEXT: 10030: +# SHARED: 10038: {{.*}} nop +# SHARED-NEXT: 1003c: {{.*}} ld 3, -32744(2) +# SHARED-NEXT: 10040: {{.*}} lwa 3, 0(3) + + .section .toc,"aw",@progbits +.LTE1: + .tc local_sym[TC], local_sym +.LTE2: + .tc glob[TC], glob +.LTE3: + .tc default_sym[TC], default_sym Index: test/ELF/ppc64-tocopt-constants-in-toc.s =================================================================== --- /dev/null +++ test/ELF/ppc64-tocopt-constants-in-toc.s @@ -0,0 +1,149 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-opt-defs.s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-toc-opt-shared.s -o %t3.o +# RUN: ld.lld -shared %t3.o -o %t.so +# RUN: ld.lld %t1.o %t2.o %t.so -o %t +# RUN: llvm-objdump -D %t | FileCheck %s + +# Test to ensure that we can still perform toc-optimizations on .toc sections +# that contain more than just symbol addresses.
+ + .text + .global test + .p2align 4 + .type test,@function +test: +.Lgep: + addis 2, 12, .TOC.-.Lgep@ha + addi 2, 2, .TOC.-.Lgep@l +.Llep: + .localentry test, .Llep-.Lgep + + addis 3, 2, .LA@toc@ha + ld 3, .LA@toc@l(3) + lwa 0, 0(3) + + addis 4, 2, .LB@toc@ha + ld 4, .LB@toc@l(4) + lwa 4, 0(4) + + addis 5, 2, .LC@toc@ha + ld 5, .LC@toc@l(5) + lwa 5, 0(5) + + addis 6, 2, .LCONST1@toc@ha + lwa 6, .LCONST1@toc@l(6) + + addis 7, 2, .LD@toc@ha + ld 7, .LD@toc@l(7) + lwa 7, 0(7) + + addis 8, 2, .LE@toc@ha + ld 8, .LE@toc@l(8) + lwa 8, 0(8) + + addis 9, 2, .LCONST2@toc@ha + ld 9, .LCONST2@toc@l(9) + + addis 10, 2, .LEXT1@toc@ha + ld 10, .LEXT1@toc@l(10) + lwa 10, 0(10) + + addis 11, 2, .LF@toc@ha + ld 11, .LF@toc@l(11) + lwa 11, 0(11) + + addis 12, 2, .LEXT2@toc@ha + ld 12, .LEXT2@toc@l(12) + lwa 12, 0(12) + + addis 3, 2, .LG@toc@ha + ld 3, .LG@toc@l(3) + lwa 3, 0(3) + + blr + + .section .toc,"aw",@progbits +.LA: + .tc a[TC],a +.LB: + .tc b[TC],b +.LC: + .tc c[TC],c +.LCONST1: + .quad 1 +.LD: + .tc d[TC],d +.LE: + .tc e[TC],e +.LCONST2: + .quad 22 +.LEXT1: + .tc ext_1[TC],ext_1 +.LF: + .tc f[TC],f +.LEXT2: + .tc ext_2[TC],ext_2 +.LG: + .tc g[TC],g + +# a is at address 10020000 in the .data section, TOC-pointer points to 100380c0 +# offset from the TOC-pointer to a is −98496 ==> (-2 << 16) + 32576 +# CHECK: addis 3, 2, -2 +# CHECK: addi 3, 3, 32576 +# CHECK: lwa 0, 0(3) +# CHECK: addis 4, 2, -2 +# CHECK: addi 4, 4, 32580 +# CHECK: lwa 4, 0(4) +# CHECK: addis 5, 2, -2 +# CHECK: addi 5, 5, 32584 +# CHECK: lwa 5, 0(5) + +# The first constant stored in the toc is at address 100300e0 (.toc[3]). +# The offset from the TOC-pointer to .toc[3] is -32736 which fits in 16 bits of +# displacement. The high-adjusted part of the access is nop-ed out and the low +# part of the access is rewritten to depend on r2. 
+# CHECK: nop +# CHECK: lwa 6, -32736(2) + +# CHECK: addis 7, 2, -2 +# CHECK: addi 7, 7, 32588 +# CHECK: lwa 7, 0(7) +# CHECK: addis 8, 2, -2 +# CHECK: addi 8, 8, 32592 +# CHECK: lwa 8, 0(8) +# CHECK: nop +# CHECK: ld 9, -32712(2) + +# Access of an externally defined symbol, so it can't be relaxed to a +# toc-relative access. +# CHECK: nop +# CHECK: ld 10, -32704(2) +# CHECK: lwa 10, 0(10) + +# CHECK: addis 11, 2, -2 +# CHECK: addi 11, 11, 32596 +# CHECK: lwa 11, 0(11) +# CHECK: nop +# CHECK: ld 12, -32688(2) +# CHECK: lwa 12, 0(12) +# CHECK: addis 3, 2, -2 +# CHECK: addi 3, 3, 32600 +# CHECK: lwa 3, 0(3) + + +# CHECK: Disassembly of section .data: +# CHECK: 0000000010020000 a: +# CHECK: 0000000010020004 b: +# CHECK: 0000000010020008 c: +# CHECK: 000000001002000c d: +# CHECK: 0000000010020010 e: +# CHECK: 0000000010020014 f: +# CHECK: 0000000010020018 g: + +# CHECK: 00000000100300c0 .got: +# CHECK: 100300c0: c0 80 03 10 +# CHECK: 100300c4: 00 00 00 00 +# CHECK: 00000000100300c8 .toc: