Index: lld/ELF/Arch/PPC64.cpp
===================================================================
--- lld/ELF/Arch/PPC64.cpp
+++ lld/ELF/Arch/PPC64.cpp
@@ -386,9 +386,17 @@
   }
 }
 
+// Returns true for the TOC-relative relocation types whose instructions
+// relocateOne() may rewrite when the high-adjusted TOC offset is zero.
+static bool isTocRelType(RelType Type) {
+  return Type == R_PPC64_TOC16_HA || Type == R_PPC64_TOC16_LO_DS ||
+         Type == R_PPC64_TOC16_LO;
+}
+
 void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // For a TOC-relative relocation, proceed in terms of the corresponding
   // ADDR16 relocation type.
+  bool IsTocRelType = isTocRelType(Type);
   std::tie(Type, Val) = toAddr16Rel(Type, Val);
 
   switch (Type) {
@@ -412,7 +420,14 @@
   case R_PPC64_ADDR16_HA:
   case R_PPC64_REL16_HA:
   case R_PPC64_TPREL16_HA:
-    write16(Loc, ha(Val));
+    // When the high-adjusted part of a toc relocation evaluates to 0,
+    // we can optimize it into a nop.
+    if (IsTocRelType && ha(Val) == 0) {
+      uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
+      write32(Loc - EndianOffset, 0x60000000);
+    } else {
+      write16(Loc, ha(Val));
+    }
     break;
   case R_PPC64_ADDR16_HI:
   case R_PPC64_REL16_HI:
@@ -438,10 +453,26 @@
   case R_PPC64_ADDR16_LO:
   case R_PPC64_REL16_LO:
   case R_PPC64_TPREL16_LO:
+    // When the high-adjusted part of a toc relocation evaluates to 0, it is
+    // changed into a nop. The lo part then needs to be updated to use the toc
+    // pointer register r2, as the base register.
+    if (IsTocRelType && ha(Val) == 0) {
+      uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
+      uint32_t Inst = (read32(Loc - EndianOffset) & 0xFFE00000) | 0x00020000;
+      write32(Loc - EndianOffset, Inst);
+    }
     write16(Loc, lo(Val));
     break;
   case R_PPC64_ADDR16_LO_DS:
   case R_PPC64_TPREL16_LO_DS:
+    // When the high-adjusted part of a toc relocation evaluates to 0, it is
+    // changed into a nop. The lo part must then use the toc pointer r2 as its
+    // base register; keep the low 2 opcode bits that write16 below preserves.
+    if (IsTocRelType && ha(Val) == 0) {
+      uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
+      uint32_t Inst = (read32(Loc - EndianOffset) & 0xFFE00003) | 0x00020000;
+      write32(Loc - EndianOffset, Inst);
+    }
     write16(Loc, (read16(Loc) & 3) | (lo(Val) & ~3));
     break;
   case R_PPC64_ADDR32:
Index: lld/test/ELF/ppc64-func-entry-points.s
===================================================================
--- lld/test/ELF/ppc64-func-entry-points.s
+++ lld/test/ELF/ppc64-func-entry-points.s
@@ -75,6 +75,6 @@
 // CHECK: foo_external_diff:
 // CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 2
 // CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640
-// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 0
+// CHECK-NEXT: 10010088: {{.*}} nop
 // CHECK: foo_external_same:
 // CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3
Index: lld/test/ELF/ppc64-got-indirect.s
===================================================================
--- lld/test/ELF/ppc64-got-indirect.s
+++ lld/test/ELF/ppc64-got-indirect.s
@@ -83,8 +83,8 @@
 # CHECK: _start:
 # CHECK-NEXT: 10010000: {{.*}} addis 2, 12, 3
 # CHECK-NEXT: 10010004: {{.*}} addi 2, 2, -32768
-# CHECK-NEXT: 10010008: {{.*}} addis 3, 2, 0
-# CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(3)
+# CHECK-NEXT: 10010008: {{.*}} nop
+# CHECK-NEXT: 1001000c: {{.*}} ld 3, -32760(2)
 # CHECK: 1001001c: {{.*}} lwa 3, 0(3)
 
 # CHECK-LE: Disassembly of section .data:
Index: lld/test/ELF/ppc64-relocs.s
===================================================================
--- lld/test/ELF/ppc64-relocs.s
+++ lld/test/ELF/ppc64-relocs.s
@@ -63,7 +63,7 @@
 
 # CHECK: Disassembly of section .R_PPC64_TOC16_HA:
 # CHECK: .FR_PPC64_TOC16_HA:
-# CHECK: 10010018: {{.*}} addis 1, 2, 0
+# CHECK: 10010018: {{.*}} nop
 
 .section .R_PPC64_REL24,"ax",@progbits
 .globl .FR_PPC64_REL24
@@ -160,8 +160,8 @@
 # 0x10000190 + 0xfeb4 = 0x10010044
 # CHECK: Disassembly of section .R_PPC64_REL32:
 # CHECK: .FR_PPC64_REL32:
-# CHECK: 1001003c: {{.*}} addis 5, 2, 0
-# CHECK: 10010040: {{.*}} ld 5, -32736(5)
+# CHECK: 1001003c: {{.*}} nop
+# CHECK: 10010040: {{.*}} ld 5, -32736(2)
 # CHECK: 10010044: {{.*}} add 3, 3, 4
 
 .section .R_PPC64_REL64, "ax",@progbits
Index: lld/test/ELF/ppc64-toc-addis-nop.s
===================================================================
--- /dev/null
+++ lld/test/ELF/ppc64-toc-addis-nop.s
@@ -0,0 +1,53 @@
+// REQUIRES: ppc
+
+// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
+// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s
+
+// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
+// RUN: llvm-readelf -relocations --wide %t.o | FileCheck --check-prefix=InputRelocs %s
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s
+
+        .text
+        .abiversion 2
+
+test1:                                  # @test1
+        addis 3, 2, .LC0@toc@ha
+        addi 3, 3, .LC0@toc@l
+
+test2:
+        addis 3, 2, .LC0@toc@ha
+        ld 3, .LC0@toc@l(3)
+
+        .section        .toc,"aw",@progbits
+.LC0:
+        .tc y[TC],y
+        .type   y,@object               # @y
+        .data
+        .globl  y
+        .p2align        2
+y:
+        .long   55                      # 0x37
+        .size   y, 4
+
+// InputRelocs: Relocation section '.rela.text'
+// InputRelocs: R_PPC64_TOC16_HA
+// InputRelocs: R_PPC64_TOC16_LO
+// InputRelocs: R_PPC64_TOC16_LO_DS
+
+// Verify that we optimize:
+//    addis r3, r2, 0
+//    addi  r3, r3, -32768
+// into:
+//    nop
+//    addi  r3, r2, -32768
+
+// Dis: test1:
+// Dis: nop
+// Dis: addi 3, 2, -32768
+
+// Dis: test2:
+// Dis: nop
+// Dis: ld 3, -32768(2)