diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -40,28 +40,37 @@ }; enum DFormOpcd { - LBZ = 34, + /* Defined below in the PPC enum so they can be used for mapping legacy + instructions to pc-relative instructions. + LBZ = 34 + LHZ = 40 + LWZ = 32 + LD = 58 + STB = 38 + STH = 44 + STW = 36 + STD = 62 + */ LBZU = 35, - LHZ = 40, LHZU = 41, LHAU = 43, - LWZ = 32, LWZU = 33, LFSU = 49, - LD = 58, LFDU = 51, - STB = 38, STBU = 39, - STH = 44, STHU = 45, - STW = 36, STWU = 37, STFSU = 53, STFDU = 55, - STD = 62, ADDI = 14 }; +// Extracts the 'PO' field of an instruction encoding. +static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); } + +#define PPC_LEGACY_TO_PREFIXED_LINKER +#include "llvm/Target/PPCLegacyToPCRelMap.def" + uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a @@ -326,13 +335,11 @@ static uint16_t highest(uint64_t v) { return v >> 48; } static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; } -// Extracts the 'PO' field of an instruction encoding. -static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); } - static bool isDQFormInstruction(uint32_t encoding) { switch (getPrimaryOpCode(encoding)) { default: return false; + case 6: // Power10 paired loads/stores (lxvp, stxvp). case 56: // The only instruction with a primary opcode of 56 is `lq`. return true; @@ -475,6 +482,44 @@ relocateNoSym(loc, R_PPC64_TOC16_LO, val); break; } + case R_PPC64_GOT_PCREL34: { + uint64_t insn = readPrefixedInstruction(loc); + + // Clear the first 8 bits of the prefix and the first 6 bits of the + // instruction (the primary opcode). 
+ insn &= ~0xFF000000FC000000lu; + + // Replace the cleared bits with the values for PADDI (0x600000038000000); + insn |= 0x600000038000000lu; + writePrefixedInstruction(loc, insn); + relocate(loc, rel, val); + break; + } + case R_PPC64_PCREL_OPT: { + // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can + // be relaxed. The eligibility for the relaxation needs to be determined + // on that relocation since this one does not relocate a symbol. + uint64_t insn = readPrefixedInstruction(loc); + uint32_t accessInsn = read32(loc + rel.addend); + uint64_t pcRelInsn = getPCRelativeForm(accessInsn); + + // This error is not necessary for correctness but is emitted for now + // to ensure we don't miss these opportunities in real code. It can be + // removed at a later date. + if (pcRelInsn == -1lu) { + error("unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" + + Twine::utohexstr(accessInsn)); + break; + } + + // Convert the PADDI to the prefixed version of accessInsn and convert + // accessInsn to a nop. + uint64_t dispOnly = insn & 0x0003ffff0000ffff; + uint64_t finalInsn = dispOnly | pcRelInsn; + writePrefixedInstruction(loc, finalInsn); + write32(loc + rel.addend, 0x60000000); // nop accessInsn. 
+ break; + } default: llvm_unreachable("unexpected relocation type"); } @@ -668,6 +713,7 @@ case R_PPC64_TOC16_LO: return R_GOTREL; case R_PPC64_GOT_PCREL34: + case R_PPC64_PCREL_OPT: return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: @@ -1085,6 +1131,8 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { + if (type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) + return R_RELAX_GOT_PC; if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -999,6 +999,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; + uint64_t lastPPCRelaxedRelocOff = -1lu; for (const Relocation &rel : relocations) { if (rel.expr == R_NONE) @@ -1017,9 +1018,20 @@ switch (expr) { case R_RELAX_GOT_PC: - case R_RELAX_GOT_PC_NOPIC: + case R_RELAX_GOT_PC_NOPIC: { + // The R_PPC64_PCREL_OPT relocation must appear immediately after + // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. + // We can only relax R_PPC64_PCREL_OPT if we have also relaxed + // the associated R_PPC64_GOT_PCREL34 since only the latter has an + // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34 + // and only relax the other if the saved offset matches. + if (type == R_PPC64_GOT_PCREL34) + lastPPCRelaxedRelocOff = offset; + if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff) + break; target->relaxGot(bufLoc, rel, targetVA); break; + } case R_PPC64_RELAX_TOC: // rel.sym refers to the STT_SECTION symbol associated to the .toc input // section. 
If an R_PPC64_TOC16_LO (.toc + addend) references the TOC diff --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s @@ -0,0 +1,130 @@ + .section ".text" + .comm storeVal_vector,8,8 + .comm useVal_vector,8,8 + .globl storeVal_longlong + .globl useAddr_longlong + .globl useVal_longlong + .globl storeVal_sshort + .globl useAddr_sshort + .globl useVal_sshort + .globl storeVal_sint + .globl useAddr_sint + .globl useVal_sint + .globl storeVal_double + .globl useAddr_double + .globl useVal_double + .globl storeVal_float + .globl useAddr_float + .globl useVal_float + .globl storeVal_uint + .globl useAddr_uint + .globl useVal_uint + .globl storeVal_ushort + .globl useAddr_ushort + .globl useVal_ushort + .globl storeVal + .globl useAddr + .globl useVal + .section ".data" + .align 3 + .type storeVal_longlong, @object + .size storeVal_longlong, 8 +storeVal_longlong: + .quad 18 + .type useAddr_longlong, @object + .size useAddr_longlong, 8 +useAddr_longlong: + .quad 17 + .type useVal_longlong, @object + .size useVal_longlong, 8 +useVal_longlong: + .quad 16 + .type storeVal_sshort, @object + .size storeVal_sshort, 2 +storeVal_sshort: + .short -15 + .type useAddr_sshort, @object + .size useAddr_sshort, 2 +useAddr_sshort: + .short -14 + .type useVal_sshort, @object + .size useVal_sshort, 2 +useVal_sshort: + .short -13 + .zero 2 + .type storeVal_sint, @object + .size storeVal_sint, 4 +storeVal_sint: + .long -12 + .type useAddr_sint, @object + .size useAddr_sint, 4 +useAddr_sint: + .long -11 + .type useVal_sint, @object + .size useVal_sint, 4 +useVal_sint: + .long -10 + .zero 4 + .type storeVal_double, @object + .size storeVal_double, 8 +storeVal_double: + .long 858993459 + .long 1076966195 + .type useAddr_double, @object + .size useAddr_double, 8 +useAddr_double: + .long -1717986918 + .long -1070589543 + .type 
useVal_double, @object + .size useVal_double, 8 +useVal_double: + .long 0 + .long 1076756480 + .type storeVal_float, @object + .size storeVal_float, 4 +storeVal_float: + .long 1045220557 + .type useAddr_float, @object + .size useAddr_float, 4 +useAddr_float: + .long -1050568294 + .type useVal_float, @object + .size useVal_float, 4 +useVal_float: + .long 1095761920 + .type storeVal_uint, @object + .size storeVal_uint, 4 +storeVal_uint: + .long 12 + .type useAddr_uint, @object + .size useAddr_uint, 4 +useAddr_uint: + .long 11 + .type useVal_uint, @object + .size useVal_uint, 4 +useVal_uint: + .long 10 + .type storeVal_ushort, @object + .size storeVal_ushort, 2 +storeVal_ushort: + .short 1 + .type useAddr_ushort, @object + .size useAddr_ushort, 2 +useAddr_ushort: + .short 10 + .type useVal_ushort, @object + .size useVal_ushort, 2 +useVal_ushort: + .short 5 + .type storeVal, @object + .size storeVal, 1 +storeVal: + .byte -1 + .type useAddr, @object + .size useAddr, 1 +useAddr: + .byte 10 + .type useVal, @object + .size useVal, 1 +useVal: + .byte 5 diff --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s @@ -0,0 +1,300 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so +# RUN: ld.lld %t1.o %t2.o -o %t +# RUN: ld.lld %t1.o %t2.so -o %ts +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so +# RUN: ld.lld %t1.o %t2.o -o %t +# 
RUN: ld.lld %t1.o %t2.so -o %ts +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D + +# CHECK-S-LABEL: <check_LBZ_STB>: +# CHECK-S-NEXT: plbz 10 +# CHECK-S-NEXT: paddi 9 +# CHECK-S-NEXT: li 3, 0 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: rldicl 9, 9, 9, 60 +# CHECK-S-NEXT: add 9, 9, 10 +# CHECK-S-NEXT: pstb 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LBZ_STB>: +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: li 3, 0 +# CHECK-D-NEXT: lbz 10, 0(8) +# CHECK-D-NEXT: rldicl 9, 9, 9, 60 +# CHECK-D-NEXT: add 9, 9, 10 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: stb 9, 0(10) +# CHECK-D-NEXT: blr +check_LBZ_STB: + pld 8,useVal@got@pcrel(0),1 +.Lpcrel1: + pld 9,useAddr@got@pcrel(0),1 + li 3,0 + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + lbz 10,0(8) + rldicl 9,9,9,60 + add 9,9,10 + pld 10,storeVal@got@pcrel(0),1 +.Lpcrel2: + .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) + stb 9,0(10) + blr + +# CHECK-S-LABEL: <check_LHZ_STH>: +# CHECK-S-NEXT: plhz 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: psth 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LHZ_STH>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lhz 3, 0(9) +# CHECK-D-NEXT: nop +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: sth 3, 0(9) +# CHECK-D-NEXT: blr +check_LHZ_STH: + pld 9,useVal_ushort@got@pcrel(0),1 +.Lpcrel3: + .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) + lhz 3,0(9) + pld 9,storeVal_ushort@got@pcrel(0),1 +.Lpcrel4: + .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) + sth 3,0(9) + blr + +# CHECK-S-LABEL: <check_LWZ_STW>: +# CHECK-S-NEXT: plwz 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstw 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LWZ_STW>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lwz 3, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stw 3, 0(9) +# CHECK-D-NEXT: blr +check_LWZ_STW: + pld 9,useVal_uint@got@pcrel(0),1 +.Lpcrel5: + .reloc 
.Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) + lwz 3,0(9) + pld 9,storeVal_uint@got@pcrel(0),1 +.Lpcrel6: + .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) + stw 3,0(9) + blr + +# CHECK-S-LABEL: <check_LFS_STFS>: +# CHECK-S-NEXT: plfs 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstfs 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LFS_STFS>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lfs 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stfs 1, 0(9) +# CHECK-D-NEXT: blr +check_LFS_STFS: + pld 9,useVal_float@got@pcrel(0),1 +.Lpcrel7: + .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) + lfs 1,0(9) + pld 9,storeVal_float@got@pcrel(0),1 +.Lpcrel8: + .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) + stfs 1,0(9) + blr + +# CHECK-S-LABEL: <check_LFD_STFD>: +# CHECK-S-NEXT: plfd 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstfd 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LFD_STFD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lfd 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stfd 1, 0(9) +# CHECK-D-NEXT: blr +check_LFD_STFD: + pld 9,useVal_double@got@pcrel(0),1 +.Lpcrel9: + .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) + lfd 1,0(9) + pld 9,storeVal_double@got@pcrel(0),1 +.Lpcrel10: + .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) + stfd 1,0(9) + blr + +# CHECK-S-LABEL: <check_LWA_STW>: +# CHECK-S-NEXT: mr 9, 3 +# CHECK-S-NEXT: plwa 3 +# CHECK-S-NEXT: pstw 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LWA_STW>: +# CHECK-D-NEXT: mr 9, 3 +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: lwa 3, 0(8) +# CHECK-D-NEXT: stw 9, 0(10) +# CHECK-D-NEXT: blr +check_LWA_STW: + mr 9,3 + pld 8,useVal_sint@got@pcrel(0),1 +.Lpcrel11: + pld 10,storeVal_sint@got@pcrel(0),1 +.Lpcrel12: + .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) + lwa 3,0(8) + .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8) + stw 9,0(10) + blr + +# CHECK-S-LABEL: <check_LHA_STH>: +# CHECK-S-NEXT: mr 9, 3 +# CHECK-S-NEXT: plha 3 +# CHECK-S-NEXT: psth 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# 
CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LHA_STH>: +# CHECK-D-NEXT: mr 9, 3 +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: lha 3, 0(8) +# CHECK-D-NEXT: sth 9, 0(10) +# CHECK-D-NEXT: blr +check_LHA_STH: + mr 9,3 + pld 8,useVal_sshort@got@pcrel(0),1 +.Lpcrel13: + pld 10,storeVal_sshort@got@pcrel(0),1 +.Lpcrel14: + .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8) + lha 3,0(8) + .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8) + sth 9,0(10) + blr + +# CHECK-S-LABEL: <check_LD_STD>: +# CHECK-S-NEXT: pld 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstd 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LD_STD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: ld 3, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: std 3, 0(9) +# CHECK-D-NEXT: blr +check_LD_STD: + pld 9,useVal_longlong@got@pcrel(0),1 +.Lpcrel15: + .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8) + ld 3,0(9) + pld 9,storeVal_longlong@got@pcrel(0),1 +.Lpcrel16: + .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8) + std 3,0(9) + blr + +# CHECK-S-LABEL: <check_LXV_STXV>: +# CHECK-S-NEXT: plxv 34 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxv 34 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXV_STXV>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxv 34, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxv 34, 0(9) +# CHECK-D-NEXT: blr +check_LXV_STXV: + pld 9,useVal_vector@got@pcrel(0),1 +.Lpcrel17: + .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8) + lxv 34,0(9) + pld 9,storeVal_vector@got@pcrel(0),1 +.Lpcrel18: + .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8) + stxv 34,0(9) + blr + +# CHECK-S-LABEL: <check_LXSSP_STXSSP>: +# CHECK-S-NEXT: plxssp 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxssp 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSSP_STXSSP>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxssp 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxssp 1, 0(9) +# CHECK-D-NEXT: blr +check_LXSSP_STXSSP: + pld 9,useVal_float@got@pcrel(0),1 +.Lpcrel19: + .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8) + lxssp 1,0(9) + pld 
9,storeVal_float@got@pcrel(0),1 +.Lpcrel20: + .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8) + stxssp 1,0(9) + blr + +# CHECK-S-LABEL: <check_LXSD_STXSD>: +# CHECK-S-NEXT: plxsd 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxsd 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSD_STXSD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxsd 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxsd 1, 0(9) +# CHECK-D-NEXT: blr +check_LXSD_STXSD: + pld 9,useVal_double@got@pcrel(0),1 +.Lpcrel21: + .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8) + lxsd 1,0(9) + pld 9,storeVal_double@got@pcrel(0),1 +.Lpcrel22: + .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8) + stxsd 1,0(9) + blr diff --git a/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def @@ -0,0 +1,257 @@ +/* This file defines a mapping between legacy instructions and their + PC-relative prefixed versions. It has two uses: + 1. In the compiler, to tell the compiler for which instructions + it is allowed to emit the R_PPC64_PCREL_OPT relocation. + 2. In LLD, to provide the encoding for the PC-relative prefixed + version when given the legacy instruction encoding. + + The way this file differentiates between the two is with two macros: + PPC_LEGACY_TO_PREFIXED_COMPILER + PPC_LEGACY_TO_PREFIXED_LINKER + + Since the linker is just looking at instruction encodings, there is + an inevitable amount of bit manipulation here that can be difficult to + follow. While a significant amount of effort has gone into documenting + the various values and bit positions, the definitive source for instruction + encodings is ISA3.1. 
The reader is encouraged to reference the ISA that can + be found at: + + https://ibm.ent.box.com/s/hhjfw0x0lrbtyzmiaffnbxh2fuo0fog0 +*/ +#if (!defined PPC_LEGACY_TO_PREFIXED_COMPILER) && \ + (!defined PPC_LEGACY_TO_PREFIXED_LINKER) +#error "Need to define PPC_LEGACY_TO_PREFIXED_{COMPILER|LINKER}" +#endif +#ifdef PPC_LEGACY_TO_PREFIXED_LINKER +// For the linker, the enumerators are primary opcodes for the most part. +// There are some instructions that share primary opcodes. For those, we +// set the extended opcode field that differentiates them as the most +// significant bits. For example, LWA and LD share a primary opcode. What +// differentiates them is the extended opcode field (two least significant +// bits of the encoding). LWA is 0b10 and LD is 0b00. So in order for the +// two instructions to have a unique enumerator value and be mapped +// to the correct prefixed instructions, we shift those values to the +// most significant bits and or them with the primary opcode. +enum PPC : uint64_t { + // Loads + PREFIX_MLS = 0x0610000000000000lu, + PREFIX_8LS = 0x0410000000000000lu, + LBZ = 34, + PLBZpc = PREFIX_MLS, // Prefix only. + LBZ8 = 34, + PLBZ8pc = PREFIX_MLS, // Prefix only. + LHZ = 40, + PLHZpc = PREFIX_MLS, // Prefix only. + LHZ8 = 40, + PLHZ8pc = PREFIX_MLS, // Prefix only. + LWZ = 32, + PLWZpc = PREFIX_MLS, // Prefix only. + LWZ8 = 32, + PLWZ8pc = PREFIX_MLS, // Prefix only. + LHA = 42, + PLHApc = PREFIX_MLS, // Prefix only. + LHA8 = 42, + PLHA8pc = PREFIX_MLS, // Prefix only. + LWA = 58 | 0x80000000, // (Encoding & 0x3) << 30. + PLWApc = PREFIX_8LS | 0xA4000000lu, // Prefix | Primary opc. + LD = 58 | 0x0, // (Encoding & 0x3) << 30. + PLDpc = PREFIX_8LS | 0xE4000000lu, // Prefix | Primary opc. + LFS = 48, + PLFSpc = PREFIX_MLS, // Prefix only. + LXSSP = 57 | 0xC0000000, // (Encoding & 0x3) << 30. + PLXSSPpc = PREFIX_8LS | 0xAC000000lu, // Prefix | Primary opc. + LFD = 50, + PLFDpc = PREFIX_MLS, // Prefix only. 
+ LXSD = 57 | 0x80000000, // (Encoding & 0x3) << 30. + PLXSDpc = PREFIX_8LS | 0xA8000000lu, // Prefix | Primary opc. + LXV = 61 | 0x20000000, // (Encoding & 0x7) << 29. + PLXVpc = PREFIX_8LS | 0xC8000000lu, // Prefix | Primary opc. + LXVP = 6 | 0x0, // (Encoding & 0xF) << 28. + PLXVPpc = PREFIX_8LS | 0xE8000000lu, // Prefix | Primary opc. + DFLOADf32 = 48, + DFLOADf64 = 50, + + // Stores + STB = 38, + PSTBpc = PREFIX_MLS, // Prefix only. + STB8 = 38, + PSTB8pc = PREFIX_MLS, // Prefix only. + STH = 44, + PSTHpc = PREFIX_MLS, // Prefix only. + STH8 = 44, + PSTH8pc = PREFIX_MLS, // Prefix only. + STW = 36, + PSTWpc = PREFIX_MLS, // Prefix only. + STW8 = 36, + PSTW8pc = PREFIX_MLS, // Prefix only. + STD = 62, + PSTDpc = PREFIX_8LS | 0xF4000000lu, // Prefix | Primary opc. + STFS = 52, + PSTFSpc = PREFIX_MLS, // Prefix only. + STXSSP = 61 | 0xC0000000, // (Encoding & 0x3) << 30. + PSTXSSPpc = PREFIX_8LS | 0xBC000000lu, // Prefix | Primary opc. + STFD = 54, + PSTFDpc = PREFIX_MLS, // Prefix only. + STXSD = 61 | 0x80000000, // (Encoding & 0x3) << 30. + PSTXSDpc = PREFIX_8LS | 0xB8000000lu, // Prefix | Primary opc. + STXV = 61 | 0xA0000000, // (Encoding & 0x7) << 29. + PSTXVpc = PREFIX_8LS | 0xD8000000lu, // Prefix | Primary opc. + STXVP = 6 | 0x10000000, // (Encoding & 0xF) << 28. + PSTXVPpc = PREFIX_8LS | 0xF8000000lu, // Prefix | Primary opc. + DFSTOREf32 = 52, + DFSTOREf64 = 54 +}; + +enum Mask : uint64_t { + OPC_AND_RST = 0xFFE00000, // Primary opc (0-5) and R[ST] (6-10). + ONLY_RST = 0x3E00000, // [RS]T (6-10). + ST_STX28_TO5 = 0x3E00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5. +}; +static const uint64_t InstrMasks[][2] = { + // Loads. + {PPC::LBZ, OPC_AND_RST}, + {PPC::LHZ, OPC_AND_RST}, + {PPC::LWZ, OPC_AND_RST}, + {PPC::LHA, OPC_AND_RST}, + {PPC::LWA, ONLY_RST}, + {PPC::LD, ONLY_RST}, + {PPC::LFS, OPC_AND_RST}, + {PPC::LXSSP, ONLY_RST}, + {PPC::LFD, OPC_AND_RST}, + {PPC::LXSD, ONLY_RST}, + {PPC::LXV, ST_STX28_TO5}, + {PPC::LXVP, ONLY_RST}, + // Stores. 
+ {PPC::STB, OPC_AND_RST}, + {PPC::STH, OPC_AND_RST}, + {PPC::STW, OPC_AND_RST}, + {PPC::STD, ONLY_RST}, + {PPC::STFS, OPC_AND_RST}, + {PPC::STXSSP, ONLY_RST}, + {PPC::STFD, OPC_AND_RST}, + {PPC::STXSD, ONLY_RST}, + {PPC::STXV, ST_STX28_TO5}, + {PPC::STXVP, ONLY_RST}}; +#endif + +/******************************************************************************* + * The interface between the compiler and the linker is the following table. + * It contains the mapping between the legacy instructions and their + * pc-relative forms. When support for a new such instruction is added to the + * compiler, it needs to be added to this table if the compiler will use this + * instruction as a target of the R_PPC64_PCREL_OPT relocation. If this is not + * done, the linker will not be able to optimize the instruction. + ******************************************************************************/ +static const uint64_t Map[][2] = { + // Loads. + {PPC::LBZ, PPC::PLBZpc}, + {PPC::LBZ8, PPC::PLBZ8pc}, + {PPC::LHZ, PPC::PLHZpc}, + {PPC::LHZ8, PPC::PLHZ8pc}, + {PPC::LWZ, PPC::PLWZpc}, + {PPC::LWZ8, PPC::PLWZ8pc}, + {PPC::LHA, PPC::PLHApc}, + {PPC::LHA8, PPC::PLHA8pc}, + {PPC::LWA, PPC::PLWApc}, + {PPC::LD, PPC::PLDpc}, + {PPC::LFS, PPC::PLFSpc}, + {PPC::LXSSP, PPC::PLXSSPpc}, + {PPC::LFD, PPC::PLFDpc}, + {PPC::LXSD, PPC::PLXSDpc}, + {PPC::LXV, PPC::PLXVpc}, +// FIXME: compiler support for paired memory operations will be added soon. +// {PPC::LXVP, PPC::PLXVPpc}, + {PPC::DFLOADf32, PPC::PLFSpc}, + {PPC::DFLOADf64, PPC::PLFDpc}, + + // Stores. + {PPC::STB, PPC::PSTBpc}, + {PPC::STB8, PPC::PSTB8pc}, + {PPC::STH, PPC::PSTHpc}, + {PPC::STH8, PPC::PSTH8pc}, + {PPC::STW, PPC::PSTWpc}, + {PPC::STW8, PPC::PSTW8pc}, + {PPC::STD, PPC::PSTDpc}, + {PPC::STFS, PPC::PSTFSpc}, + {PPC::STXSSP, PPC::PSTXSSPpc}, + {PPC::STFD, PPC::PSTFDpc}, + {PPC::STXSD, PPC::PSTXSDpc}, + {PPC::STXV, PPC::PSTXVpc}, +// FIXME: compiler support for paired memory operations will be added soon. 
+// {PPC::STXVP, PPC::PSTXVPpc}, + {PPC::DFSTOREf32, PPC::PSTFSpc}, + {PPC::DFSTOREf64, PPC::PSTFDpc}}; + +static unsigned getInstrMapIdx(unsigned Opc) { + for (unsigned i = 0; i < llvm::array_lengthof(Map); i++) + if (Opc == Map[i][0]) + return i; + return -1u; +} + +#ifdef PPC_LEGACY_TO_PREFIXED_COMPILER +// For the compiler, we only care if the opcode has an entry in the map. +static bool hasPCRelativeForm(unsigned Opc) { + return getInstrMapIdx(Opc) != -1u; +} + +#else +// Linker only: returns the index of the InstrMasks row for Opc, or -1u if +// there is none. The bound must be InstrMasks' length, not Map's. +static unsigned getInstrMaskIdx(unsigned Opc) { + for (unsigned i = 0; i < llvm::array_lengthof(InstrMasks); i++) + if (Opc == InstrMasks[i][0]) + return i; + return -1u; +} + +// Returns the opcode from the PPC enumeration above, accounting for adjustments +// for instructions that share primary opcodes. +static uint64_t getAdjustedOpc(unsigned Encoding) { + uint64_t Opc = getPrimaryOpCode(Encoding); + + // If the primary opcode is shared between multiple instructions, we need to + // fix it up to match the actual instruction we are after. + + // For DQ-Form vector instrs, the two least significant bits are 01 and the + // field that differentiates them is 3 bits wide. + if (Opc == 61 && (Encoding & 0x3) == 0x1) // LXV/STXV. + Opc |= (Encoding & 0x7) << 29; + + // For DS-Form instrs (PO 57, 58, 61, 62), the two least significant bits + // differentiate instrs that share a PO; 62 is std/stdu/stq, only std maps. + else if (Opc == 58 || Opc == 57 || Opc == 61 || Opc == 62) + Opc |= (Encoding & 0x3) << 30; + + // Paired loads and stores from ISA3.1 use the 4 least significant bits to + // differentiate. + else if (Opc == 6) + Opc |= (Encoding & 0xf) << 28; + return Opc; +} + +// Given the encoding of a legacy instruction, returns its prefixed PC-relative +// form with all the displacement bits cleared. The caller is to or this with +// the displacement bits. 
+static uint64_t getPCRelativeForm(unsigned Encoding) { + uint64_t Opc = getAdjustedOpc(Encoding); + unsigned InstrIdx = getInstrMapIdx(Opc); + unsigned MaskIdx = getInstrMaskIdx(Opc); + if (InstrIdx == -1u || MaskIdx == -1u) + return -1lu; + + // The prefixed instruction is computed by masking out bits from the original + // instruction and then or-ing that with the prefixed instruction set bits. + uint64_t PrefixedInstr = (uint64_t)Encoding & InstrMasks[MaskIdx][1]; + PrefixedInstr |= Map[InstrIdx][1]; + // Only LXV/STXV move the [S/T]X bit from bit 28 to bit 5. Test the opcode: + // ST_STX28_TO5 has the same value as ONLY_RST, so the mask cannot tell. + if (Opc == PPC::LXV || Opc == PPC::STXV) { + uint64_t STX = (Encoding & 0x8) << 23; + PrefixedInstr |= STX; + } + return PrefixedInstr; +} +#endif +#undef PPC_LEGACY_TO_PREFIXED_COMPILER +#undef PPC_LEGACY_TO_PREFIXED_LINKER diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -44,446 +44,408 @@ namespace { -static bool hasPCRelativeForm(MachineInstr &Use) { - switch (Use.getOpcode()) { - default: - return false; - case PPC::LBZ: - case PPC::LBZ8: - case PPC::LHA: - case PPC::LHA8: - case PPC::LHZ: - case PPC::LHZ8: - case PPC::LWZ: - case PPC::LWZ8: - case PPC::STB: - case PPC::STB8: - case PPC::STH: - case PPC::STH8: - case PPC::STW: - case PPC::STW8: - case PPC::LD: - case PPC::STD: - case PPC::LWA: - case PPC::LXSD: - case PPC::LXSSP: - case PPC::LXV: - case PPC::STXSD: - case PPC::STXSSP: - case PPC::STXV: - case PPC::LFD: - case PPC::LFS: - case PPC::STFD: - case PPC::STFS: - case PPC::DFLOADf32: - case PPC::DFLOADf64: - case PPC::DFSTOREf32: - case PPC::DFSTOREf64: - return true; +#define PPC_LEGACY_TO_PREFIXED_COMPILER +#include "llvm/Target/PPCLegacyToPCRelMap.def" + +class PPCPreEmitPeephole : public MachineFunctionPass { +public: + static char ID; + PPCPreEmitPeephole() : MachineFunctionPass(ID) { +
initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); } -} - - class PPCPreEmitPeephole : public MachineFunctionPass { - public: - static char ID; - PPCPreEmitPeephole() : MachineFunctionPass(ID) { - initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - } - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - // This function removes any redundant load immediates. It has two level - // loops - The outer loop finds the load immediates BBI that could be used - // to replace following redundancy. The inner loop scans instructions that - // after BBI to find redundancy and update kill/dead flags accordingly. If - // AfterBBI is the same as BBI, it is redundant, otherwise any instructions - // that modify the def register of BBI would break the scanning. - // DeadOrKillToUnset is a pointer to the previous operand that had the - // kill/dead flag set. It keeps track of the def register of BBI, the use - // registers of AfterBBIs and the def registers of AfterBBIs. - bool removeRedundantLIs(MachineBasicBlock &MBB, - const TargetRegisterInfo *TRI) { - LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; - MBB.dump(); dbgs() << "\n"); - - DenseSet InstrsToErase; - for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { - // Skip load immediate that is marked to be erased later because it - // cannot be used to replace any other instructions. - if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) - continue; - // Skip non-load immediate. - unsigned Opc = BBI->getOpcode(); - if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && - Opc != PPC::LIS8) - continue; - // Skip load immediate, where the operand is a relocation (e.g., $r3 = - // LI target-flags(ppc-lo) %const.0). 
- if (!BBI->getOperand(1).isImm()) - continue; - assert(BBI->getOperand(0).isReg() && - "Expected a register for the first operand"); + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } - LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } - Register Reg = BBI->getOperand(0).getReg(); - int64_t Imm = BBI->getOperand(1).getImm(); - MachineOperand *DeadOrKillToUnset = nullptr; - if (BBI->getOperand(0).isDead()) { - DeadOrKillToUnset = &BBI->getOperand(0); - LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset - << " from load immediate " << *BBI - << " is a unsetting candidate\n"); + // This function removes any redundant load immediates. It has two level + // loops - The outer loop finds the load immediates BBI that could be used + // to replace following redundancy. The inner loop scans instructions that + // after BBI to find redundancy and update kill/dead flags accordingly. If + // AfterBBI is the same as BBI, it is redundant, otherwise any instructions + // that modify the def register of BBI would break the scanning. + // DeadOrKillToUnset is a pointer to the previous operand that had the + // kill/dead flag set. It keeps track of the def register of BBI, the use + // registers of AfterBBIs and the def registers of AfterBBIs. + bool removeRedundantLIs(MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; + MBB.dump(); dbgs() << "\n"); + + DenseSet InstrsToErase; + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Skip load immediate that is marked to be erased later because it + // cannot be used to replace any other instructions. 
+ if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) + continue; + // Skip non-load immediate. + unsigned Opc = BBI->getOpcode(); + if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && + Opc != PPC::LIS8) + continue; + // Skip load immediate, where the operand is a relocation (e.g., $r3 = + // LI target-flags(ppc-lo) %const.0). + if (!BBI->getOperand(1).isImm()) + continue; + assert(BBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + + LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); + + Register Reg = BBI->getOperand(0).getReg(); + int64_t Imm = BBI->getOperand(1).getImm(); + MachineOperand *DeadOrKillToUnset = nullptr; + if (BBI->getOperand(0).isDead()) { + DeadOrKillToUnset = &BBI->getOperand(0); + LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset + << " from load immediate " << *BBI + << " is a unsetting candidate\n"); + } + // This loop scans instructions after BBI to see if there is any + // redundant load immediate. + for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); + ++AfterBBI) { + // Track the operand that kill Reg. We would unset the kill flag of + // the operand if there is a following redundant load immediate. + int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); + + // We can't just clear implicit kills, so if we encounter one, stop + // looking further. + if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) { + LLVM_DEBUG(dbgs() + << "Encountered an implicit kill, cannot proceed: "); + LLVM_DEBUG(AfterBBI->dump()); + break; } - // This loop scans instructions after BBI to see if there is any - // redundant load immediate. - for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); - ++AfterBBI) { - // Track the operand that kill Reg. We would unset the kill flag of - // the operand if there is a following redundant load immediate. 
- int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); - - // We can't just clear implicit kills, so if we encounter one, stop - // looking further. - if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) { - LLVM_DEBUG(dbgs() - << "Encountered an implicit kill, cannot proceed: "); - LLVM_DEBUG(AfterBBI->dump()); - break; - } - - if (KillIdx != -1) { - assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); - DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); - LLVM_DEBUG(dbgs() - << " Kill flag of " << *DeadOrKillToUnset << " from " - << *AfterBBI << " is a unsetting candidate\n"); - } - if (!AfterBBI->modifiesRegister(Reg, TRI)) - continue; - // Finish scanning because Reg is overwritten by a non-load - // instruction. - if (AfterBBI->getOpcode() != Opc) - break; - assert(AfterBBI->getOperand(0).isReg() && - "Expected a register for the first operand"); - // Finish scanning because Reg is overwritten by a relocation or a - // different value. - if (!AfterBBI->getOperand(1).isImm() || - AfterBBI->getOperand(1).getImm() != Imm) - break; + if (KillIdx != -1) { + assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); + DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); + LLVM_DEBUG(dbgs() + << " Kill flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + } - // It loads same immediate value to the same Reg, which is redundant. - // We would unset kill flag in previous Reg usage to extend live range - // of Reg first, then remove the redundancy. 
- if (DeadOrKillToUnset) { - LLVM_DEBUG(dbgs() - << " Unset dead/kill flag of " << *DeadOrKillToUnset - << " from " << *DeadOrKillToUnset->getParent()); - if (DeadOrKillToUnset->isDef()) - DeadOrKillToUnset->setIsDead(false); - else - DeadOrKillToUnset->setIsKill(false); - } - DeadOrKillToUnset = - AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); - if (DeadOrKillToUnset) - LLVM_DEBUG(dbgs() - << " Dead flag of " << *DeadOrKillToUnset << " from " - << *AfterBBI << " is a unsetting candidate\n"); - InstrsToErase.insert(&*AfterBBI); - LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; - AfterBBI->dump()); + if (!AfterBBI->modifiesRegister(Reg, TRI)) + continue; + // Finish scanning because Reg is overwritten by a non-load + // instruction. + if (AfterBBI->getOpcode() != Opc) + break; + assert(AfterBBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + // Finish scanning because Reg is overwritten by a relocation or a + // different value. + if (!AfterBBI->getOperand(1).isImm() || + AfterBBI->getOperand(1).getImm() != Imm) + break; + + // It loads same immediate value to the same Reg, which is redundant. + // We would unset kill flag in previous Reg usage to extend live range + // of Reg first, then remove the redundancy. 
+ if (DeadOrKillToUnset) { + LLVM_DEBUG(dbgs() << " Unset dead/kill flag of " << *DeadOrKillToUnset + << " from " << *DeadOrKillToUnset->getParent()); + if (DeadOrKillToUnset->isDef()) + DeadOrKillToUnset->setIsDead(false); + else + DeadOrKillToUnset->setIsKill(false); } + DeadOrKillToUnset = + AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); + if (DeadOrKillToUnset) + LLVM_DEBUG(dbgs() + << " Dead flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + InstrsToErase.insert(&*AfterBBI); + LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; + AfterBBI->dump()); } + } - for (MachineInstr *MI : InstrsToErase) { - MI->eraseFromParent(); - } - NumRemovedInPreEmit += InstrsToErase.size(); - return !InstrsToErase.empty(); + for (MachineInstr *MI : InstrsToErase) { + MI->eraseFromParent(); } + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } - // Check if this instruction is a PLDpc that is part of a GOT indirect - // access. - bool isGOTPLDpc(MachineInstr &Instr) { - if (Instr.getOpcode() != PPC::PLDpc) - return false; + // Check if this instruction is a PLDpc that is part of a GOT indirect + // access. + bool isGOTPLDpc(MachineInstr &Instr) { + if (Instr.getOpcode() != PPC::PLDpc) + return false; - // The result must be a register. - const MachineOperand &LoadedAddressReg = Instr.getOperand(0); - if (!LoadedAddressReg.isReg()) - return false; + // The result must be a register. + const MachineOperand &LoadedAddressReg = Instr.getOperand(0); + if (!LoadedAddressReg.isReg()) + return false; - // Make sure that this is a global symbol. - const MachineOperand &SymbolOp = Instr.getOperand(1); - if (!SymbolOp.isGlobal()) - return false; + // Make sure that this is a global symbol. + const MachineOperand &SymbolOp = Instr.getOperand(1); + if (!SymbolOp.isGlobal()) + return false; - // Finally return true only if the GOT flag is present. 
- return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); - } + // Finally return true only if the GOT flag is present. + return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); + } - bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { - MachineFunction *MF = MBB.getParent(); - // Add this linker opt only if we are using PC Relative memops. - if (!MF->getSubtarget().isUsingPCRelativeCalls()) - return false; - - // Struct to keep track of one def/use pair for a GOT indirect access. - struct GOTDefUsePair { - MachineBasicBlock::iterator DefInst; - MachineBasicBlock::iterator UseInst; - Register DefReg; - Register UseReg; - bool StillValid; - }; - // Vector of def/ues pairs in this basic block. - SmallVector CandPairs; - SmallVector ValidPairs; - bool MadeChange = false; - - // Run through all of the instructions in the basic block and try to - // collect potential pairs of GOT indirect access instructions. - for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { - // Look for the initial GOT indirect load. - if (isGOTPLDpc(*BBI)) { - GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), - BBI->getOperand(0).getReg(), - PPC::NoRegister, true}; - CandPairs.push_back(CurrentPair); - continue; - } + bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { + MachineFunction *MF = MBB.getParent(); + // Add this linker opt only if we are using PC Relative memops. + if (!MF->getSubtarget().isUsingPCRelativeCalls()) + return false; + + // Struct to keep track of one def/use pair for a GOT indirect access. + struct GOTDefUsePair { + MachineBasicBlock::iterator DefInst; + MachineBasicBlock::iterator UseInst; + Register DefReg; + Register UseReg; + bool StillValid; + }; + // Vector of def/use pairs in this basic block. 
+ SmallVector CandPairs; + SmallVector ValidPairs; + bool MadeChange = false; + + // Run through all of the instructions in the basic block and try to + // collect potential pairs of GOT indirect access instructions. + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Look for the initial GOT indirect load. + if (isGOTPLDpc(*BBI)) { + GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), + BBI->getOperand(0).getReg(), PPC::NoRegister, + true}; + CandPairs.push_back(CurrentPair); + continue; + } - // We haven't encountered any new PLD instructions, nothing to check. - if (CandPairs.empty()) + // We haven't encountered any new PLD instructions, nothing to check. + if (CandPairs.empty()) + continue; + + // Run through the candidate pairs and see if any of the registers + // defined in the PLD instructions are used by this instruction. + // Note: the size of CandPairs can change in the loop. + for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { + GOTDefUsePair &Pair = CandPairs[Idx]; + // The instruction does not use or modify this PLD's def reg, + // ignore it. + if (!BBI->readsRegister(Pair.DefReg, TRI) && + !BBI->modifiesRegister(Pair.DefReg, TRI)) continue; - // Run through the candidate pairs and see if any of the registers - // defined in the PLD instructions are used by this instruction. - // Note: the size of CandPairs can change in the loop. - for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { - GOTDefUsePair &Pair = CandPairs[Idx]; - // The instruction does not use or modify this PLD's def reg, - // ignore it. - if (!BBI->readsRegister(Pair.DefReg, TRI) && - !BBI->modifiesRegister(Pair.DefReg, TRI)) - continue; - - // The use needs to be used in the address compuation and not - // as the register being stored for a store. - const MachineOperand *UseOp = - hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr; - - // Check for a valid use. 
- if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && - UseOp->isUse() && UseOp->isKill()) { - Pair.UseInst = BBI; - Pair.UseReg = BBI->getOperand(0).getReg(); - ValidPairs.push_back(Pair); - } - CandPairs.erase(CandPairs.begin() + Idx); + // The use needs to be used in the address computation and not + // as the register being stored for a store. + const MachineOperand *UseOp = + hasPCRelativeForm(BBI->getOpcode()) ? &BBI->getOperand(2) : nullptr; + + // Check for a valid use. + if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && + UseOp->isUse() && UseOp->isKill()) { + Pair.UseInst = BBI; + Pair.UseReg = BBI->getOperand(0).getReg(); + ValidPairs.push_back(Pair); } + CandPairs.erase(CandPairs.begin() + Idx); } + } - // Go through all of the pairs and check for any more valid uses. - for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { - // We shouldn't be here if we don't have a valid pair. - assert(Pair->UseInst.isValid() && Pair->StillValid && - "Kept an invalid def/use pair for GOT PCRel opt"); - // We have found a potential pair. Search through the instructions - // between the def and the use to see if it is valid to mark this as a - // linker opt. - MachineBasicBlock::iterator BBI = Pair->DefInst; - ++BBI; - for (; BBI != Pair->UseInst; ++BBI) { - if (BBI->readsRegister(Pair->UseReg, TRI) || - BBI->modifiesRegister(Pair->UseReg, TRI)) { - Pair->StillValid = false; - break; - } + // Go through all of the pairs and check for any more valid uses. + for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { + // We shouldn't be here if we don't have a valid pair. + assert(Pair->UseInst.isValid() && Pair->StillValid && + "Kept an invalid def/use pair for GOT PCRel opt"); + // We have found a potential pair. Search through the instructions + // between the def and the use to see if it is valid to mark this as a + // linker opt. 
+ MachineBasicBlock::iterator BBI = Pair->DefInst; + ++BBI; + for (; BBI != Pair->UseInst; ++BBI) { + if (BBI->readsRegister(Pair->UseReg, TRI) || + BBI->modifiesRegister(Pair->UseReg, TRI)) { + Pair->StillValid = false; + break; } - - if (!Pair->StillValid) - continue; - - // The load/store instruction that uses the address from the PLD will - // either use a register (for a store) or define a register (for the - // load). That register will be added as an implicit def to the PLD - // and as an implicit use on the second memory op. This is a precaution - // to prevent future passes from using that register between the two - // instructions. - MachineOperand ImplDef = - MachineOperand::CreateReg(Pair->UseReg, true, true); - MachineOperand ImplUse = - MachineOperand::CreateReg(Pair->UseReg, false, true); - Pair->DefInst->addOperand(ImplDef); - Pair->UseInst->addOperand(ImplUse); - - // Create the symbol. - MCContext &Context = MF->getContext(); - MCSymbol *Symbol = - Context.createTempSymbol(Twine("pcrel"), false, false); - MachineOperand PCRelLabel = - MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); - Pair->DefInst->addOperand(*MF, PCRelLabel); - Pair->UseInst->addOperand(*MF, PCRelLabel); - MadeChange |= true; } - return MadeChange; + + if (!Pair->StillValid) + continue; + + // The load/store instruction that uses the address from the PLD will + // either use a register (for a store) or define a register (for the + // load). That register will be added as an implicit def to the PLD + // and as an implicit use on the second memory op. This is a precaution + // to prevent future passes from using that register between the two + // instructions. + MachineOperand ImplDef = + MachineOperand::CreateReg(Pair->UseReg, true, true); + MachineOperand ImplUse = + MachineOperand::CreateReg(Pair->UseReg, false, true); + Pair->DefInst->addOperand(ImplDef); + Pair->UseInst->addOperand(ImplUse); + + // Create the symbol. 
+ MCContext &Context = MF->getContext(); + MCSymbol *Symbol = Context.createTempSymbol(Twine("pcrel"), false, false); + MachineOperand PCRelLabel = + MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); + Pair->DefInst->addOperand(*MF, PCRelLabel); + Pair->UseInst->addOperand(*MF, PCRelLabel); + MadeChange |= true; } + return MadeChange; + } - bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { - // Remove UNENCODED_NOP even when this pass is disabled. - // This needs to be done unconditionally so we don't emit zeros - // in the instruction stream. - SmallVector InstrsToErase; - for (MachineBasicBlock &MBB : MF) - for (MachineInstr &MI : MBB) - if (MI.getOpcode() == PPC::UNENCODED_NOP) - InstrsToErase.push_back(&MI); - for (MachineInstr *MI : InstrsToErase) - MI->eraseFromParent(); - return false; - } - bool Changed = false; - const PPCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { + // Remove UNENCODED_NOP even when this pass is disabled. + // This needs to be done unconditionally so we don't emit zeros + // in the instruction stream. 
SmallVector InstrsToErase; - for (MachineBasicBlock &MBB : MF) { - Changed |= removeRedundantLIs(MBB, TRI); - Changed |= addLinkerOpt(MBB, TRI); - for (MachineInstr &MI : MBB) { - unsigned Opc = MI.getOpcode(); - if (Opc == PPC::UNENCODED_NOP) { + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.getOpcode() == PPC::UNENCODED_NOP) + InstrsToErase.push_back(&MI); + for (MachineInstr *MI : InstrsToErase) + MI->eraseFromParent(); + return false; + } + bool Changed = false; + const PPCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + SmallVector InstrsToErase; + for (MachineBasicBlock &MBB : MF) { + Changed |= removeRedundantLIs(MBB, TRI); + Changed |= addLinkerOpt(MBB, TRI); + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + if (Opc == PPC::UNENCODED_NOP) { + InstrsToErase.push_back(&MI); + continue; + } + // Detect self copies - these can result from running AADB. + if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.getNumOperands() == 3 && + MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { + NumberOfSelfCopies++; + LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); + LLVM_DEBUG(MI.dump()); InstrsToErase.push_back(&MI); continue; - } - // Detect self copies - these can result from running AADB. 
- if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { - const MCInstrDesc &MCID = TII->get(Opc); - if (MCID.getNumOperands() == 3 && - MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && - MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { - NumberOfSelfCopies++; - LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); - LLVM_DEBUG(MI.dump()); - InstrsToErase.push_back(&MI); - continue; - } - else if (MCID.getNumOperands() == 2 && + } else if (MCID.getNumOperands() == 2 && MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { - NumberOfSelfCopies++; - LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); - LLVM_DEBUG(MI.dump()); - InstrsToErase.push_back(&MI); - continue; - } - } - MachineInstr *DefMIToErase = nullptr; - if (TII->convertToImmediateForm(MI, &DefMIToErase)) { - Changed = true; - NumRRConvertedInPreEmit++; - LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); - LLVM_DEBUG(MI.dump()); - if (DefMIToErase) { - InstrsToErase.push_back(DefMIToErase); - } - } - if (TII->foldFrameOffset(MI)) { - Changed = true; - NumFrameOffFoldInPreEmit++; - LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); + NumberOfSelfCopies++; + LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + continue; } } - - // Eliminate conditional branch based on a constant CR bit by - // CRSET or CRUNSET. We eliminate the conditional branch or - // convert it into an unconditional branch. Also, if the CR bit - // is not used by other instructions, we eliminate CRSET as well. 
- auto I = MBB.getFirstInstrTerminator(); - if (I == MBB.instr_end()) - continue; - MachineInstr *Br = &*I; - if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) - continue; - MachineInstr *CRSetMI = nullptr; - Register CRBit = Br->getOperand(0).getReg(); - unsigned CRReg = getCRFromCRBit(CRBit); - bool SeenUse = false; - MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); - for (It++; It != Er; It++) { - if (It->modifiesRegister(CRBit, TRI)) { - if ((It->getOpcode() == PPC::CRUNSET || - It->getOpcode() == PPC::CRSET) && - It->getOperand(0).getReg() == CRBit) - CRSetMI = &*It; - break; + MachineInstr *DefMIToErase = nullptr; + if (TII->convertToImmediateForm(MI, &DefMIToErase)) { + Changed = true; + NumRRConvertedInPreEmit++; + LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); + LLVM_DEBUG(MI.dump()); + if (DefMIToErase) { + InstrsToErase.push_back(DefMIToErase); } - if (It->readsRegister(CRBit, TRI)) - SeenUse = true; } - if (!CRSetMI) continue; - - unsigned CRSetOp = CRSetMI->getOpcode(); - if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || - (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { - // Remove this branch since it cannot be taken. - InstrsToErase.push_back(Br); - MBB.removeSuccessor(Br->getOperand(1).getMBB()); - } - else { - // This conditional branch is always taken. So, remove all branches - // and insert an unconditional branch to the destination of this. 
- MachineBasicBlock::iterator It = Br, Er = MBB.end(); - for (; It != Er; It++) { - if (It->isDebugInstr()) continue; - assert(It->isTerminator() && "Non-terminator after a terminator"); - InstrsToErase.push_back(&*It); - } - if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { - ArrayRef NoCond; - TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, - NoCond, Br->getDebugLoc()); - } - for (auto &Succ : MBB.successors()) - if (Succ != Br->getOperand(1).getMBB()) { - MBB.removeSuccessor(Succ); - break; - } + if (TII->foldFrameOffset(MI)) { + Changed = true; + NumFrameOffFoldInPreEmit++; + LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); + LLVM_DEBUG(MI.dump()); } + } - // If the CRBit is not used by another instruction, we can eliminate - // CRSET/CRUNSET instruction. - if (!SeenUse) { - // We need to check use of the CRBit in successors. - for (auto &SuccMBB : MBB.successors()) - if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { - SeenUse = true; - break; - } - if (!SeenUse) - InstrsToErase.push_back(CRSetMI); + // Eliminate conditional branch based on a constant CR bit by + // CRSET or CRUNSET. We eliminate the conditional branch or + // convert it into an unconditional branch. Also, if the CR bit + // is not used by other instructions, we eliminate CRSET as well. 
+ auto I = MBB.getFirstInstrTerminator(); + if (I == MBB.instr_end()) + continue; + MachineInstr *Br = &*I; + if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) + continue; + MachineInstr *CRSetMI = nullptr; + Register CRBit = Br->getOperand(0).getReg(); + unsigned CRReg = getCRFromCRBit(CRBit); + bool SeenUse = false; + MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); + for (It++; It != Er; It++) { + if (It->modifiesRegister(CRBit, TRI)) { + if ((It->getOpcode() == PPC::CRUNSET || + It->getOpcode() == PPC::CRSET) && + It->getOperand(0).getReg() == CRBit) + CRSetMI = &*It; + break; } + if (It->readsRegister(CRBit, TRI)) + SeenUse = true; } - for (MachineInstr *MI : InstrsToErase) { - LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); - LLVM_DEBUG(MI->dump()); - MI->eraseFromParent(); - NumRemovedInPreEmit++; + if (!CRSetMI) + continue; + + unsigned CRSetOp = CRSetMI->getOpcode(); + if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || + (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { + // Remove this branch since it cannot be taken. + InstrsToErase.push_back(Br); + MBB.removeSuccessor(Br->getOperand(1).getMBB()); + } else { + // This conditional branch is always taken. So, remove all branches + // and insert an unconditional branch to the destination of this. + MachineBasicBlock::iterator It = Br, Er = MBB.end(); + for (; It != Er; It++) { + if (It->isDebugInstr()) + continue; + assert(It->isTerminator() && "Non-terminator after a terminator"); + InstrsToErase.push_back(&*It); + } + if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { + ArrayRef NoCond; + TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, NoCond, + Br->getDebugLoc()); + } + for (auto &Succ : MBB.successors()) + if (Succ != Br->getOperand(1).getMBB()) { + MBB.removeSuccessor(Succ); + break; + } + } + + // If the CRBit is not used by another instruction, we can eliminate + // CRSET/CRUNSET instruction. 
+ if (!SeenUse) { + // We need to check use of the CRBit in successors. + for (auto &SuccMBB : MBB.successors()) + if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { + SeenUse = true; + break; + } + if (!SeenUse) + InstrsToErase.push_back(CRSetMI); } - return Changed; } + for (MachineInstr *MI : InstrsToErase) { + LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); + LLVM_DEBUG(MI->dump()); + MI->eraseFromParent(); + NumRemovedInPreEmit++; + } + return Changed; + } }; }