diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -40,28 +40,37 @@
 };
 
 enum DFormOpcd {
-  LBZ = 34,
+  /* Defined below in the PPC enum so they can be used for mapping legacy
+     instructions to pc-relative instructions.
+  LBZ = 34
+  LHZ = 40
+  LWZ = 32
+  LD = 58
+  STB = 38
+  STH = 44
+  STW = 36
+  STD = 62
+  */
   LBZU = 35,
-  LHZ = 40,
   LHZU = 41,
   LHAU = 43,
-  LWZ = 32,
   LWZU = 33,
   LFSU = 49,
-  LD = 58,
   LFDU = 51,
-  STB = 38,
   STBU = 39,
-  STH = 44,
   STHU = 45,
-  STW = 36,
   STWU = 37,
   STFSU = 53,
   STFDU = 55,
-  STD = 62,
   ADDI = 14
 };
 
+// Extracts the 'PO' field (the top 6 bits) of an instruction encoding.
+static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
+
+#define PPC_LEGACY_TO_PREFIXED_LINKER
+#include "llvm/Target/PPCLegacyToPCRelMap.def"
+
 uint64_t elf::getPPC64TocBase() {
   // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
   // TOC starts where the first of these sections starts. We always create a
@@ -326,13 +335,11 @@
 static uint16_t highest(uint64_t v) { return v >> 48; }
 static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; }
 
-// Extracts the 'PO' field of an instruction encoding.
-static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
-
 static bool isDQFormInstruction(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
     return false;
+  case 6: // Power10 paired loads/stores (lxvp, stxvp).
   case 56:
     // The only instruction with a primary opcode of 56 is `lq`.
     return true;
@@ -475,6 +482,44 @@
     relocateNoSym(loc, R_PPC64_TOC16_LO, val);
     break;
   }
+  case R_PPC64_GOT_PCREL34: {
+    uint64_t insn = readPrefixedInstruction(loc);
+
+    // Clear the first 8 bits of the prefix and the first 6 bits of the
+    // instruction (the primary opcode).
+    insn &= ~0xFF000000FC000000lu;
+
+    // Replace the cleared bits with the encoding of PADDI (0x0600000038000000).
+    insn |= 0x600000038000000lu;
+    writePrefixedInstruction(loc, insn);
+    relocate(loc, rel, val);
+    break;
+  }
+  case R_PPC64_PCREL_OPT: {
+    // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
+    // be relaxed. The eligibility for the relaxation needs to be determined
+    // on that relocation since this one does not relocate a symbol.
+    uint64_t insn = readPrefixedInstruction(loc);
+    uint32_t accessInsn = read32(loc + rel.addend);
+    uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
+
+    // This error is not necessary for correctness but is emitted for now
+    // to ensure we don't miss these opportunities in real code. It can be
+    // removed at a later date.
+    if (pcRelInsn == -1lu) { // getPCRelativeForm found no mapping.
+      error("unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
+            Twine::utohexstr(accessInsn));
+      break;
+    }
+
+    // Convert the PADDI to the prefixed version of accessInsn and convert
+    // accessInsn to a nop.
+    uint64_t dispOnly = insn & 0x0003ffff0000ffff; // Keep only the displacement.
+    uint64_t finalInsn = dispOnly | pcRelInsn;
+    writePrefixedInstruction(loc, finalInsn);
+    write32(loc + rel.addend, 0x60000000); // nop accessInsn.
+    break;
+  }
   default:
     llvm_unreachable("unexpected relocation type");
   }
@@ -668,6 +713,7 @@
   case R_PPC64_TOC16_LO:
     return R_GOTREL;
   case R_PPC64_GOT_PCREL34:
+  case R_PPC64_PCREL_OPT:
     return R_GOT_PC;
   case R_PPC64_TOC16_HA:
   case R_PPC64_TOC16_LO_DS:
@@ -1085,6 +1131,8 @@
 
 RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr expr) const {
+  if (type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT)
+    return R_RELAX_GOT_PC;
   if (expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
   if (expr == R_RELAX_TLS_LD_TO_LE)
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -999,6 +999,7 @@
 void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   assert(flags & SHF_ALLOC);
   const unsigned bits = config->wordsize * 8;
+  uint64_t lastPPCRelaxedRelocOff = UINT64_MAX; // No GOT_PCREL34 relaxed yet.
 
   for (const Relocation &rel : relocations) {
     if (rel.expr == R_NONE)
@@ -1017,9 +1018,20 @@
 
     switch (expr) {
     case R_RELAX_GOT_PC:
-    case R_RELAX_GOT_PC_NOPIC:
+    case R_RELAX_GOT_PC_NOPIC: {
+      // The R_PPC64_PCREL_OPT relocation must appear immediately after
+      // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
+      // We can only relax R_PPC64_PCREL_OPT if we have also relaxed
+      // the associated R_PPC64_GOT_PCREL34 since only the latter has an
+      // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
+      // and only relax the other if the saved offset matches.
+      if (type == R_PPC64_GOT_PCREL34)
+        lastPPCRelaxedRelocOff = offset;
+      if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
+        break;
       target->relaxGot(bufLoc, rel, targetVA);
       break;
+    }
     case R_PPC64_RELAX_TOC:
       // rel.sym refers to the STT_SECTION symbol associated to the .toc input
       // section.
If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
diff --git a/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def
@@ -0,0 +1,257 @@
+/* This file defines a mapping between legacy instructions and their
+   PC-relative prefixed versions. It has two uses:
+   1. In the compiler, to tell the compiler for which instructions
+      it is allowed to emit the R_PPC64_PCREL_OPT relocation.
+   2. In LLD, to provide the encoding for the PC-relative prefixed
+      version when given the legacy instruction encoding.
+
+   The way this file differentiates between the two is with two macros:
+   PPC_LEGACY_TO_PREFIXED_COMPILER
+   PPC_LEGACY_TO_PREFIXED_LINKER
+
+   Since the linker is just looking at instruction encodings, there is
+   an inevitable amount of bit manipulation here that can be difficult to
+   follow. While a significant amount of effort has gone into documenting
+   the various values and bit positions, the definitive source for instruction
+   encodings is ISA3.1. The reader is encouraged to reference the ISA that can
+   be found at:
+
+   https://ibm.ent.box.com/s/hhjfw0x0lrbtyzmiaffnbxh2fuo0fog0
+*/
+#if (!defined PPC_LEGACY_TO_PREFIXED_COMPILER) && \
+    (!defined PPC_LEGACY_TO_PREFIXED_LINKER)
+#error "Need to define PPC_LEGACY_TO_PREFIXED_{COMPILER|LINKER}"
+#endif
+#ifdef PPC_LEGACY_TO_PREFIXED_LINKER
+// For the linker, the enumerators are primary opcodes for the most part.
+// There are some instructions that share primary opcodes. For those, we
+// set the extended opcode field that differentiates them as the most
+// significant bits. For example, LWA and LD share a primary opcode. What
+// differentiates them is the extended opcode field (two least significant
+// bits of the encoding). LWA is 0b10 and LD is 0b00. So in order for the
+// two instructions to have a unique enumerator value and be mapped
+// to the correct prefixed instructions, we shift those values to the
+// most significant bits and or them with the primary opcode.
+enum PPC : uint64_t {
+  // Loads
+  PREFIX_MLS = 0x0610000000000000lu,
+  PREFIX_8LS = 0x0410000000000000lu,
+  LBZ = 34,
+  PLBZpc = PREFIX_MLS, // Prefix only.
+  LBZ8 = 34,
+  PLBZ8pc = PREFIX_MLS, // Prefix only.
+  LHZ = 40,
+  PLHZpc = PREFIX_MLS, // Prefix only.
+  LHZ8 = 40,
+  PLHZ8pc = PREFIX_MLS, // Prefix only.
+  LWZ = 32,
+  PLWZpc = PREFIX_MLS, // Prefix only.
+  LWZ8 = 32,
+  PLWZ8pc = PREFIX_MLS, // Prefix only.
+  LHA = 42,
+  PLHApc = PREFIX_MLS, // Prefix only.
+  LHA8 = 42,
+  PLHA8pc = PREFIX_MLS, // Prefix only.
+  LWA = 58 | 0x80000000, // (Encoding & 0x3) << 30.
+  PLWApc = PREFIX_8LS | 0xA4000000lu, // Prefix | Primary opc.
+  LD = 58 | 0x0, // (Encoding & 0x3) << 30.
+  PLDpc = PREFIX_8LS | 0xE4000000lu, // Prefix | Primary opc.
+  LFS = 48,
+  PLFSpc = PREFIX_MLS, // Prefix only.
+  LXSSP = 57 | 0xC0000000, // (Encoding & 0x3) << 30.
+  PLXSSPpc = PREFIX_8LS | 0xAC000000lu, // Prefix | Primary opc.
+  LFD = 50,
+  PLFDpc = PREFIX_MLS, // Prefix only.
+  LXSD = 57 | 0x80000000, // (Encoding & 0x3) << 30.
+  PLXSDpc = PREFIX_8LS | 0xA8000000lu, // Prefix | Primary opc.
+  LXV = 61 | 0x20000000, // (Encoding & 0x7) << 29.
+  PLXVpc = PREFIX_8LS | 0xC8000000lu, // Prefix | Primary opc.
+  LXVP = 6 | 0x0, // (Encoding & 0xF) << 28.
+  PLXVPpc = PREFIX_8LS | 0xE8000000lu, // Prefix | Primary opc.
+  DFLOADf32 = 48,
+  DFLOADf64 = 50,
+
+  // Stores
+  STB = 38,
+  PSTBpc = PREFIX_MLS, // Prefix only.
+  STB8 = 38,
+  PSTB8pc = PREFIX_MLS, // Prefix only.
+  STH = 44,
+  PSTHpc = PREFIX_MLS, // Prefix only.
+  STH8 = 44,
+  PSTH8pc = PREFIX_MLS, // Prefix only.
+  STW = 36,
+  PSTWpc = PREFIX_MLS, // Prefix only.
+  STW8 = 36,
+  PSTW8pc = PREFIX_MLS, // Prefix only.
+  STD = 62,
+  PSTDpc = PREFIX_8LS | 0xF4000000lu, // Prefix | Primary opc.
+  STFS = 52,
+  PSTFSpc = PREFIX_MLS, // Prefix only.
+  STXSSP = 61 | 0xC0000000, // (Encoding & 0x3) << 30.
+  PSTXSSPpc = PREFIX_8LS | 0xBC000000lu, // Prefix | Primary opc.
+  STFD = 54,
+  PSTFDpc = PREFIX_MLS, // Prefix only.
+  STXSD = 61 | 0x80000000, // (Encoding & 0x3) << 30.
+  PSTXSDpc = PREFIX_8LS | 0xB8000000lu, // Prefix | Primary opc.
+  STXV = 61 | 0xA0000000, // (Encoding & 0x7) << 29.
+  PSTXVpc = PREFIX_8LS | 0xD8000000lu, // Prefix | Primary opc.
+  STXVP = 6 | 0x10000000, // (Encoding & 0xF) << 28.
+  PSTXVPpc = PREFIX_8LS | 0xF8000000lu, // Prefix | Primary opc.
+  DFSTOREf32 = 52,
+  DFSTOREf64 = 54
+};
+
+enum Mask : uint64_t {
+  OPC_AND_RST = 0xFFE00000, // Primary opc (0-5) and R[ST] (6-10).
+  ONLY_RST = 0x3E00000, // [RS]T (6-10).
+  ST_STX28_TO5 = 0x3E00000, // Same value as ONLY_RST; [S/T]X moves 28 -> 5.
+};
+static const uint64_t InstrMasks[][2] = { // {adjusted opcode, bits to keep}
+    // Loads.
+    {PPC::LBZ, OPC_AND_RST},
+    {PPC::LHZ, OPC_AND_RST},
+    {PPC::LWZ, OPC_AND_RST},
+    {PPC::LHA, OPC_AND_RST},
+    {PPC::LWA, ONLY_RST},
+    {PPC::LD, ONLY_RST},
+    {PPC::LFS, OPC_AND_RST},
+    {PPC::LXSSP, ONLY_RST},
+    {PPC::LFD, OPC_AND_RST},
+    {PPC::LXSD, ONLY_RST},
+    {PPC::LXV, ST_STX28_TO5},
+    {PPC::LXVP, ONLY_RST},
+    // Stores.
+    {PPC::STB, OPC_AND_RST},
+    {PPC::STH, OPC_AND_RST},
+    {PPC::STW, OPC_AND_RST},
+    {PPC::STD, ONLY_RST},
+    {PPC::STFS, OPC_AND_RST},
+    {PPC::STXSSP, ONLY_RST},
+    {PPC::STFD, OPC_AND_RST},
+    {PPC::STXSD, ONLY_RST},
+    {PPC::STXV, ST_STX28_TO5},
+    {PPC::STXVP, ONLY_RST}};
+#endif
+
+/*******************************************************************************
+ * The interface between the compiler and the linker is the following table.
+ * It contains the mapping between the legacy instructions and their
+ * pc-relative forms. When support for a new such instruction is added to the
+ * compiler, it needs to be added to this table if the compiler will use this
+ * instruction as a target of the R_PPC64_PCREL_OPT relocation. If this is not
+ * done, the linker will not be able to optimize the instruction.
+ ******************************************************************************/
+static const uint64_t Map[][2] = { // {legacy instruction, pc-relative form}
+    // Loads.
+    {PPC::LBZ, PPC::PLBZpc},
+    {PPC::LBZ8, PPC::PLBZ8pc},
+    {PPC::LHZ, PPC::PLHZpc},
+    {PPC::LHZ8, PPC::PLHZ8pc},
+    {PPC::LWZ, PPC::PLWZpc},
+    {PPC::LWZ8, PPC::PLWZ8pc},
+    {PPC::LHA, PPC::PLHApc},
+    {PPC::LHA8, PPC::PLHA8pc},
+    {PPC::LWA, PPC::PLWApc},
+    {PPC::LD, PPC::PLDpc},
+    {PPC::LFS, PPC::PLFSpc},
+    {PPC::LXSSP, PPC::PLXSSPpc},
+    {PPC::LFD, PPC::PLFDpc},
+    {PPC::LXSD, PPC::PLXSDpc},
+    {PPC::LXV, PPC::PLXVpc},
+// FIXME: compiler support for paired memory operations will be added soon.
+//    {PPC::LXVP, PPC::PLXVPpc},
+    {PPC::DFLOADf32, PPC::PLFSpc},
+    {PPC::DFLOADf64, PPC::PLFDpc},
+
+    // Stores.
+    {PPC::STB, PPC::PSTBpc},
+    {PPC::STB8, PPC::PSTB8pc},
+    {PPC::STH, PPC::PSTHpc},
+    {PPC::STH8, PPC::PSTH8pc},
+    {PPC::STW, PPC::PSTWpc},
+    {PPC::STW8, PPC::PSTW8pc},
+    {PPC::STD, PPC::PSTDpc},
+    {PPC::STFS, PPC::PSTFSpc},
+    {PPC::STXSSP, PPC::PSTXSSPpc},
+    {PPC::STFD, PPC::PSTFDpc},
+    {PPC::STXSD, PPC::PSTXSDpc},
+    {PPC::STXV, PPC::PSTXVpc},
+// FIXME: compiler support for paired memory operations will be added soon.
+//    {PPC::STXVP, PPC::PSTXVPpc},
+    {PPC::DFSTOREf32, PPC::PSTFSpc},
+    {PPC::DFSTOREf64, PPC::PSTFDpc}};
+
+static unsigned getInstrMapIdx(unsigned Opc) {
+  for (unsigned i = 0; i < llvm::array_lengthof(Map); i++)
+    if (Opc == Map[i][0])
+      return i;
+  return -1u; // Opc has no row in Map.
+}
+
+#ifdef PPC_LEGACY_TO_PREFIXED_COMPILER
+// For the compiler, we only care if the opcode has an entry in the map.
+static bool hasPCRelativeForm(unsigned Opc) {
+  return getInstrMapIdx(Opc) != -1u;
+}
+
+#else
+// For the linker, we need to be able to replace a legacy instruction with a
+// PC-Relative instruction.
+static unsigned getInstrMaskIdx(unsigned Opc) {
+  for (unsigned i = 0; i < llvm::array_lengthof(InstrMasks); i++)
+    if (Opc == InstrMasks[i][0])
+      return i;
+  return -1u; // Opc has no row in InstrMasks.
+}
+
+// Returns the opcode from the PPC enumeration above, accounting for adjustments
+// for instructions that share primary opcodes.
+static uint64_t getAdjustedOpc(unsigned Encoding) {
+  uint64_t Opc = getPrimaryOpCode(Encoding);
+
+  // If the primary opcode is shared between multiple instructions, we need to
+  // fix it up to match the actual instruction we are after.
+
+  // For DQ-Form vector instrs, the two least significant bits are 01 and the
+  // field that differentiates them is 3 bits wide.
+  if (Opc == 61 && (Encoding & 0x3) == 0x1) // LXV/STXV.
+    Opc |= (Encoding & 0x7) << 29;
+
+  // For DS-Form instrs, there are 4 different primary opcodes and the two
+  // least significant bits differentiate instrs that share a PO.
+  else if (Opc == 58 || Opc == 57 || Opc == 61 || Opc == 62)
+    Opc |= (Encoding & 0x3) << 30;
+
+  // Paired loads and stores from ISA3.1 use the 4 least significant bits to
+  // differentiate.
+  else if (Opc == 6)
+    Opc |= (Encoding & 0xf) << 28;
+  return Opc;
+}
+
+// Given the encoding of a legacy instruction, returns its prefixed PC-relative
+// form with the displacement bits cleared (the legacy displacement is dropped),
+// or -1lu if no mapping exists. The caller is to or in the displacement bits.
+static uint64_t getPCRelativeForm(unsigned Encoding) {
+  uint64_t Opc = getAdjustedOpc(Encoding);
+  unsigned InstrIdx = getInstrMapIdx(Opc);
+  unsigned MaskIdx = getInstrMaskIdx(Opc);
+  if (InstrIdx == -1u || MaskIdx == -1u)
+    return -1lu;
+
+  // The prefixed instruction is computed by masking out bits from the original
+  // instruction and then or-ing that with the prefixed instruction set bits.
+  uint64_t PrefixedInstr = (uint64_t)Encoding & InstrMasks[MaskIdx][1];
+  PrefixedInstr |= Map[InstrIdx][1];
+
+  // Only LXV/STXV move bit 28 to bit 5 (the mask value equals ONLY_RST).
+  if (Opc == PPC::LXV || Opc == PPC::STXV) {
+    uint64_t STX = (Encoding & 0x8) << 23;
+    PrefixedInstr |= STX;
+  }
+  return PrefixedInstr;
+}
+#endif
+#undef PPC_LEGACY_TO_PREFIXED_COMPILER
+#undef PPC_LEGACY_TO_PREFIXED_LINKER
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -44,44 +44,8 @@
 
 namespace {
 
-static bool hasPCRelativeForm(MachineInstr &Use) {
-  switch (Use.getOpcode()) {
-  default:
-    return false;
-  case PPC::LBZ:
-  case PPC::LBZ8:
-  case PPC::LHA:
-  case PPC::LHA8:
-  case PPC::LHZ:
-  case PPC::LHZ8:
-  case PPC::LWZ:
-  case PPC::LWZ8:
-  case PPC::STB:
-  case PPC::STB8:
-  case PPC::STH:
-  case PPC::STH8:
-  case PPC::STW:
-  case PPC::STW8:
-  case PPC::LD:
-  case PPC::STD:
-  case PPC::LWA:
-  case PPC::LXSD:
-  case PPC::LXSSP:
-  case PPC::LXV:
-  case PPC::STXSD:
-  case PPC::STXSSP:
-  case PPC::STXV:
-  case PPC::LFD:
-  case PPC::LFS:
-  case PPC::STFD:
-  case PPC::STFS:
-  case PPC::DFLOADf32:
-  case PPC::DFLOADf64:
-  case PPC::DFSTOREf32:
-  case PPC::DFSTOREf64:
-    return true;
-  }
-}
+#define PPC_LEGACY_TO_PREFIXED_COMPILER
+#include "llvm/Target/PPCLegacyToPCRelMap.def"
 
 class PPCPreEmitPeephole : public MachineFunctionPass {
 public:
@@ -280,8 +244,9 @@
 
           // The use needs to be used in the address compuation and not
           // as the register being stored for a store.
-          const MachineOperand *UseOp =
-              hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+          const MachineOperand *UseOp = hasPCRelativeForm(BBI->getOpcode())
+                                            ? &BBI->getOperand(2)
+                                            : nullptr;
 
           // Check for a valid use.
           if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&