diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -62,6 +62,96 @@
   ADDI = 14
 };
 
+// Encodings (primary opcode plus any extended-opcode bits) of the legacy
+// load/store instructions that have a prefixed form listed in PPCInsns.def.
+enum class PPCLegacyInsn : uint32_t {
+  NOINSN = 0,
+  // Loads.
+  LBZ = 0x88000000,
+  LHZ = 0xa0000000,
+  LWZ = 0x80000000,
+  LHA = 0xa8000000,
+  LWA = 0xe8000002,
+  LD = 0xe8000000,
+  LFS = 0xc0000000,
+  LXSSP = 0xe4000003,
+  LFD = 0xc8000000,
+  LXSD = 0xe4000002,
+  LXV = 0xf4000001,
+  LXVP = 0x18000000,
+
+  // Stores.
+  STB = 0x98000000,
+  STH = 0xb0000000,
+  STW = 0x90000000,
+  STD = 0xf8000000,
+  STFS = 0xd0000000,
+  STXSSP = 0xf4000003,
+  STFD = 0xd8000000,
+  STXSD = 0xf4000002,
+  STXV = 0xf4000005,
+  STXVP = 0x18000001
+};
+// Encodings of the prefixed counterparts: the prefix word in the upper 32 bits
+// and, where it differs from the legacy opcode, the suffix opcode below it.
+enum class PPCPrefixedInsn : uint64_t {
+  NOINSN = 0,
+  PREFIX_MLS = 0x0610000000000000,
+  PREFIX_8LS = 0x0410000000000000,
+
+  // Loads.
+  PLBZ = PREFIX_MLS,
+  PLHZ = PREFIX_MLS,
+  PLWZ = PREFIX_MLS,
+  PLHA = PREFIX_MLS,
+  PLWA = PREFIX_8LS | 0xa4000000,
+  PLD = PREFIX_8LS | 0xe4000000,
+  PLFS = PREFIX_MLS,
+  PLXSSP = PREFIX_8LS | 0xac000000,
+  PLFD = PREFIX_MLS,
+  PLXSD = PREFIX_8LS | 0xa8000000,
+  PLXV = PREFIX_8LS | 0xc8000000,
+  PLXVP = PREFIX_8LS | 0xe8000000,
+
+  // Stores.
+  PSTB = PREFIX_MLS,
+  PSTH = PREFIX_MLS,
+  PSTW = PREFIX_MLS,
+  PSTD = PREFIX_8LS | 0xf4000000,
+  PSTFS = PREFIX_MLS,
+  PSTXSSP = PREFIX_8LS | 0xbc000000,
+  PSTFD = PREFIX_MLS,
+  PSTXSD = PREFIX_8LS | 0xb8000000,
+  PSTXV = PREFIX_8LS | 0xd8000000,
+  PSTXVP = PREFIX_8LS | 0xf8000000
+};
+// Returns true if `encoding` equals one of the PPCLegacyInsn enumerators listed
+// in PPCInsns.def, other than NOINSN.
+static bool checkPPCLegacyInsn(uint32_t encoding) {
+  PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);
+  if (insn == PPCLegacyInsn::NOINSN)
+    return false;
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  if (insn == PPCLegacyInsn::Legacy)                                           \
+    return true;
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  return false;
+}
+
+// Masks to apply to legacy instructions when converting them to prefixed,
+// pc-relative versions. For the most part, the primary opcode is shared
+// between the legacy instruction and the suffix of its prefixed version.
+// However, there are some instances where that isn't the case (DS-Form and
+// DQ-form instructions).
+enum class LegacyToPrefixMask : uint64_t {
+  NOMASK = 0x0,
+  OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
+  ONLY_RST = 0x3e00000,     // R[ST] (6-10).
+  ST_STX28_TO5 =
+      0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
+};
+
 uint64_t elf::getPPC64TocBase() {
   // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
   // TOC starts where the first of these sections starts. We always create a
@@ -333,6 +423,7 @@
   switch (getPrimaryOpCode(encoding)) {
   default:
     return false;
+  case 6: // Power10 paired loads/stores (lxvp, stxvp).
   case 56:
     // The only instruction with a primary opcode of 56 is `lq`.
     return true;
@@ -344,6 +435,87 @@
   }
 }
 
+// Returns true for the legacy loads/stores treated as DS-form, i.e. those
+// whose low two displacement bits are masked out by getTotalDisp.
+static bool isDSFormInstruction(PPCLegacyInsn insn) {
+  switch (insn) {
+  default:
+    return false;
+  case PPCLegacyInsn::LWA:
+  case PPCLegacyInsn::LD:
+  case PPCLegacyInsn::LXSD:
+  case PPCLegacyInsn::LXSSP:
+  case PPCLegacyInsn::STD:
+  case PPCLegacyInsn::STXSD:
+  case PPCLegacyInsn::STXSSP:
+    return true;
+  }
+}
+
+// Maps a raw instruction word to its PPCLegacyInsn enumerator, or to NOINSN if
+// the instruction is not one of those listed in PPCInsns.def.
+static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
+  uint32_t opc = encoding & 0xfc000000;
+
+  // If the primary opcode is shared between multiple instructions, we need to
+  // fix it up to match the actual instruction we are after.
+  if ((opc == 0xe4000000 || opc == 0xe8000000 || opc == 0xf4000000 ||
+       opc == 0xf8000000) &&
+      !isDQFormInstruction(encoding))
+    opc = encoding & 0xfc000003;
+  else if (opc == 0xf4000000)
+    opc = encoding & 0xfc000007;
+  else if (opc == 0x18000000)
+    opc = encoding & 0xfc00000f;
+
+  // If the value is not one of the enumerators in PPCLegacyInsn, we want to
+  // return PPCLegacyInsn::NOINSN.
+  if (!checkPPCLegacyInsn(opc))
+    return PPCLegacyInsn::NOINSN;
+  return static_cast<PPCLegacyInsn>(opc);
+}
+
+// Returns the prefixed counterpart of `insn` per PPCInsns.def, or NOINSN.
+static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
+  switch (insn) {
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  case PPCLegacyInsn::Legacy:                                                  \
+    return PPCPrefixedInsn::PCRel
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  }
+  return PPCPrefixedInsn::NOINSN;
+}
+
+// Returns the mask of legacy-instruction bits to carry into the prefixed form.
+static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
+  switch (insn) {
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  case PPCLegacyInsn::Legacy:                                                  \
+    return LegacyToPrefixMask::InsnMask
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  }
+  return LegacyToPrefixMask::NOMASK;
+}
+
+// Builds the prefixed encoding (without displacement) equivalent to the legacy
+// load/store `encoding`; returns UINT64_C(-1) if there is no such form.
+static uint64_t getPCRelativeForm(uint32_t encoding) {
+  PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
+  PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn);
+  if (pcrelInsn == PPCPrefixedInsn::NOINSN)
+    return UINT64_C(-1);
+  LegacyToPrefixMask origInsnMask = getInsnMask(origInsn);
+  uint64_t pcrelEncoding =
+      (uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);
+
+  // If the mask requires moving bit 28 to bit 5, do that now.
+  if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
+    pcrelEncoding |= (encoding & 0x8) << 23;
+  return pcrelEncoding;
+}
+
 static bool isInstructionUpdateForm(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
@@ -368,6 +540,25 @@
   }
 }
 
+// Compute the total displacement between the prefixed instruction that gets
+// to the start of the data and the load/store instruction that has the offset
+// into the data structure.
+// For example:
+// paddi 3, 0, 1000, 1
+// lwz 3, 20(3)
+// Should add up to 1020 for total displacement.
+static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
+  int64_t disp34 = llvm::SignExtend64(
+      ((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34);
+  int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);
+  // For DS and DQ form instructions, we need to mask out the XO bits.
+  if (isDQFormInstruction(accessInsn))
+    disp16 &= ~0xf;
+  else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn)))
+    disp16 &= ~0x3;
+  return disp34 + disp16;
+}
+
 // There are a number of places when we either want to read or write an
 // instruction when handling a half16 relocation type. On big-endian the buffer
 // pointer is pointing into the middle of the word we want to extract, and on
@@ -475,6 +666,49 @@
     relocateNoSym(loc, R_PPC64_TOC16_LO, val);
     break;
   }
+  case R_PPC64_GOT_PCREL34: {
+    // Clear the first 8 bits of the prefix and the first 6 bits of the
+    // instruction (the primary opcode).
+    uint64_t insn = readPrefixedInstruction(loc);
+    if ((insn & 0xfc000000) != 0xe4000000)
+      error("expected a 'pld' for got-indirect to pc-relative relaxing");
+    insn &= ~0xff000000fc000000;
+
+    // Replace the cleared bits with the values for PADDI (0x600000038000000).
+    insn |= 0x600000038000000;
+    writePrefixedInstruction(loc, insn);
+    relocate(loc, rel, val);
+    break;
+  }
+  case R_PPC64_PCREL_OPT: {
+    // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
+    // be relaxed. The eligibility for the relaxation needs to be determined
+    // on that relocation since this one does not relocate a symbol.
+    uint64_t insn = readPrefixedInstruction(loc);
+    uint32_t accessInsn = read32(loc + rel.addend);
+    uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
+
+    // This error is not necessary for correctness but is emitted for now
+    // to ensure we don't miss these opportunities in real code. It can be
+    // removed at a later date.
+    if (pcRelInsn == UINT64_C(-1)) {
+      errorOrWarn(
+          "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
+          Twine::utohexstr(accessInsn));
+      break;
+    }
+
+    int64_t totalDisp = getTotalDisp(insn, accessInsn);
+    if (!isInt<34>(totalDisp))
+      break; // Displacement doesn't fit.
+    // Convert the PADDI to the prefixed version of accessInsn and convert
+    // accessInsn to a nop.
+    writePrefixedInstruction(loc, pcRelInsn |
+                                      ((totalDisp & 0x3ffff0000) << 16) |
+                                      (totalDisp & 0xffff));
+    write32(loc + rel.addend, 0x60000000); // nop accessInsn.
+    break;
+  }
   default:
     llvm_unreachable("unexpected relocation type");
   }
@@ -668,6 +902,7 @@
   case R_PPC64_TOC16_LO:
     return R_GOTREL;
   case R_PPC64_GOT_PCREL34:
+  case R_PPC64_PCREL_OPT:
     return R_GOT_PC;
   case R_PPC64_TOC16_HA:
   case R_PPC64_TOC16_LO_DS:
@@ -1024,6 +1259,9 @@
                              (val & si1Mask));
     break;
   }
+  // If we encounter a PCREL_OPT relocation that we won't optimize.
+  case R_PPC64_PCREL_OPT:
+    break;
   default:
     llvm_unreachable("unknown relocation");
   }
@@ -1080,6 +1318,14 @@
 
 RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr expr) const {
+  if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&
+      config->pcRelOptimize) {
+    // It only makes sense to optimize pld since paddi means that the address
+    // of the object in the GOT is required rather than the object itself.
+    assert(data && "Expecting an instruction encoding here");
+    if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000)
+      return R_PPC64_RELAX_GOT_PC;
+  }
   if (expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
   if (expr == R_RELAX_TLS_LD_TO_LE)
diff --git a/lld/ELF/Arch/PPCInsns.def b/lld/ELF/Arch/PPCInsns.def
new file mode 100644
--- /dev/null
+++ b/lld/ELF/Arch/PPCInsns.def
@@ -0,0 +1,27 @@
+#ifndef PCREL_OPT
+#error "Need to define function-style macro PCREL_OPT"
+#endif
+PCREL_OPT(NOINSN, NOINSN, NOMASK);
+PCREL_OPT(LBZ, PLBZ, OPC_AND_RST);
+PCREL_OPT(LHZ, PLHZ, OPC_AND_RST);
+PCREL_OPT(LWZ, PLWZ, OPC_AND_RST);
+PCREL_OPT(LHA, PLHA, OPC_AND_RST);
+PCREL_OPT(LWA, PLWA, ONLY_RST);
+PCREL_OPT(LD, PLD, ONLY_RST);
+PCREL_OPT(LFS, PLFS, OPC_AND_RST);
+PCREL_OPT(LXSSP, PLXSSP, ONLY_RST);
+PCREL_OPT(LFD, PLFD, OPC_AND_RST);
+PCREL_OPT(LXSD, PLXSD, ONLY_RST);
+PCREL_OPT(LXV, PLXV, ST_STX28_TO5);
+PCREL_OPT(LXVP, PLXVP, OPC_AND_RST);
+
+PCREL_OPT(STB, PSTB, OPC_AND_RST);
+PCREL_OPT(STH, PSTH, OPC_AND_RST);
+PCREL_OPT(STW, PSTW, OPC_AND_RST);
+PCREL_OPT(STD, PSTD, ONLY_RST);
+PCREL_OPT(STFS, PSTFS, OPC_AND_RST);
+PCREL_OPT(STXSSP, PSTXSSP, ONLY_RST);
+PCREL_OPT(STFD, PSTFD, OPC_AND_RST);
+PCREL_OPT(STXSD, PSTXSD, ONLY_RST);
+PCREL_OPT(STXV, PSTXV, ST_STX28_TO5);
+PCREL_OPT(STXVP, PSTXVP, OPC_AND_RST);
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -206,6 +206,7 @@
   bool thinLTOIndexOnly;
   bool timeTraceEnabled;
   bool tocOptimize;
+  bool pcRelOptimize;
   bool undefinedVersion;
   bool unique;
   bool useAndroidRelrTags = false;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -309,6 +309,9 @@
   if (config->tocOptimize && config->emachine != EM_PPC64)
     error("--toc-optimize is only supported on the PowerPC64 target");
 
+  if (config->pcRelOptimize && config->emachine != EM_PPC64)
+    error("--pcrel-optimize is only supported on the PowerPC64 target");
+
   if (config->pie && config->shared)
     error("-shared and -pie may not be used together");
 
@@ -1288,6 +1291,8 @@
 
   config->tocOptimize =
       args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
+  config->pcRelOptimize =
+      args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
 }
 
 // Returns a value of "-format" option.
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -807,6 +807,7 @@
   case R_PPC64_TOCBASE:
     return getPPC64TocBase() + a;
   case R_RELAX_GOT_PC:
+  case R_PPC64_RELAX_GOT_PC:
     return sym.getVA(a) - p;
   case R_RELAX_TLS_GD_TO_LE:
   case R_RELAX_TLS_IE_TO_LE:
@@ -1004,6 +1005,7 @@
 void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   assert(flags & SHF_ALLOC);
   const unsigned bits = config->wordsize * 8;
+  uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
 
   for (const Relocation &rel : relocations) {
     if (rel.expr == R_NONE)
@@ -1025,6 +1027,20 @@
     case R_RELAX_GOT_PC_NOPIC:
       target->relaxGot(bufLoc, rel, targetVA);
       break;
+    case R_PPC64_RELAX_GOT_PC: {
+      // The R_PPC64_PCREL_OPT relocation must appear immediately after
+      // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
+      // We can only relax R_PPC64_PCREL_OPT if we have also relaxed
+      // the associated R_PPC64_GOT_PCREL34 since only the latter has an
+      // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
+      // and only relax the other if the saved offset matches.
+      if (type == R_PPC64_GOT_PCREL34)
+        lastPPCRelaxedRelocOff = offset;
+      if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
+        break;
+      target->relaxGot(bufLoc, rel, targetVA);
+      break;
+    }
     case R_PPC64_RELAX_TOC:
       // rel.sym refers to the STT_SECTION symbol associated to the .toc input
       // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -404,6 +404,10 @@
     "(PowerPC64) Enable TOC related optimizations (default)",
     "(PowerPC64) Disable TOC related optimizations">;
 
+defm pcrel_optimize : B<"pcrel-optimize",
+    "(PowerPC64) Enable PC-relative optimizations (default)",
+    "(PowerPC64) Disable PC-relative optimizations">;
+
 def trace: F<"trace">, HelpText<"Print the names of the input files">;
 
 defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -96,6 +96,7 @@
   R_PPC64_CALL_PLT,
   R_PPC64_RELAX_TOC,
   R_PPC64_TOCBASE,
+  R_PPC64_RELAX_GOT_PC,
   R_RISCV_ADD,
   R_RISCV_PC_INDIRECT,
 };
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -376,7 +376,7 @@
 static bool isRelExpr(RelExpr expr) {
   return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
                R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC,
-               R_RISCV_PC_INDIRECT>(expr);
+               R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
 }
 
 // Returns true if a given relocation can be computed at link-time.
diff --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
@@ -0,0 +1,66 @@
+  .section ".text"
+  .comm storeVal_vector,8,8
+  .comm useVal_vector,8,8
+  .globl storeVal_longlong, useAddr_longlong, useVal_longlong, storeVal_sshort
+  .globl useAddr_sshort, useVal_sshort, storeVal_sint, useAddr_sint, useVal_sint
+  .globl storeVal_double, useAddr_double, useVal_double, storeVal_float
+  .globl useAddr_float, useVal_float, storeVal_uint, useAddr_uint
+  .globl useVal_uint, storeVal_ushort, useAddr_ushort, useVal_ushort
+  .globl storeVal, useAddr, useVal
+  .section ".data"
+  .align 3
+  .type storeVal_longlong, @object
+  .size storeVal_longlong, 8
+storeVal_longlong:
+  .quad 18
+useAddr_longlong:
+  .quad 17
+useVal_longlong:
+  .quad 16
+storeVal_sshort:
+  .short -15
+useAddr_sshort:
+  .short -14
+useVal_sshort:
+  .short -13
+  .zero 2
+storeVal_sint:
+  .long -12
+useAddr_sint:
+  .long -11
+useVal_sint:
+  .long -10
+  .zero 4
+storeVal_double:
+  .long 858993459
+  .long 1076966195
+useAddr_double:
+  .long -1717986918
+  .long -1070589543
+useVal_double:
+  .long 0
+  .long 1076756480
+storeVal_float:
+  .long 1045220557
+useAddr_float:
+  .long -1050568294
+useVal_float:
+  .long 1095761920
+storeVal_uint:
+  .long 12
+useAddr_uint:
+  .long 11
+useVal_uint:
+  .long 10
+storeVal_ushort:
+  .short 1
+useAddr_ushort:
+  .short 10
+useVal_ushort:
+  .short 5
+storeVal:
+  .byte -1
+useAddr:
+  .byte 10
+useVal:
+  .byte 5
diff --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
@@ -0,0 +1,392 @@
+# REQUIRES: ppc
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# CHECK-S-LABEL: <check_LBZ_STB>:
+# CHECK-S-NEXT:  plbz 10
+# CHECK-S-NEXT:  paddi 9
+# CHECK-S-NEXT:  li 3, 0
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  rldicl 9, 9, 9, 60
+# CHECK-S-NEXT:  add 9, 9, 10
+# CHECK-S-NEXT:  pstb 9
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LBZ_STB>:
+# CHECK-D-NEXT:  pld 8
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  li 3, 0
+# CHECK-D-NEXT:  lbz 10, 0(8)
+# CHECK-D-NEXT:  rldicl 9, 9, 9, 60
+# CHECK-D-NEXT:  add 9, 9, 10
+# CHECK-D-NEXT:  pld 10
+# CHECK-D-NEXT:  stb 9, 0(10)
+# CHECK-D-NEXT:  blr
+check_LBZ_STB:
+  pld 8,useVal@got@pcrel(0),1
+.Lpcrel1:
+  pld 9,useAddr@got@pcrel(0),1
+  li 3,0
+  .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+  lbz 10,0(8)
+  rldicl 9,9,9,60
+  add 9,9,10
+  pld 10,storeVal@got@pcrel(0),1
+.Lpcrel2:
+  .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+  stb 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHZ_STH>:
+# CHECK-S-NEXT:  plhz 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  psth 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LHZ_STH>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lhz 3, 0(9)
+# CHECK-D-NEXT:  nop
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  sth 3, 0(9)
+# CHECK-D-NEXT:  blr
+check_LHZ_STH:
+  pld 9,useVal_ushort@got@pcrel(0),1
+.Lpcrel3:
+  .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+  lhz 3,0(9)
+  pld 9,storeVal_ushort@got@pcrel(0),1
+.Lpcrel4:
+  .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+  sth 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWZ_STW>:
+# CHECK-S-NEXT:  plwz 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstw 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LWZ_STW>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lwz 3, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stw 3, 0(9)
+# CHECK-D-NEXT:  blr
+check_LWZ_STW:
+  pld 9,useVal_uint@got@pcrel(0),1
+.Lpcrel5:
+  .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+  lwz 3,0(9)
+  pld 9,storeVal_uint@got@pcrel(0),1
+.Lpcrel6:
+  .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+  stw 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFS_STFS>:
+# CHECK-S-NEXT:  plfs 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstfs 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LFS_STFS>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lfs 1, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stfs 1, 0(9)
+# CHECK-D-NEXT:  blr
+check_LFS_STFS:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel7:
+  .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+  lfs 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel8:
+  .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+  stfs 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFD_STFD>:
+# CHECK-S-NEXT:  plfd 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstfd 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LFD_STFD>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lfd 1, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stfd 1, 0(9)
+# CHECK-D-NEXT:  blr
+check_LFD_STFD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel9:
+  .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+  lfd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel10:
+  .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+  stfd 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWA_STW>:
+# CHECK-S-NEXT:  mr 9, 3
+# CHECK-S-NEXT:  plwa 3
+# CHECK-S-NEXT:  pstw 9
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LWA_STW>:
+# CHECK-D-NEXT:  mr 9, 3
+# CHECK-D-NEXT:  pld 8
+# CHECK-D-NEXT:  pld 10
+# CHECK-D-NEXT:  lwa 3, 0(8)
+# CHECK-D-NEXT:  stw 9, 0(10)
+# CHECK-D-NEXT:  blr
+check_LWA_STW:
+  mr 9,3
+  pld 8,useVal_sint@got@pcrel(0),1
+.Lpcrel11:
+  pld 10,storeVal_sint@got@pcrel(0),1
+.Lpcrel12:
+  .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
+  lwa 3,0(8)
+  .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
+  stw 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHA_STH>:
+# CHECK-S-NEXT:  mr 9, 3
+# CHECK-S-NEXT:  plha 3
+# CHECK-S-NEXT:  psth 9
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LHA_STH>:
+# CHECK-D-NEXT:  mr 9, 3
+# CHECK-D-NEXT:  pld 8
+# CHECK-D-NEXT:  pld 10
+# CHECK-D-NEXT:  lha 3, 0(8)
+# CHECK-D-NEXT:  sth 9, 0(10)
+# CHECK-D-NEXT:  blr
+check_LHA_STH:
+  mr 9,3
+  pld 8,useVal_sshort@got@pcrel(0),1
+.Lpcrel13:
+  pld 10,storeVal_sshort@got@pcrel(0),1
+.Lpcrel14:
+  .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
+  lha 3,0(8)
+  .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
+  sth 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LD_STD>:
+# CHECK-S-NEXT:  pld 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstd 3
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LD_STD>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  ld 3, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  std 3, 0(9)
+# CHECK-D-NEXT:  blr
+check_LD_STD:
+  pld 9,useVal_longlong@got@pcrel(0),1
+.Lpcrel15:
+  .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
+  ld 3,0(9)
+  pld 9,storeVal_longlong@got@pcrel(0),1
+.Lpcrel16:
+  .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
+  std 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXV_STXV>:
+# CHECK-S-NEXT:  plxv 34
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstxv 34
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LXV_STXV>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lxv 34, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stxv 34, 0(9)
+# CHECK-D-NEXT:  blr
+check_LXV_STXV:
+  pld 9,useVal_vector@got@pcrel(0),1
+.Lpcrel17:
+  .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
+  lxv 34,0(9)
+  pld 9,storeVal_vector@got@pcrel(0),1
+.Lpcrel18:
+  .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
+  stxv 34,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-S-NEXT:  plxssp 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstxssp 1
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lxssp 1, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stxssp 1, 0(9)
+# CHECK-D-NEXT:  blr
+check_LXSSP_STXSSP:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel19:
+  .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
+  lxssp 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel20:
+  .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
+  stxssp 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSD_STXSD>:
+# CHECK-S-NEXT:  plxsd 1, [[#ADDR1:]]
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstxsd 1, [[#ADDR2:]]
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lxsd 1, 0(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stxsd 1, 0(9)
+# CHECK-D-NEXT:  blr
+check_LXSD_STXSD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel21:
+  .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
+  lxsd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel22:
+  .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8)
+  stxsd 1,0(9)
+  blr
+
+# The respective displacements are computed relative to the PC which advanced
+# by 28 bytes in this function. Since the displacements in the two access
+# instructions are 8 and 32, the displacements are those computed above minus
+# 20 and plus 4 (+8 - 28 and +32 - 28) respectively.
+# CHECK-S-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-S-NEXT:  plxsd 1, [[#ADDR1-20]]
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  pstxsd 1, [[#ADDR2+4]]
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  lxsd 1, 8(9)
+# CHECK-D-NEXT:  pld 9
+# CHECK-D-NEXT:  stxsd 1, 32(9)
+# CHECK-D-NEXT:  blr
+check_LXSD_STXSD_aggr:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel23:
+  .reloc .Lpcrel23-8,R_PPC64_PCREL_OPT,.-(.Lpcrel23-8)
+  lxsd 1,8(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel24:
+  .reloc .Lpcrel24-8,R_PPC64_PCREL_OPT,.-(.Lpcrel24-8)
+  stxsd 1,32(9)
+  blr
+
+# This includes a nop but that is not emitted by the linker.
+# It is an alignment nop to prevent the prefixed instruction from
+# crossing a 64-byte boundary.
+# CHECK-S-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-S-NEXT:  paddi 9
+# CHECK-S-NEXT:  ld 3, 0(9)
+# CHECK-S-NEXT:  nop
+# CHECK-S-NEXT:  paddi 9
+# CHECK-S-NEXT:  std 3, 0(9)
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-D-NEXT:  paddi 9
+# CHECK-D-NEXT:  ld 3, 0(9)
+# CHECK-D-NEXT:  nop
+# CHECK-D-NEXT:  paddi 9
+# CHECK-D-NEXT:  std 3, 0(9)
+# CHECK-D-NEXT:  blr
+check_LD_STD_W_PADDI:
+  paddi 9,0,useVal_longlong@got@pcrel,1
+.Lpcrel25:
+  .reloc .Lpcrel25-8,R_PPC64_PCREL_OPT,.-(.Lpcrel25-8)
+  ld 3,0(9)
+  paddi 9,0,storeVal_longlong@got@pcrel,1
+.Lpcrel26:
+  .reloc .Lpcrel26-8,R_PPC64_PCREL_OPT,.-(.Lpcrel26-8)
+  std 3,0(9)
+  blr
+# CHECK-S-LABEL: <check_LXSD_STXSD_aggr_notoc>:
+# CHECK-S-NEXT:  paddi 3, 0, -12, 1
+# CHECK-S-NEXT:  lwz 4, 8(3)
+# CHECK-S-NEXT:  paddi 3, 0, -24, 1
+# CHECK-S-NEXT:  stw 4, 32(3)
+# CHECK-S-NEXT:  blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD_aggr_notoc>:
+# CHECK-D-NEXT:  paddi 3, 0, -12, 1
+# CHECK-D-NEXT:  lwz 4, 8(3)
+# CHECK-D-NEXT:  paddi 3, 0, -24, 1
+# CHECK-D-NEXT:  stw 4, 32(3)
+# CHECK-D-NEXT:  blr
+.type Arr,@object # @Arr
+.globl Arr
+.p2align 2
+Arr:
+.long 11 # 0xb
+.long 22 # 0x16
+.long 33 # 0x21
+check_LXSD_STXSD_aggr_notoc:
+  paddi 3, 0, Arr@PCREL, 1
+.Lpcrel27:
+  .reloc .Lpcrel27-8,R_PPC64_PCREL_OPT,.-(.Lpcrel27-8)
+  lwz 4,8(3)
+  paddi 3, 0, Arr@PCREL, 1
+.Lpcrel28:
+  .reloc .Lpcrel28-8,R_PPC64_PCREL_OPT,.-(.Lpcrel28-8)
+  stw 4,32(3)
+  blr
+