diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -62,6 +62,31 @@
   ADDI = 14
 };
 
+enum class PPCInsn : uint64_t {
+  NOINSN = 0,
+  PREFIX_MLS = 0x0610000000000000,
+  PREFIX_8LS = 0x0410000000000000,
+#define PPC_INSN_ENUM
+#include "PPCInsns.def"
+};
+static bool checkPPCInsn(PPCInsn insn) {
+#define PPC_INSN_CHECK
+#include "PPCInsns.def"
+  return false;
+}
+
+// Masks to apply to legacy instructions when converting them to prefixed,
+// pc-relative versions. For the most part, the primary opcode is shared
+// between the legacy instruction and the suffix of its prefixed version.
+// However, there are some instances where that isn't the case (DS-Form and
+// DQ-form instructions).
+enum class LegacyToPrefixMask : uint64_t {
+  OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
+  ONLY_RST = 0x3e00000,     // R[ST] (6-10).
+  ST_STX28_TO5 =
+      0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
+};
+
 uint64_t elf::getPPC64TocBase() {
   // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
   // TOC starts where the first of these sections starts. We always create a
@@ -333,6 +358,7 @@
   switch (getPrimaryOpCode(encoding)) {
   default:
     return false;
+  case 6: // Power10 paired loads/stores (lxvp, stxvp).
   case 56:
     // The only instruction with a primary opcode of 56 is `lq`.
     return true;
@@ -344,6 +370,98 @@
   }
 }
 
+static bool isDSFormInstruction(PPCInsn insn) {
+  switch (insn) {
+  default:
+    return false;
+  case PPCInsn::LWA:
+  case PPCInsn::LD:
+  case PPCInsn::LXSD:
+  case PPCInsn::LXSSP:
+  case PPCInsn::STD:
+  case PPCInsn::STXSD:
+  case PPCInsn::STXSSP:
+    return true;
+  }
+}
+
+static PPCInsn getPPCInsn(uint32_t encoding) {
+  uint64_t opc = getPrimaryOpCode(encoding);
+
+  // If the primary opcode is shared between multiple instructions, we need to
+  // fix it up to match the actual instruction we are after.
+  if ((opc == 57 || opc == 58 || opc == 61 || opc == 62) &&
+      !isDQFormInstruction(encoding))
+    opc = encoding & 0xfc000003;
+  else if (opc == 61)
+    opc = encoding & 0xfc000007;
+  else if (opc == 6)
+    opc = encoding & 0xfc00000f;
+
+  // If the value is not one of the enumerators in PPCInsn, we want to return
+  // PPCInsn::NOINSN.
+  if (!checkPPCInsn(static_cast<PPCInsn>(opc)))
+    return PPCInsn::NOINSN;
+  return static_cast<PPCInsn>(opc);
+}
+
+static LegacyToPrefixMask getInsnMask(PPCInsn insn) {
+  if (isDSFormInstruction(insn) || insn == PPCInsn::LXVP ||
+      insn == PPCInsn::STXVP)
+    return LegacyToPrefixMask::ONLY_RST;
+  if (insn == PPCInsn::LXV || insn == PPCInsn::STXV)
+    return LegacyToPrefixMask::ST_STX28_TO5;
+  return LegacyToPrefixMask::OPC_AND_RST;
+}
+
+static PPCInsn getPCRelativeForm(PPCInsn insn) {
+  // If we are saving the opcode and the source/target register from the
+  // original instruction, the pc-relative form is just the prefix.
+  if (getInsnMask(insn) == LegacyToPrefixMask::OPC_AND_RST)
+    return PPCInsn::PREFIX_MLS;
+
+  switch (insn) {
+  default:
+    return PPCInsn::NOINSN;
+  case PPCInsn::LWA:
+    return PPCInsn::PLWA;
+  case PPCInsn::LD:
+    return PPCInsn::PLD;
+  case PPCInsn::LXSD:
+    return PPCInsn::PLXSD;
+  case PPCInsn::LXSSP:
+    return PPCInsn::PLXSSP;
+  case PPCInsn::LXV:
+    return PPCInsn::PLXV;
+  case PPCInsn::LXVP:
+    return PPCInsn::PLXVP;
+  case PPCInsn::STD:
+    return PPCInsn::PSTD;
+  case PPCInsn::STXSD:
+    return PPCInsn::PSTXSD;
+  case PPCInsn::STXSSP:
+    return PPCInsn::PSTXSSP;
+  case PPCInsn::STXV:
+    return PPCInsn::PSTXV;
+  case PPCInsn::STXVP:
+    return PPCInsn::PSTXVP;
+  }
+}
+static uint64_t getPCRelativeForm(uint32_t encoding) {
+  PPCInsn origInsn = getPPCInsn(encoding);
+  PPCInsn pcrelInsn = getPCRelativeForm(origInsn);
+  if (pcrelInsn == PPCInsn::NOINSN)
+    return UINT64_C(-1);
+  LegacyToPrefixMask origInsnMask = getInsnMask(origInsn);
+  uint64_t pcrelEncoding =
+      (uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);
+
+  // If the mask requires moving bit 28 to bit 5, do that now.
+  if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
+    pcrelEncoding |= (encoding & 0x8) << 23;
+  return pcrelEncoding;
+}
+
 static bool isInstructionUpdateForm(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
@@ -368,6 +486,25 @@
   }
 }
 
+// Compute the total displacement between the prefixed instruction that gets
+// to the start of the data and the load/store instruction that has the offset
+// into the data structure.
+// For example:
+//   paddi 3, 0, 1000, 1
+//   lwz 3, 20(3)
+// Should add up to 1020 for total displacement.
+static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
+  int64_t disp34 = llvm::SignExtend64(
+      ((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34);
+  int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);
+  // For DS and DQ form instructions, we need to mask out the XO bits.
+  if (isDQFormInstruction(accessInsn))
+    disp16 &= ~0xf;
+  else if (isDSFormInstruction(getPPCInsn(accessInsn)))
+    disp16 &= ~0x3;
+  return disp34 + disp16;
+}
+
 // There are a number of places when we either want to read or write an
 // instruction when handling a half16 relocation type. On big-endian the buffer
 // pointer is pointing into the middle of the word we want to extract, and on
@@ -475,6 +612,49 @@
     relocateNoSym(loc, R_PPC64_TOC16_LO, val);
     break;
   }
+  case R_PPC64_GOT_PCREL34: {
+    // Clear the first 8 bits of the prefix and the first 6 bits of the
+    // instruction (the primary opcode).
+    uint64_t insn = readPrefixedInstruction(loc);
+    if ((insn & 0xfc000000) != 0xe4000000)
+      error("expected a 'pld' for got-indirect to pc-relative relaxing");
+    insn &= ~0xff000000fc000000;
+
+    // Replace the cleared bits with the values for PADDI (0x600000038000000).
+    insn |= 0x600000038000000;
+    writePrefixedInstruction(loc, insn);
+    relocate(loc, rel, val);
+    break;
+  }
+  case R_PPC64_PCREL_OPT: {
+    // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
+    // be relaxed. The eligibility for the relaxation needs to be determined
+    // on that relocation since this one does not relocate a symbol.
+    uint64_t insn = readPrefixedInstruction(loc);
+    uint32_t accessInsn = read32(loc + rel.addend);
+    uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
+
+    // This error is not necessary for correctness but is emitted for now
+    // to ensure we don't miss these opportunities in real code. It can be
+    // removed at a later date.
+    if (pcRelInsn == UINT64_C(-1)) {
+      errorOrWarn(
+          "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
+          Twine::utohexstr(accessInsn));
+      break;
+    }
+
+    int64_t totalDisp = getTotalDisp(insn, accessInsn);
+    if (!isInt<34>(totalDisp))
+      break; // Displacement doesn't fit.
+    // Convert the PADDI to the prefixed version of accessInsn and convert
+    // accessInsn to a nop.
+    writePrefixedInstruction(loc, pcRelInsn |
+                                      ((totalDisp & 0x3ffff0000) << 16) |
+                                      (totalDisp & 0xffff));
+    write32(loc + rel.addend, 0x60000000); // nop accessInsn.
+    break;
+  }
   default:
     llvm_unreachable("unexpected relocation type");
   }
@@ -668,6 +848,7 @@
   case R_PPC64_TOC16_LO:
     return R_GOTREL;
   case R_PPC64_GOT_PCREL34:
+  case R_PPC64_PCREL_OPT:
     return R_GOT_PC;
   case R_PPC64_TOC16_HA:
   case R_PPC64_TOC16_LO_DS:
@@ -1024,6 +1205,9 @@
                              (val & si1Mask));
     break;
   }
+  // A PCREL_OPT relocation that we did not optimize needs no further action.
+  case R_PPC64_PCREL_OPT:
+    break;
   default:
     llvm_unreachable("unknown relocation");
   }
@@ -1080,6 +1264,14 @@
 
 RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr expr) const {
+  if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&
+      config->pcRelOptimize) {
+    // It only makes sense to optimize pld since paddi means that the address
+    // of the object in the GOT is required rather than the object itself.
+    assert(data && "Expecting an instruction encoding here");
+    if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000)
+      return R_PPC64_RELAX_GOT_PC;
+  }
   if (expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
   if (expr == R_RELAX_TLS_LD_TO_LE)
diff --git a/lld/ELF/Arch/PPCInsns.def b/lld/ELF/Arch/PPCInsns.def
new file mode 100644
--- /dev/null
+++ b/lld/ELF/Arch/PPCInsns.def
@@ -0,0 +1,58 @@
+#ifdef PPC_INSN_ENUM
+#define PPC_INSN(Name, Val) Name = (Val),
+#elif defined PPC_INSN_CHECK
+#define PPC_INSN(Name, Val) \
+  if (insn == PPCInsn::Name) \
+    return true;
+#else
+#error "Must define either PPC_INSN_ENUM or PPC_INSN_CHECK"
+#endif
+// Loads.
+PPC_INSN(LBZ, 34)
+PPC_INSN(PLBZ, PREFIX_MLS) // Prefix only.
+PPC_INSN(LHZ, 40)
+PPC_INSN(PLHZ, PREFIX_MLS) // Prefix only.
+PPC_INSN(LWZ, 32)
+PPC_INSN(PLWZ, PREFIX_MLS) // Prefix only.
+PPC_INSN(LHA, 42)
+PPC_INSN(PLHA, PREFIX_MLS) // Prefix only.
+PPC_INSN(LWA, 0xe8000002) // Primary opc | secondary opc
+PPC_INSN(PLWA, (PREFIX_8LS | 0xa4000000)) // Prefix | Primary opc.
+PPC_INSN(LD, 0xe8000000) // Primary opc | secondary opc
+PPC_INSN(PLD, (PREFIX_8LS | 0xe4000000)) // Prefix | Primary opc.
+PPC_INSN(LFS, 48)
+PPC_INSN(PLFS, PREFIX_MLS) // Prefix only.
+PPC_INSN(LXSSP, 0xe4000003) // Primary opc | secondary opc
+PPC_INSN(PLXSSP, (PREFIX_8LS | 0xac000000)) // Prefix | Primary opc.
+PPC_INSN(LFD, 50)
+PPC_INSN(PLFD, PREFIX_MLS) // Prefix only.
+PPC_INSN(LXSD, 0xe4000002) // Primary opc | secondary opc
+PPC_INSN(PLXSD, (PREFIX_8LS | 0xa8000000)) // Prefix | Primary opc.
+PPC_INSN(LXV, 0xf4000001) // Primary opc | secondary opc
+PPC_INSN(PLXV, (PREFIX_8LS | 0xc8000000)) // Prefix | Primary opc.
+PPC_INSN(LXVP, 0x18000000) // Primary opc | secondary opc
+PPC_INSN(PLXVP, (PREFIX_8LS | 0xe8000000)) // Prefix | Primary opc.
+// Stores.
+PPC_INSN(STB, 38)
+PPC_INSN(PSTB, PREFIX_MLS) // Prefix only.
+PPC_INSN(STH, 44)
+PPC_INSN(PSTH, PREFIX_MLS) // Prefix only.
+PPC_INSN(STW, 36)
+PPC_INSN(PSTW, PREFIX_MLS) // Prefix only.
+PPC_INSN(STD, 0xf8000000) // Primary opc | secondary opc
+PPC_INSN(PSTD, (PREFIX_8LS | 0xf4000000)) // Prefix | Primary opc.
+PPC_INSN(STFS, 52)
+PPC_INSN(PSTFS, PREFIX_MLS) // Prefix only.
+PPC_INSN(STXSSP, 0xf4000003) // Primary opc | secondary opc
+PPC_INSN(PSTXSSP, (PREFIX_8LS | 0xbc000000)) // Prefix | Primary opc.
+PPC_INSN(STFD, 54)
+PPC_INSN(PSTFD, PREFIX_MLS) // Prefix only.
+PPC_INSN(STXSD, 0xf4000002) // Primary opc | secondary opc
+PPC_INSN(PSTXSD, (PREFIX_8LS | 0xb8000000)) // Prefix | Primary opc.
+PPC_INSN(STXV, 0xf4000005) // Primary opc | secondary opc
+PPC_INSN(PSTXV, (PREFIX_8LS | 0xd8000000)) // Prefix | Primary opc.
+PPC_INSN(STXVP, 0x18000001) // Primary opc | secondary opc
+PPC_INSN(PSTXVP, (PREFIX_8LS | 0xf8000000)) // Prefix | Primary opc.
+#undef PPC_INSN_ENUM
+#undef PPC_INSN_CHECK
+#undef PPC_INSN
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -206,6 +206,7 @@
   bool thinLTOIndexOnly;
   bool timeTraceEnabled;
   bool tocOptimize;
+  bool pcRelOptimize;
   bool undefinedVersion;
   bool unique;
   bool useAndroidRelrTags = false;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -309,6 +309,9 @@
   if (config->tocOptimize && config->emachine != EM_PPC64)
     error("--toc-optimize is only supported on the PowerPC64 target");
 
+  if (config->pcRelOptimize && config->emachine != EM_PPC64)
+    error("--pcrel-got-optimize is only supported on the PowerPC64 target");
+
   if (config->pie && config->shared)
     error("-shared and -pie may not be used together");
 
@@ -1288,6 +1291,8 @@
 
   config->tocOptimize =
       args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
+  config->pcRelOptimize = args.hasFlag(
+      OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
 }
 
 // Returns a value of "-format" option.
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -807,6 +807,7 @@
   case R_PPC64_TOCBASE:
     return getPPC64TocBase() + a;
   case R_RELAX_GOT_PC:
+  case R_PPC64_RELAX_GOT_PC:
     return sym.getVA(a) - p;
   case R_RELAX_TLS_GD_TO_LE:
   case R_RELAX_TLS_IE_TO_LE:
@@ -1003,6 +1004,7 @@
 void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   assert(flags & SHF_ALLOC);
   const unsigned bits = config->wordsize * 8;
+  uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
 
   for (const Relocation &rel : relocations) {
     if (rel.expr == R_NONE)
@@ -1024,6 +1026,20 @@
     case R_RELAX_GOT_PC_NOPIC:
       target->relaxGot(bufLoc, rel, targetVA);
       break;
+    case R_PPC64_RELAX_GOT_PC: {
+      // The R_PPC64_PCREL_OPT relocation must appear immediately after
+      // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
+      // We can only relax R_PPC64_PCREL_OPT if we have also relaxed
+      // the associated R_PPC64_GOT_PCREL34 since only the latter has an
+      // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
+      // and only relax the other if the saved offset matches.
+      if (type == R_PPC64_GOT_PCREL34)
+        lastPPCRelaxedRelocOff = offset;
+      if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
+        break;
+      target->relaxGot(bufLoc, rel, targetVA);
+      break;
+    }
     case R_PPC64_RELAX_TOC:
       // rel.sym refers to the STT_SECTION symbol associated to the .toc input
       // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -404,6 +404,10 @@
     "(PowerPC64) Enable TOC related optimizations (default)",
     "(PowerPC64) Disable TOC related optimizations">;
 
+defm pcrel_optimize : B<"pcrel-got-optimize",
+    "(PowerPC64) Enable PC-relative optimizations (default)",
+    "(PowerPC64) Disable PC-relative optimizations">;
+
 def trace: F<"trace">, HelpText<"Print the names of the input files">;
 
 defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -96,6 +96,7 @@
   R_PPC64_CALL_PLT,
   R_PPC64_RELAX_TOC,
   R_PPC64_TOCBASE,
+  R_PPC64_RELAX_GOT_PC,
   R_RISCV_ADD,
   R_RISCV_PC_INDIRECT,
 };
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -376,7 +376,7 @@
 static bool isRelExpr(RelExpr expr) {
   return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
                R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC,
-               R_RISCV_PC_INDIRECT>(expr);
+               R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
 }
 
 // Returns true if a given relocation can be computed at link-time.
diff --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
@@ -0,0 +1,66 @@
+  .section ".text"
+  .comm storeVal_vector,8,8
+  .comm useVal_vector,8,8
+  .globl storeVal_longlong, useAddr_longlong, useVal_longlong, storeVal_sshort
+  .globl useAddr_sshort, useVal_sshort, storeVal_sint, useAddr_sint, useVal_sint
+  .globl storeVal_double, useAddr_double, useVal_double, storeVal_float
+  .globl useAddr_float, useVal_float, storeVal_uint, useAddr_uint
+  .globl useVal_uint, storeVal_ushort, useAddr_ushort, useVal_ushort
+  .globl storeVal, useAddr, useVal
+  .section ".data"
+  .align 3
+  .type storeVal_longlong, @object
+  .size storeVal_longlong, 8
+storeVal_longlong:
+  .quad 18
+useAddr_longlong:
+  .quad 17
+useVal_longlong:
+  .quad 16
+storeVal_sshort:
+  .short -15
+useAddr_sshort:
+  .short -14
+useVal_sshort:
+  .short -13
+  .zero 2
+storeVal_sint:
+  .long -12
+useAddr_sint:
+  .long -11
+useVal_sint:
+  .long -10
+  .zero 4
+storeVal_double:
+  .long 858993459
+  .long 1076966195
+useAddr_double:
+  .long -1717986918
+  .long -1070589543
+useVal_double:
+  .long 0
+  .long 1076756480
+storeVal_float:
+  .long 1045220557
+useAddr_float:
+  .long -1050568294
+useVal_float:
+  .long 1095761920
+storeVal_uint:
+  .long 12
+useAddr_uint:
+  .long 11
+useVal_uint:
+  .long 10
+storeVal_ushort:
+  .short 1
+useAddr_ushort:
+  .short 10
+useVal_ushort:
+  .short 5
+storeVal:
+  .byte -1
+useAddr:
+  .byte 10
+useVal:
+  .byte 5
diff --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
@@ -0,0 +1,361 @@
+# REQUIRES: ppc
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-got-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-got-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# CHECK-S-LABEL: <check_LBZ_STB>:
+# CHECK-S-NEXT: plbz 10
+# CHECK-S-NEXT: paddi 9
+# CHECK-S-NEXT: li 3, 0
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: rldicl 9, 9, 9, 60
+# CHECK-S-NEXT: add 9, 9, 10
+# CHECK-S-NEXT: pstb 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LBZ_STB>:
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: li 3, 0
+# CHECK-D-NEXT: lbz 10, 0(8)
+# CHECK-D-NEXT: rldicl 9, 9, 9, 60
+# CHECK-D-NEXT: add 9, 9, 10
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: stb 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LBZ_STB:
+  pld 8,useVal@got@pcrel(0),1
+.Lpcrel1:
+  pld 9,useAddr@got@pcrel(0),1
+  li 3,0
+  .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+  lbz 10,0(8)
+  rldicl 9,9,9,60
+  add 9,9,10
+  pld 10,storeVal@got@pcrel(0),1
+.Lpcrel2:
+  .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+  stb 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHZ_STH>:
+# CHECK-S-NEXT: plhz 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: psth 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LHZ_STH>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lhz 3, 0(9)
+# CHECK-D-NEXT: nop
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: sth 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LHZ_STH:
+  pld 9,useVal_ushort@got@pcrel(0),1
+.Lpcrel3:
+  .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+  lhz 3,0(9)
+  pld 9,storeVal_ushort@got@pcrel(0),1
+.Lpcrel4:
+  .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+  sth 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWZ_STW>:
+# CHECK-S-NEXT: plwz 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstw 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LWZ_STW>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lwz 3, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stw 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LWZ_STW:
+  pld 9,useVal_uint@got@pcrel(0),1
+.Lpcrel5:
+  .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+  lwz 3,0(9)
+  pld 9,storeVal_uint@got@pcrel(0),1
+.Lpcrel6:
+  .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+  stw 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFS_STFS>:
+# CHECK-S-NEXT: plfs 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstfs 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LFS_STFS>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lfs 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stfs 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LFS_STFS:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel7:
+  .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+  lfs 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel8:
+  .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+  stfs 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFD_STFD>:
+# CHECK-S-NEXT: plfd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstfd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LFD_STFD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lfd 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stfd 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LFD_STFD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel9:
+  .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+  lfd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel10:
+  .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+  stfd 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWA_STW>:
+# CHECK-S-NEXT: mr 9, 3
+# CHECK-S-NEXT: plwa 3
+# CHECK-S-NEXT: pstw 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LWA_STW>:
+# CHECK-D-NEXT: mr 9, 3
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: lwa 3, 0(8)
+# CHECK-D-NEXT: stw 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LWA_STW:
+  mr 9,3
+  pld 8,useVal_sint@got@pcrel(0),1
+.Lpcrel11:
+  pld 10,storeVal_sint@got@pcrel(0),1
+.Lpcrel12:
+  .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
+  lwa 3,0(8)
+  .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
+  stw 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHA_STH>:
+# CHECK-S-NEXT: mr 9, 3
+# CHECK-S-NEXT: plha 3
+# CHECK-S-NEXT: psth 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LHA_STH>:
+# CHECK-D-NEXT: mr 9, 3
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: lha 3, 0(8)
+# CHECK-D-NEXT: sth 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LHA_STH:
+  mr 9,3
+  pld 8,useVal_sshort@got@pcrel(0),1
+.Lpcrel13:
+  pld 10,storeVal_sshort@got@pcrel(0),1
+.Lpcrel14:
+  .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
+  lha 3,0(8)
+  .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
+  sth 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LD_STD>:
+# CHECK-S-NEXT: pld 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstd 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LD_STD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: ld 3, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: std 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LD_STD:
+  pld 9,useVal_longlong@got@pcrel(0),1
+.Lpcrel15:
+  .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
+  ld 3,0(9)
+  pld 9,storeVal_longlong@got@pcrel(0),1
+.Lpcrel16:
+  .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
+  std 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXV_STXV>:
+# CHECK-S-NEXT: plxv 34
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxv 34
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXV_STXV>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxv 34, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxv 34, 0(9)
+# CHECK-D-NEXT: blr
+check_LXV_STXV:
+  pld 9,useVal_vector@got@pcrel(0),1
+.Lpcrel17:
+  .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
+  lxv 34,0(9)
+  pld 9,storeVal_vector@got@pcrel(0),1
+.Lpcrel18:
+  .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
+  stxv 34,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-S-NEXT: plxssp 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxssp 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxssp 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxssp 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LXSSP_STXSSP:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel19:
+  .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
+  lxssp 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel20:
+  .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
+  stxssp 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSD_STXSD>:
+# CHECK-S-NEXT: plxsd 1, [[#ADDR1:]]
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxsd 1, [[#ADDR2:]]
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxsd 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxsd 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LXSD_STXSD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel21:
+  .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
+  lxsd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel22:
+  .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8)
+  stxsd 1,0(9)
+  blr
+
+# The respective displacements are computed relative to the PC, which has
+# advanced by 28 bytes in this function. Since the displacements in the two
+# access instructions are 8 and 32, the expected displacements are those
+# computed above minus 20 and plus 4 (+8 - 28 and +32 - 28) respectively.
+# CHECK-S-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-S-NEXT: plxsd 1, [[#ADDR1-20]]
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxsd 1, [[#ADDR2+4]]
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxsd 1, 8(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxsd 1, 32(9)
+# CHECK-D-NEXT: blr
+check_LXSD_STXSD_aggr:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel23:
+  .reloc .Lpcrel23-8,R_PPC64_PCREL_OPT,.-(.Lpcrel23-8)
+  lxsd 1,8(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel24:
+  .reloc .Lpcrel24-8,R_PPC64_PCREL_OPT,.-(.Lpcrel24-8)
+  stxsd 1,32(9)
+  blr
+
+# This includes a nop but that is not emitted by the linker.
+# It is an alignment nop to prevent the prefixed instruction from
+# crossing a 64-byte boundary.
+# CHECK-S-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-S-NEXT: paddi 9
+# CHECK-S-NEXT: ld 3, 0(9)
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: paddi 9
+# CHECK-S-NEXT: std 3, 0(9)
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-D-NEXT: paddi 9
+# CHECK-D-NEXT: ld 3, 0(9)
+# CHECK-D-NEXT: nop
+# CHECK-D-NEXT: paddi 9
+# CHECK-D-NEXT: std 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LD_STD_W_PADDI:
+  paddi 9,0,useVal_longlong@got@pcrel,1
+.Lpcrel25:
+  .reloc .Lpcrel25-8,R_PPC64_PCREL_OPT,.-(.Lpcrel25-8)
+  ld 3,0(9)
+  paddi 9,0,storeVal_longlong@got@pcrel,1
+.Lpcrel26:
+  .reloc .Lpcrel26-8,R_PPC64_PCREL_OPT,.-(.Lpcrel26-8)
+  std 3,0(9)
+  blr