diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -302,6 +302,7 @@ VK_PPC_TLSLD, // symbol@tlsld VK_PPC_LOCAL, // symbol@local VK_PPC_NOTOC, // symbol@notoc + VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr VK_COFF_IMGREL32, // symbol@imgrel (image-relative) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -324,6 +324,7 @@ case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return "local"; case VK_PPC_NOTOC: return "notoc"; + case VK_PPC_PCREL_OPT: return "<>"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -43,8 +43,15 @@ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; private: void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); + void emitGOTToPCRelReloc(const MCInst &Inst); + void emitGOTToPCRelLabel(const MCInst &Inst); }; +// Check if the instruction Inst is part of a pair of instructions that make up +// a link time GOT PC Rel optimization. 
+Optional isPartOfGOTToPCRelPair(const MCInst &Inst, + const MCSubtargetInfo &STI); + MCELFStreamer *createPPCELFStreamer(MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -20,6 +20,7 @@ #include "PPCELFStreamer.h" +#include "PPCFixupKinds.h" #include "PPCInstrInfo.h" #include "PPCMCCodeEmitter.h" #include "llvm/BinaryFormat/ELF.h" @@ -89,12 +90,33 @@ PPCMCCodeEmitter *Emitter = static_cast(getAssembler().getEmitterPtr()); + // If the instruction is a part of the GOT to PC-Rel link time optimization + // instruction pair, return a value, otherwise return None. A true returned + // value means the instruction is the PLDpc and a false value means it is + // the user instruction. + Optional IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI); + + // User of the GOT-indirect address. + // For example, the load that will get the relocation as follows: + // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + // lwa 3, 4(3) + if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue()) + emitGOTToPCRelReloc(Inst); + // Special handling is only for prefixed instructions. if (!Emitter->isPrefixedInstruction(Inst)) { MCELFStreamer::emitInstruction(Inst, STI); return; } emitPrefixedInstruction(Inst, STI); + + // Producer of the GOT-indirect address. 
+ // For example, the prefixed load from the got that will get the label as + // follows: + // pld 3, vec@got@pcrel(0), 1 + // .Lpcrel1: + if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue()) + emitGOTToPCRelLabel(Inst); } void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { @@ -103,6 +125,102 @@ MCELFStreamer::emitLabel(Symbol); } +// This linker time GOT PC Relative optimization relocation will look like this: +// pld symbol@got@pcrel +// : +// .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8) +// load , 0() +// The reason we place the label after the PLDpc instruction is that there +// may be an alignment nop before it since prefixed instructions must not +// cross a 64-byte boundary (please see +// PPCELFStreamer::emitPrefixedInstruction()). When referring to the +// label, we subtract the width of a prefixed instruction (8 bytes) to ensure +// we refer to the PLDpc. +void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) { + // Get the last operand which contains the symbol. + const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); + assert(Operand.isExpr() && "Expecting an MCExpr."); + // Cast the last operand to MCSymbolRefExpr to get the symbol. 
+ const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast(Expr); + assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && + "Expecting a symbol of type VK_PPC_PCREL_OPT"); + MCSymbol *LabelSym = + getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); + const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext()); + const MCExpr *Eight = MCConstantExpr::create(8, getContext()); + // SubExpr is just Label###-8 + const MCExpr *SubExpr = + MCBinaryExpr::createSub(LabelExpr, Eight, getContext()); + MCSymbol *CurrentLocation = getContext().createTempSymbol(); + const MCExpr *CurrentLocationExpr = + MCSymbolRefExpr::create(CurrentLocation, getContext()); + // SubExpr2 is .-(Label###-8) + const MCExpr *SubExpr2 = + MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext()); + + MCDataFragment *DF = static_cast(LabelSym->getFragment()); + assert(DF && "Expecting a valid data fragment."); + MCFixupKind FixupKind = static_cast(FirstLiteralRelocationKind + + ELF::R_PPC64_PCREL_OPT); + DF->getFixups().push_back( + MCFixup::create(LabelSym->getOffset() - 8, SubExpr2, + FixupKind, Inst.getLoc())); + emitLabel(CurrentLocation, Inst.getLoc()); +} + +// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel +// optimization. +void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) { + // Get the last operand which contains the symbol. + const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); + assert(Operand.isExpr() && "Expecting an MCExpr."); + // Cast the last operand to MCSymbolRefExpr to get the symbol. 
+ const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast(Expr); + assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && + "Expecting a symbol of type VK_PPC_PCREL_OPT"); + MCSymbol *LabelSym = + getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); + emitLabel(LabelSym, Inst.getLoc()); +} + +// This function checks if the parameter Inst is part of the setup for a link +// time GOT PC Relative optimization. For example in this situation: +// +// >)>> +// +// >)>> +// The above is a pair of such instructions and this function will not return +// None for either one of them. In both cases we are looking for the last +// operand >)> which needs to be an MCExpr +// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look +// at the opcode and in the case of PLDpc we will return true. For the load +// (or store) this function will return false indicating it has found the second +// instruction in the pair. +Optional llvm::isPartOfGOTToPCRelPair(const MCInst &Inst, + const MCSubtargetInfo &STI) { + // Need at least two operands. + if (Inst.getNumOperands() < 2) + return None; + + unsigned LastOp = Inst.getNumOperands() - 1; + // The last operand needs to be an MCExpr and it needs to have a variant kind + // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a + // link time GOT PC Rel opt instruction and we can ignore it and return None. + const MCOperand &Operand = Inst.getOperand(LastOp); + if (!Operand.isExpr()) + return None; + + // Check for the variant kind VK_PPC_PCREL_OPT in this expression. 
+ const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast(Expr); + if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT) + return None; + + return (Inst.getOpcode() == PPC::PLDpc); +} + MCELFStreamer *llvm::createPPCELFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -92,6 +92,36 @@ return; } + // Check if the last operand is an expression with the variant kind + // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization + // relocation and the .reloc directive needs to be added. + unsigned LastOp = MI->getNumOperands() - 1; + if (MI->getNumOperands() > 1) { + const MCOperand &Operand = MI->getOperand(LastOp); + if (Operand.isExpr()) { + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = + static_cast(Expr); + + if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) { + const MCSymbol &Symbol = SymExpr->getSymbol(); + if (MI->getOpcode() == PPC::PLDpc) { + printInstruction(MI, Address, O); + O << "\n"; + Symbol.print(O, &MAI); + O << ":"; + return; + } else { + O << "\t.reloc "; + Symbol.print(O, &MAI); + O << "-8,R_PPC64_PCREL_OPT,.-("; + Symbol.print(O, &MAI); + O << "-8)\n"; + } + } + } + } + // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -107,6 +107,10 @@ /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL. MO_GOT_FLAG = 8, + // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a + // PC Relative linker optimization. 
+ MO_PCREL_OPT_FLAG = 16, + /// The next are not flags but distinct values. MO_ACCESS_MASK = 0xf00, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2283,7 +2283,8 @@ {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, {MO_PCREL_FLAG, "ppc-pcrel"}, - {MO_GOT_FLAG, "ppc-got"}}; + {MO_GOT_FLAG, "ppc-got"}, + {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}}; return makeArrayRef(TargetFlags); } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -100,6 +100,8 @@ MIOpcode == PPC::BL8_NOTOC) { RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; } + if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT; } const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -43,6 +43,46 @@ cl::desc("Run pre-emit peephole optimizations.")); namespace { + +static bool hasPCRelativeForm(MachineInstr &Use) { + switch (Use.getOpcode()) { + default: + return false; + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + case PPC::STB: + case PPC::STB8: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + case PPC::LD: + case PPC::STD: + case 
PPC::LWA: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::LXV: + case PPC::STXSD: + case PPC::STXSSP: + case PPC::STXV: + case PPC::LFD: + case PPC::LFS: + case PPC::STFD: + case PPC::STFS: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + return true; + } +} + class PPCPreEmitPeephole : public MachineFunctionPass { public: static char ID; @@ -172,6 +212,135 @@ return !InstrsToErase.empty(); } + // Check if this instruction is a PLDpc that is part of a GOT indirect + // access. + bool isGOTPLDpc(MachineInstr &Instr) { + if (Instr.getOpcode() != PPC::PLDpc) + return false; + + // The result must be a register. + const MachineOperand &LoadedAddressReg = Instr.getOperand(0); + if (!LoadedAddressReg.isReg()) + return false; + + // Make sure that this is a global symbol. + const MachineOperand &SymbolOp = Instr.getOperand(1); + if (!SymbolOp.isGlobal()) + return false; + + // Finally return true only if the GOT flag is present. + return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); + } + + bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { + MachineFunction *MF = MBB.getParent(); + // Add this linker opt only if we are using PC Relative memops. + if (!MF->getSubtarget().isUsingPCRelativeCalls()) + return false; + + // Struct to keep track of one def/use pair for a GOT indirect access. + struct GOTDefUsePair { + MachineBasicBlock::iterator DefInst; + MachineBasicBlock::iterator UseInst; + Register DefReg; + Register UseReg; + bool StillValid; + }; + // Vector of def/use pairs in this basic block. + SmallVector CandPairs; + SmallVector ValidPairs; + bool MadeChange = false; + + // Run through all of the instructions in the basic block and try to + // collect potential pairs of GOT indirect access instructions. + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Look for the initial GOT indirect load. 
+ if (isGOTPLDpc(*BBI)) { + GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), + BBI->getOperand(0).getReg(), + PPC::NoRegister, true}; + CandPairs.push_back(CurrentPair); + continue; + } + + // We haven't encountered any new PLD instructions, nothing to check. + if (CandPairs.empty()) + continue; + + // Run through the candidate pairs and see if any of the registers + // defined in the PLD instructions are used by this instruction. + // Note: the size of CandPairs can change in the loop. + for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { + GOTDefUsePair &Pair = CandPairs[Idx]; + // The instruction does not use or modify this PLD's def reg, + // ignore it. + if (!BBI->readsRegister(Pair.DefReg, TRI) && + !BBI->modifiesRegister(Pair.DefReg, TRI)) + continue; + + // The use needs to be used in the address computation and not + // as the register being stored for a store. + const MachineOperand *UseOp = + hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr; + + // Check for a valid use. + if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && + UseOp->isUse() && UseOp->isKill()) { + Pair.UseInst = BBI; + Pair.UseReg = BBI->getOperand(0).getReg(); + ValidPairs.push_back(Pair); + } + CandPairs.erase(CandPairs.begin() + Idx); + } + } + + // Go through all of the pairs and check for any more valid uses. + for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { + // We shouldn't be here if we don't have a valid pair. + assert(Pair->UseInst.isValid() && Pair->StillValid && + "Kept an invalid def/use pair for GOT PCRel opt"); + // We have found a potential pair. Search through the instructions + // between the def and the use to see if it is valid to mark this as a + // linker opt. 
+ MachineBasicBlock::iterator BBI = Pair->DefInst; + ++BBI; + for (; BBI != Pair->UseInst; ++BBI) { + if (BBI->readsRegister(Pair->UseReg, TRI) || + BBI->modifiesRegister(Pair->UseReg, TRI)) { + Pair->StillValid = false; + break; + } + } + + if (!Pair->StillValid) + continue; + + // The load/store instruction that uses the address from the PLD will + // either use a register (for a store) or define a register (for the + // load). That register will be added as an implicit def to the PLD + // and as an implicit use on the second memory op. This is a precaution + // to prevent future passes from using that register between the two + // instructions. + MachineOperand ImplDef = + MachineOperand::CreateReg(Pair->UseReg, true, true); + MachineOperand ImplUse = + MachineOperand::CreateReg(Pair->UseReg, false, true); + Pair->DefInst->addOperand(ImplDef); + Pair->UseInst->addOperand(ImplUse); + + // Create the symbol. + MCContext &Context = MF->getContext(); + MCSymbol *Symbol = + Context.createTempSymbol(Twine("pcrel"), false, false); + MachineOperand PCRelLabel = + MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); + Pair->DefInst->addOperand(*MF, PCRelLabel); + Pair->UseInst->addOperand(*MF, PCRelLabel); + MadeChange |= true; + } + return MadeChange; + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. 
@@ -192,6 +361,7 @@ SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) { Changed |= removeRedundantLIs(MBB, TRI); + Changed |= addLinkerOpt(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == PPC::UNENCODED_NOP) { diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -67,6 +67,8 @@ ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl localCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel: +; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -150,6 +152,8 @@ ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl externCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel0: +; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -212,6 +216,8 @@ ; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -254,6 +260,8 @@ ; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel2: +; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -319,6 +327,8 @@ ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel3: +; CHECK-S-NEXT: .reloc 
.Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -23,12 +23,16 @@ ; LE-LABEL: ReadGlobalVarChar: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valChar@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel: +; LE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; LE-NEXT: lbz r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarChar: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valChar@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel: +; BE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; BE-NEXT: lbz r3, 0(r3) ; BE-NEXT: blr entry: @@ -60,12 +64,16 @@ ; LE-LABEL: ReadGlobalVarShort: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valShort@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel0: +; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; LE-NEXT: lha r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarShort: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valShort@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel0: +; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; BE-NEXT: lha r3, 0(r3) ; BE-NEXT: blr entry: @@ -97,12 +105,16 @@ ; LE-LABEL: ReadGlobalVarInt: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valInt@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel1: +; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarInt: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valInt@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel1: +; BE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -133,12 +145,16 @@ ; LE-LABEL: ReadGlobalVarUnsigned: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel2: +; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; LE-NEXT: 
lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarUnsigned: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel2: +; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -169,12 +185,16 @@ ; LE-LABEL: ReadGlobalVarLong: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valLong@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel3: +; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarLong: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valLong@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel3: +; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -206,12 +226,16 @@ ; LE-LABEL: ReadGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel4: +; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, ptr@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel4: +; BE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: blr entry: @@ -223,7 +247,9 @@ ; LE-LABEL: WriteGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel5: ; LE-NEXT: li r4, 3 +; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: stw r4, 0(r3) ; LE-NEXT: blr @@ -231,7 +257,9 @@ ; BE-LABEL: WriteGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, ptr@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel5: ; BE-NEXT: li r4, 3 +; BE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: stw r4, 0(r3) ; BE-NEXT: blr @@ -259,12 +287,16 @@ ; LE-LABEL: ReadGlobalArray: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, array@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel6: +; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; LE-NEXT: lwa r3, 12(r3) ; 
LE-NEXT: blr ; ; BE-LABEL: ReadGlobalArray: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, array@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel6: +; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; BE-NEXT: lwa r3, 12(r3) ; BE-NEXT: blr entry: @@ -295,12 +327,16 @@ ; LE-LABEL: ReadGlobalStruct: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, structure@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel7: +; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; LE-NEXT: lwa r3, 4(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalStruct: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, structure@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel7: +; BE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -332,6 +368,8 @@ ; LE: .localentry ReadFuncPtr, 1 ; LE-NEXT: # %bb.0: # %entry ; LE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel8: +; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; LE-NEXT: ld r12, 0(r3) ; LE-NEXT: mtctr r12 ; LE-NEXT: bctr @@ -341,6 +379,8 @@ ; BE: .localentry ReadFuncPtr, 1 ; BE-NEXT: # %bb.0: # %entry ; BE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel8: +; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; BE-NEXT: ld r12, 0(r3) ; BE-NEXT: mtctr r12 ; BE-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll @@ -0,0 +1,395 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s + +; On future CPU with PC Relative addressing enabled, it is possible for the +; linker to optimize GOT indirect accesses. In order for the linker to do this +; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation. 
+; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation +; correctly. + +@input8 = external local_unnamed_addr global i8, align 1 +@output8 = external local_unnamed_addr global i8, align 1 +@input16 = external local_unnamed_addr global i16, align 2 +@output16 = external local_unnamed_addr global i16, align 2 +@input32 = external global i32, align 4 +@output32 = external local_unnamed_addr global i32, align 4 +@input64 = external local_unnamed_addr global i64, align 8 +@output64 = external local_unnamed_addr global i64, align 8 +@input128 = external local_unnamed_addr global i128, align 16 +@output128 = external local_unnamed_addr global i128, align 16 +@inputf32 = external local_unnamed_addr global float, align 4 +@outputf32 = external local_unnamed_addr global float, align 4 +@inputf64 = external local_unnamed_addr global double, align 8 +@outputf64 = external local_unnamed_addr global double, align 8 +@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16 +@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16 +@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16 +@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16 +@ArrayIn = external global [10 x i32], align 4 +@ArrayOut = external local_unnamed_addr global [10 x i32], align 4 +@IntPtrIn = external local_unnamed_addr global i32*, align 8 +@IntPtrOut = external local_unnamed_addr global i32*, align 8 +@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8 +@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8 + +define dso_local void @ReadWrite8() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWrite8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel: +; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) +; CHECK-NEXT: lbz r3, 0(r3) +; In this test the stb r3, 0(r4) cannot be optimized because it +; uses 
the register r3 and that register is defined by lbz r3, 0(r3) +; which is defined between the pld and the stb. +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @input8, align 1 + store i8 %0, i8* @output8, align 1 + ret void +} + +define dso_local void @ReadWrite16() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWrite16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel0: +; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; CHECK-NEXT: lhz r3, 0(r3) +; In this test the sth r3, 0(r4) cannot be optimized because it +; uses the register r3 and that register is defined by lhz r3, 0(r3) +; which is defined between the pld and the sth. +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* @input16, align 2 + store i16 %0, i16* @output16, align 2 + ret void +} + +define dso_local void @ReadWrite32() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWrite32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @input32, align 4 + store i32 %0, i32* @output32, align 4 + ret void +} + +define dso_local void @ReadWrite64() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWrite64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel2: +; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* @input64, align 8 + store i64 %0, i64* @output64, align 8 + ret void +} + +; FIXME: we should always convert X-Form instructions that use +; PPC::ZERO[8] to the corresponding 
D-Form so we can perform this opt. +define dso_local void @ReadWrite128() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWrite128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input128@got@pcrel(0), 1 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, output128@got@pcrel(0), 1 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load i128, i128* @input128, align 16 + store i128 %0, i128* @output128, align 16 + ret void +} + +define dso_local void @ReadWritef32() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWritef32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: xxspltidp vs1, 1078103900 +; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; CHECK-NEXT: lfs f0, 0(r3) +; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1 +; CHECK-NEXT: xsaddsp f0, f0, f1 +; CHECK-NEXT: stfs f0, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load float, float* @inputf32, align 4 + %add = fadd float %0, 0x400851EB80000000 + store float %add, float* @outputf32, align 4 + ret void +} + +define dso_local void @ReadWritef64() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWritef64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: plfd f1, .LCPI6_0@PCREL(0), 1 +; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1 +; CHECK-NEXT: xsadddp f0, f0, f1 +; CHECK-NEXT: stfd f0, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load double, double* @inputf64, align 8 + %add = fadd double %0, 6.800000e+00 + store double %add, double* @outputf64, align 8 + ret void +} + +; FIXME: we should always convert X-Form instructions that use +; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt. 
+define dso_local void @ReadWriteVi32() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteVi32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 45 +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, outputVi32@got@pcrel(0), 1 +; CHECK-NEXT: xxinsertw vs0, vs1, 8 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecins = insertelement <4 x i32> %0, i32 45, i32 1 + store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16 + ret void +} + +define dso_local void @ReadWriteVi64() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteVi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputVi64@got@pcrel(0), 1 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, outputVi64@got@pcrel(0), 1 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16 + store <2 x i64> %0, <2 x i64>* @outputVi64, align 16 + ret void +} + +define dso_local void @ReadWriteArray() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel5: +; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; CHECK-NEXT: lwz r3, 28(r3) +; CHECK-NEXT: addi r3, r3, 42 +; CHECK-NEXT: stw r3, 8(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4 + %add = add nsw i32 %0, 42 + store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4 + ret void +} + +define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteSameArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: lwz r4, 12(r3) +; CHECK-NEXT: addi r4, r4, 8 +; CHECK-NEXT: stw r4, 24(r3) +; CHECK-NEXT: blr 
+entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4 + %add = add nsw i32 %0, 8 + store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4 + ret void +} + +define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteIntPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel6: +; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel7: +; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; CHECK-NEXT: ld r4, 0(r4) +; CHECK-NEXT: lwz r5, 216(r3) +; CHECK-NEXT: lwz r3, 48(r3) +; CHECK-NEXT: add r3, r3, r5 +; CHECK-NEXT: stw r3, 136(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32*, i32** @IntPtrIn, align 8 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 54 + %1 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add nsw i32 %2, %1 + %3 = load i32*, i32** @IntPtrOut, align 8 + %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34 + store i32 %add, i32* %arrayidx2, align 4 + ret void +} + +define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 { +; CHECK-LABEL: ReadWriteFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel8: +; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8 + store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8 + ret void +} + +define dso_local void @FuncPtrCopy() local_unnamed_addr #0 { +; CHECK-LABEL: FuncPtrCopy: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: blr +entry: + store void (...)* @Callee, void (...)** @FuncPtrOut, align 8 + ret void +} + +declare void @Callee(...) + +define dso_local void @FuncPtrCall() local_unnamed_addr #0 { +; CHECK-LABEL: FuncPtrCall: +; CHECK: .localentry FuncPtrCall, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel9: +; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8 + tail call void %0() + ret void +} + +define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 { +; CHECK-LABEL: ReadVecElement: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel10: +; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) +; CHECK-NEXT: lwa r3, 4(r3) +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecext = extractelement <4 x i32> %0, i32 1 + ret i32 %vecext +} + +define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 { +; CHECK-LABEL: VecMultiUse: +; CHECK: .localentry VecMultiUse, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: pld r30, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: lwz r29, 4(r30) +; CHECK-NEXT: bl Callee@notoc +; CHECK-NEXT: lwz r3, 8(r30) +; CHECK-NEXT: add r29, r3, r29 +; CHECK-NEXT: bl Callee@notoc +; CHECK-NEXT: lwz r3, 0(r30) +; CHECK-NEXT: add r3, r29, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, 
-16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + tail call void bitcast (void (...)* @Callee to void ()*)() + %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %2 = extractelement <4 x i32> %1, i32 2 + %3 = extractelement <4 x i32> %0, i64 1 + %4 = add nsw i32 %2, %3 + tail call void bitcast (void (...)* @Callee to void ()*)() + %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecext2 = extractelement <4 x i32> %5, i32 0 + %add3 = add nsw i32 %4, %vecext2 + ret i32 %add3 +} + +define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 { +; CHECK-LABEL: UseAddr: +; CHECK: .localentry UseAddr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: pld r4, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: lwz r5, 16(r4) +; CHECK-NEXT: add r30, r5, r3 +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: bl getAddr@notoc +; CHECK-NEXT: add r3, r30, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0)) + %add1 = add nsw i32 %add, %call + ret i32 %add1 +} + +declare signext i32 @getAddr(i32*) local_unnamed_addr + +define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr #0 { +; CHECK-LABEL: AddrTaken32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 +; CHECK-NEXT: blr +entry: + ret i32* @input32 +} + +attributes #0 = { nounwind } diff --git 
a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll @@ -50,12 +50,15 @@ ; CHECK-S-LABEL: getElementExtern4: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel: +; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; CHECK-S-NEXT: lwa r3, 16(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: <getElementExtern4>: ; CHECK-O: pld 3, 0(0), 1 ; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 array1 -; CHECK-O-NEXT: lwa 3, 16(3) +; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8 +; CHECK-O: lwa 3, 16(3) ; CHECK-O-NEXT: blr entry: %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 4), align 4 @@ -66,12 +69,15 @@ ; CHECK-S-LABEL: getElementExternNegative: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel0: +; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-S-NEXT: lwa r3, -4(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: <getElementExternNegative>: ; CHECK-O: pld 3, 0(0), 1 ; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 array1 -; CHECK-O-NEXT: lwa 3, -4(3) +; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8 +; CHECK-O: lwa 3, -4(3) ; CHECK-O-NEXT: blr entry: %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 -1), align 4 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -51,6 +51,8 @@ ; CHECK: .localentry TailCallExtrnFuncPtr, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel: +; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr diff --git a/llvm/test/CodeGen/PowerPC/pcrel.ll
b/llvm/test/CodeGen/PowerPC/pcrel.ll --- a/llvm/test/CodeGen/PowerPC/pcrel.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel.ll @@ -41,12 +41,15 @@ ; CHECK-S-LABEL: ReadGlobalVarInt ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, valIntGlob@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel: +; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; CHECK-S-NEXT: lwa r3, 0(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: ReadGlobalVarInt ; CHECK-O: pld 3, 0(0), 1 ; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 valIntGlob +; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8 ; CHECK-O-NEXT: lwa 3, 0(3) ; CHECK-O-NEXT: blr entry: