diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h @@ -47,6 +47,7 @@ bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const; }; +std::unique_ptr<ScheduleDAGMutation> createLinkerOptDAGMutation(); } // end namespace llvm #endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -8,9 +8,13 @@ #include "PPCMachineScheduler.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "PPC.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-scheduler" + static cl::opt<bool> DisableAddiLoadHeuristic("disable-ppc-sched-addi-load", cl::desc("Disable scheduling addi instruction before" @@ -21,6 +25,11 @@ "as possible post ra"), cl::Hidden, cl::init(true)); +static cl::opt<bool> EnableLinkerOptMutation( + "ppc-sched-linker-opt", + cl::desc("Enable the mutation for linker optimization"), cl::Hidden, + cl::init(true)); + static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) { return Cand.SU->getInstr()->getOpcode() == PPC::ADDI || Cand.SU->getInstr()->getOpcode() == PPC::ADDI8; @@ -117,3 +126,164 @@ return PostGenericScheduler::pickNode(IsTopNode); } +//===----------------------------------------------------------------------===// +// PPCLinkerOptMutation - DAG post-processing for linker opt. +//===----------------------------------------------------------------------===// +namespace { + +// x = PLDpc (GOT) +// ... +// stw y, off(x) +// +// We will create a symbol that has the MO_PCREL_OPT_FLAG flag so that the +// linker can do the optimization if the pair meets the following conditions: +// 1. The first instruction is a PLDpc that loads from the GOT. +// 2. 
The PLDpc has only a single use. +// 3. The single user of the PLDpc must have a pcrel form. +// 4. No use/def of the user's first operand is allowed in between the PLDpc +// and the single user. +// So, we need to move all the hazard instructions that are in between the +// PLDpc and its single user so that they are scheduled before the PLDpc. +class PPCLinkerOptMutation : public ScheduleDAGMutation { +public: +private: + SUnit *getPLDpcAndLdStPair(SUnit &SU) const; + void schedHazardInsts(ScheduleDAGInstrs &DAG, SUnit &PLDpc, SUnit &LdSt); + +protected: + void apply(ScheduleDAGInstrs *DAG) override; +}; + +void PPCLinkerOptMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &PLDpc : DAG->SUnits) + if (SUnit *LdSt = getPLDpcAndLdStPair(PLDpc)) + schedHazardInsts(*DAG, PLDpc, *LdSt); +} + +// Make all the other pred (hazard) SUs of the LdSt preds of the PLDpc so that +// they are scheduled before the PLDpc. +void PPCLinkerOptMutation::schedHazardInsts(ScheduleDAGInstrs &DAG, + SUnit &PLDpc, SUnit &LdSt) { + LLVM_DEBUG(dbgs() << "Linker Opt: "; DAG.dumpNodeName(PLDpc); dbgs() << " - "; + DAG.dumpNodeName(LdSt); dbgs() << " / "; + dbgs() << DAG.TII->getName(PLDpc.getInstr()->getOpcode()) << " - " + << DAG.TII->getName(LdSt.getInstr()->getOpcode()) << '\n';); + assert(LdSt.getInstr()->mayLoadOrStore() && "Load/Store instr expected"); + // +-----------+ new pred edge + // | x = PLDpc +-----------+ + // +-----^-----+ | + // | v + // | +---+----+ + // | | inst Y | + // | +---^----+ + // +-----+-----+ | + // | stw y, x +-----------+ + // +-----------+ + // Create pred edges from PLDpc to the preds (inst Y) of the stw so that + // these instructions are scheduled before PLDpc, because the linker + // will remove the stw and replace PLDpc with a prefixed store. This acts as + // if the stw were scheduled immediately after PLDpc. 
+ for (SDep &Pred : LdSt.Preds) { + if (Pred.getSUnit() == &PLDpc) + continue; + + LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum + << ")\n"); + DAG.addEdge(&PLDpc, SDep(Pred.getSUnit(), SDep::Artificial)); + } +} + +static bool hasPCRelForm(const MachineInstr &MI) { + // TODO - Use InstrMapping to map between the pcrel and non-pcrel forms of an + // opcode. + switch (MI.getOpcode()) { + default: + return false; + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + case PPC::STB: + case PPC::STB8: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + case PPC::LD: + case PPC::STD: + case PPC::LWA: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::LXV: + case PPC::STXSD: + case PPC::STXSSP: + case PPC::STXV: + case PPC::LFD: + case PPC::LFS: + case PPC::STFD: + case PPC::STFS: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + return true; + } +} + +SUnit *PPCLinkerOptMutation::getPLDpcAndLdStPair(SUnit &SU) const { + // TODO - Share this code with PPCPreEmitPeephole::isGOTPLDpc(). + if (SU.getInstr()->getOpcode() != PPC::PLDpc) + return nullptr; + + // The result must be a register. + const MachineOperand &DefMO = SU.getInstr()->getOperand(0); + if (!DefMO.isReg()) + return nullptr; + + // Make sure that this is a global symbol and it is loaded from the GOT. + const MachineOperand &Global = SU.getInstr()->getOperand(1); + if (!Global.isGlobal() || !(Global.getTargetFlags() & PPCII::MO_GOT_FLAG)) + return nullptr; + + // Folding the PLDpc into a pcrel load/store and removing it is only + // beneficial when the PLDpc has a single use. 
+ SUnit *SingleUseSU = nullptr; + for (const SDep &Succ : SU.Succs) { + if (Succ.getKind() == SDep::Data) { + assert(Succ.getReg() == DefMO.getReg() && "Unexpected data dependency"); + if (SingleUseSU) + return nullptr; + SingleUseSU = Succ.getSUnit(); + } + } + if (!SingleUseSU) + return nullptr; + + // The paired instruction must have a pcrel form. + const MachineInstr *SingleUseMI = SingleUseSU->getInstr(); + if (!hasPCRelForm(*SingleUseMI)) + return nullptr; + assert(SingleUseMI->mayLoadOrStore() && "Unknown pcrel instruction"); + + // Assume that operand 2 is the base register of the load/store; it must be + // the result register of the PLDpc so that the pair can be folded into a + // pcrel load/store by the linker. + if (!SingleUseMI->getOperand(2).isReg() || + SingleUseMI->getOperand(2).getReg() != DefMO.getReg()) + return nullptr; + + return SingleUseSU; +} + +} // end anonymous namespace + +namespace llvm { +std::unique_ptr<ScheduleDAGMutation> createLinkerOptDAGMutation() { + return EnableLinkerOptMutation ? 
std::make_unique<PPCLinkerOptMutation>() + : nullptr; +} +} // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -298,6 +298,9 @@ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); + if (ST.isUsingPCRelativeCalls()) + DAG->addMutation(createLinkerOptDAGMutation()); + return DAG; } diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -44,15 +44,19 @@ define dso_local void @WriteGlobalVarChar() local_unnamed_addr { ; LE-LABEL: WriteGlobalVarChar: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, valChar@got@pcrel(0), 1 ; LE-NEXT: li r4, 3 +; LE-NEXT: pld r3, valChar@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel0: +; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; LE-NEXT: stb r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalVarChar: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, valChar@got@pcrel(0), 1 ; BE-NEXT: li r4, 3 +; BE-NEXT: pld r3, valChar@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel0: +; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; BE-NEXT: stb r4, 0(r3) ; BE-NEXT: blr entry: @@ -64,16 +68,16 @@ ; LE-LABEL: ReadGlobalVarShort: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valShort@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel0: -; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; LE-NEXT: .Lpcrel1: +; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; LE-NEXT: lha r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarShort: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valShort@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel0: -; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; BE-NEXT: .Lpcrel1: +; BE-NEXT: .reloc 
.Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; BE-NEXT: lha r3, 0(r3) ; BE-NEXT: blr entry: @@ -85,15 +89,19 @@ define dso_local void @WriteGlobalVarShort() local_unnamed_addr { ; LE-LABEL: WriteGlobalVarShort: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, valShort@got@pcrel(0), 1 ; LE-NEXT: li r4, 3 +; LE-NEXT: pld r3, valShort@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel2: +; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; LE-NEXT: sth r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalVarShort: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, valShort@got@pcrel(0), 1 ; BE-NEXT: li r4, 3 +; BE-NEXT: pld r3, valShort@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel2: +; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; BE-NEXT: sth r4, 0(r3) ; BE-NEXT: blr entry: @@ -105,16 +113,16 @@ ; LE-LABEL: ReadGlobalVarInt: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valInt@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel1: -; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; LE-NEXT: .Lpcrel3: +; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarInt: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valInt@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel1: -; BE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; BE-NEXT: .Lpcrel3: +; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -125,15 +133,19 @@ define dso_local void @WriteGlobalVarInt() local_unnamed_addr { ; LE-LABEL: WriteGlobalVarInt: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, valInt@got@pcrel(0), 1 ; LE-NEXT: li r4, 33 +; LE-NEXT: pld r3, valInt@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel4: +; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; LE-NEXT: stw r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalVarInt: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, valInt@got@pcrel(0), 1 ; BE-NEXT: li r4, 33 +; BE-NEXT: pld r3, valInt@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel4: +; BE-NEXT: .reloc 
.Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; BE-NEXT: stw r4, 0(r3) ; BE-NEXT: blr entry: @@ -145,16 +157,16 @@ ; LE-LABEL: ReadGlobalVarUnsigned: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel2: -; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; LE-NEXT: .Lpcrel5: +; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarUnsigned: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel2: -; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; BE-NEXT: .Lpcrel5: +; BE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; BE-NEXT: lwa r3, 0(r3) ; BE-NEXT: blr entry: @@ -165,15 +177,19 @@ define dso_local void @WriteGlobalVarUnsigned() local_unnamed_addr { ; LE-LABEL: WriteGlobalVarUnsigned: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 ; LE-NEXT: li r4, 33 +; LE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel6: +; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; LE-NEXT: stw r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalVarUnsigned: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 ; BE-NEXT: li r4, 33 +; BE-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel6: +; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; BE-NEXT: stw r4, 0(r3) ; BE-NEXT: blr entry: @@ -185,16 +201,16 @@ ; LE-LABEL: ReadGlobalVarLong: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, valLong@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel3: -; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; LE-NEXT: .Lpcrel7: +; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; LE-NEXT: lwa r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalVarLong: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, valLong@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel3: -; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; BE-NEXT: .Lpcrel7: +; 
BE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -206,15 +222,19 @@ define dso_local void @WriteGlobalVarLong() local_unnamed_addr { ; LE-LABEL: WriteGlobalVarLong: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, valLong@got@pcrel(0), 1 ; LE-NEXT: li r4, 3333 +; LE-NEXT: pld r3, valLong@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel8: +; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; LE-NEXT: std r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalVarLong: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, valLong@got@pcrel(0), 1 ; BE-NEXT: li r4, 3333 +; BE-NEXT: pld r3, valLong@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel8: +; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; BE-NEXT: std r4, 0(r3) ; BE-NEXT: blr entry: @@ -226,16 +246,16 @@ ; LE-LABEL: ReadGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel4: -; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; LE-NEXT: .Lpcrel9: +; LE-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel4: -; BE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; BE-NEXT: .Lpcrel9: +; BE-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: blr entry: @@ -247,9 +267,9 @@ ; LE-LABEL: WriteGlobalPtr: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel5: +; LE-NEXT: .Lpcrel10: ; LE-NEXT: li r4, 3 -; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; LE-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) ; LE-NEXT: ld r3, 0(r3) ; LE-NEXT: stw r4, 0(r3) ; LE-NEXT: blr @@ -257,9 +277,9 @@ ; BE-LABEL: WriteGlobalPtr: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, ptr@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel5: +; BE-NEXT: .Lpcrel10: ; BE-NEXT: li r4, 3 -; BE-NEXT: .reloc 
.Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; BE-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) ; BE-NEXT: ld r3, 0(r3) ; BE-NEXT: stw r4, 0(r3) ; BE-NEXT: blr @@ -287,16 +307,16 @@ ; LE-LABEL: ReadGlobalArray: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, array@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel6: -; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; LE-NEXT: .Lpcrel11: +; LE-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) ; LE-NEXT: lwa r3, 12(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalArray: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, array@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel6: -; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; BE-NEXT: .Lpcrel11: +; BE-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) ; BE-NEXT: lwa r3, 12(r3) ; BE-NEXT: blr entry: @@ -307,15 +327,19 @@ define dso_local void @WriteGlobalArray() local_unnamed_addr { ; LE-LABEL: WriteGlobalArray: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, array@got@pcrel(0), 1 ; LE-NEXT: li r4, 5 +; LE-NEXT: pld r3, array@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel12: +; LE-NEXT: .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8) ; LE-NEXT: stw r4, 12(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalArray: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, array@got@pcrel(0), 1 ; BE-NEXT: li r4, 5 +; BE-NEXT: pld r3, array@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel12: +; BE-NEXT: .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8) ; BE-NEXT: stw r4, 12(r3) ; BE-NEXT: blr entry: @@ -327,16 +351,16 @@ ; LE-LABEL: ReadGlobalStruct: ; LE: # %bb.0: # %entry ; LE-NEXT: pld r3, structure@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel7: -; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; LE-NEXT: .Lpcrel13: +; LE-NEXT: .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8) ; LE-NEXT: lwa r3, 4(r3) ; LE-NEXT: blr ; ; BE-LABEL: ReadGlobalStruct: ; BE: # %bb.0: # %entry ; BE-NEXT: pld r3, structure@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel7: -; BE-NEXT: .reloc 
.Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; BE-NEXT: .Lpcrel13: +; BE-NEXT: .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8) ; BE-NEXT: lwa r3, 4(r3) ; BE-NEXT: blr entry: @@ -347,15 +371,19 @@ define dso_local void @WriteGlobalStruct() local_unnamed_addr { ; LE-LABEL: WriteGlobalStruct: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, structure@got@pcrel(0), 1 ; LE-NEXT: li r4, 3 +; LE-NEXT: pld r3, structure@got@pcrel(0), 1 +; LE-NEXT: .Lpcrel14: +; LE-NEXT: .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8) ; LE-NEXT: stw r4, 4(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteGlobalStruct: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, structure@got@pcrel(0), 1 ; BE-NEXT: li r4, 3 +; BE-NEXT: pld r3, structure@got@pcrel(0), 1 +; BE-NEXT: .Lpcrel14: +; BE-NEXT: .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8) ; BE-NEXT: stw r4, 4(r3) ; BE-NEXT: blr entry: @@ -368,8 +396,8 @@ ; LE: .localentry ReadFuncPtr, 1 ; LE-NEXT: # %bb.0: # %entry ; LE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 -; LE-NEXT: .Lpcrel8: -; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; LE-NEXT: .Lpcrel15: +; LE-NEXT: .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8) ; LE-NEXT: ld r12, 0(r3) ; LE-NEXT: mtctr r12 ; LE-NEXT: bctr @@ -379,8 +407,8 @@ ; BE: .localentry ReadFuncPtr, 1 ; BE-NEXT: # %bb.0: # %entry ; BE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 -; BE-NEXT: .Lpcrel8: -; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; BE-NEXT: .Lpcrel15: +; BE-NEXT: .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8) ; BE-NEXT: ld r12, 0(r3) ; BE-NEXT: mtctr r12 ; BE-NEXT: bctr @@ -391,18 +419,19 @@ ret void } +; FIXME - The linker opt relocation symbol is missing for pld - std pair. 
define dso_local void @WriteFuncPtr() local_unnamed_addr { ; LE-LABEL: WriteFuncPtr: ; LE: # %bb.0: # %entry -; LE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; LE-NEXT: pld r4, function@got@pcrel(0), 1 +; LE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; LE-NEXT: std r4, 0(r3) ; LE-NEXT: blr ; ; BE-LABEL: WriteFuncPtr: ; BE: # %bb.0: # %entry -; BE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; BE-NEXT: pld r4, function@got@pcrel(0), 1 +; BE-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; BE-NEXT: std r4, 0(r3) ; BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll @@ -39,12 +39,11 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1 ; CHECK-NEXT: .Lpcrel: -; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1 ; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8) ; CHECK-NEXT: lbz r3, 0(r3) -; In this test the stb r3, 0(r4) cannot be optimized because it -; uses the register r3 and that register is defined by lbz r3, 0(r3) -; which is defined between the pld and the stb. +; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel0: +; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) ; CHECK-NEXT: stb r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -57,13 +56,12 @@ ; CHECK-LABEL: ReadWrite16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel0: -; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8) +; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-NEXT: lhz r3, 0(r3) -; In this test the sth r3, 0(r4) cannot be optimized because it -; uses the register r3 and that register is defined by lhz r3, 0(r3) -; which is defined between the pld and the sth. 
+; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel2: +; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-NEXT: sth r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -76,10 +74,12 @@ ; CHECK-LABEL: ReadWrite32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel1: -; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; CHECK-NEXT: stw r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -92,10 +92,12 @@ ; CHECK-LABEL: ReadWrite64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel2: -; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; CHECK-NEXT: .Lpcrel5: +; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel6: +; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -124,12 +126,14 @@ ; CHECK-LABEL: ReadWritef32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: .Lpcrel7: ; CHECK-NEXT: xxspltidp vs1, 1078103900 -; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; CHECK-NEXT: lfs f0, 0(r3) -; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1 ; CHECK-NEXT: xsaddsp f0, f0, f1 +; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel8: +; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; CHECK-NEXT: stfs f0, 0(r3) ; CHECK-NEXT: blr entry: @@ -143,12 
+147,14 @@ ; CHECK-LABEL: ReadWritef64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: .Lpcrel9: ; CHECK-NEXT: plfd f1, .LCPI6_0@PCREL(0), 1 -; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; CHECK-NEXT: lfd f0, 0(r3) -; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1 ; CHECK-NEXT: xsadddp f0, f0, f1 +; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel10: +; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) ; CHECK-NEXT: stfd f0, 0(r3) ; CHECK-NEXT: blr entry: @@ -196,11 +202,13 @@ ; CHECK-LABEL: ReadWriteArray: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel5: -; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; CHECK-NEXT: .Lpcrel11: +; CHECK-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) ; CHECK-NEXT: lwz r3, 28(r3) ; CHECK-NEXT: addi r3, r3, 42 +; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel12: +; CHECK-NEXT: .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8) ; CHECK-NEXT: stw r3, 8(r4) ; CHECK-NEXT: blr entry: @@ -229,12 +237,12 @@ ; CHECK-LABEL: ReadWriteIntPtr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel6: +; CHECK-NEXT: .Lpcrel13: ; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel7: -; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; CHECK-NEXT: .Lpcrel14: +; CHECK-NEXT: .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8) ; CHECK-NEXT: ld r3, 0(r3) -; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; CHECK-NEXT: .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8) ; CHECK-NEXT: ld r4, 0(r4) ; CHECK-NEXT: lwz r5, 216(r3) ; CHECK-NEXT: lwz r3, 48(r3) @@ -258,10 +266,12 @@ ; CHECK-LABEL: ReadWriteFuncPtr: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel8: -; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1 -; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; CHECK-NEXT: .Lpcrel15: +; CHECK-NEXT: .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8) ; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel16: +; CHECK-NEXT: .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8) ; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: blr entry: @@ -273,8 +283,8 @@ define dso_local void @FuncPtrCopy() local_unnamed_addr #0 { ; CHECK-LABEL: FuncPtrCopy: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1 ; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1 +; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1 ; CHECK-NEXT: std r4, 0(r3) ; CHECK-NEXT: blr entry: @@ -289,8 +299,8 @@ ; CHECK: .localentry FuncPtrCall, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel9: -; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) +; CHECK-NEXT: .Lpcrel17: +; CHECK-NEXT: .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr @@ -305,8 +315,8 @@ ; CHECK-LABEL: ReadVecElement: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 -; CHECK-NEXT: .Lpcrel10: -; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) +; CHECK-NEXT: .Lpcrel18: +; CHECK-NEXT: .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8) ; CHECK-NEXT: lwa r3, 4(r3) ; CHECK-NEXT: blr entry: @@ -392,4 +402,38 @@ ret i32* @input32 } +; Verify that mulli can still be scheduled in-between pld and stw. 
+define dso_local i32 @interleave(i32* %t) local_unnamed_addr #0 { +; CHECK-LABEL: interleave: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: li r5, 4 +; CHECK-NEXT: pld r4, input32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel19: +; CHECK-NEXT: mulli r3, r3, 999 +; CHECK-NEXT: .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8) +; CHECK-NEXT: stw r5, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* %t, align 4 + store i32 4, i32* @input32, align 4 + %mul = mul nsw i32 %0, 999 + ret i32 %mul +} + +define dso_local void @multi_uses() local_unnamed_addr #0 { +; CHECK-LABEL: multi_uses: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: stw r4, 4(r3) +; CHECK-NEXT: li r4, 4 +; CHECK-NEXT: stw r4, 12(r3) +; CHECK-NEXT: blr +entry: + store i32 3, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 1), align 4 + store i32 4, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4 + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -16,13 +16,14 @@ @FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8 ; No calls in this function but we assign the function pointers. +; FIXME - the pcrel opt relocation is missing for pld r3 - std r4 pair. define dso_local void @AssignFuncPtr() local_unnamed_addr { ; CHECK-LABEL: AssignFuncPtr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 ; CHECK-NEXT: pld r4, Function@got@pcrel(0), 1 -; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 ; CHECK-NEXT: pstd r4, FuncLocal@PCREL(0), 1 +; CHECK-NEXT: std r4, 0(r3) ; CHECK-NEXT: blr entry: store i32 (...)* @Function, i32 (...)** @Func, align 8