Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -48,6 +48,7 @@ private: void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); + MCSymbol *checkLinkerOpt(const MCInst &Inst, const MCSubtargetInfo &STI); }; MCELFStreamer *createPPCELFStreamer(MCContext &Context, Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -89,12 +89,18 @@ PPCMCCodeEmitter *Emitter = static_cast(getAssembler().getEmitterPtr()); + // Special handling for a linker optimization. + MCSymbol *LabelSym = checkLinkerOpt(Inst, STI); + // Special handling is only for prefixed instructions. if (!Emitter->isPrefixedInstruction(Inst)) { MCELFStreamer::emitInstruction(Inst, STI); return; } emitPrefixedInstruction(Inst, STI); + + if (LabelSym && Inst.getOpcode() == PPC::PLDpc) + emitLabel(LabelSym, Inst.getLoc()); } void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { @@ -195,6 +201,66 @@ return MCELFStreamer::emitRelocDirective(Offset, Name, Expr, Loc, STI); } +MCSymbol *PPCELFStreamer::checkLinkerOpt(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCSymbol *LabelSym = nullptr; + + // Need at least two operands. + if (Inst.getNumOperands() < 2) + return LabelSym; + + unsigned LastOp = Inst.getNumOperands() - 1; + // The last operand needs to be an MCExpr and it needs to have a variant kind + // of VK_PPC_LINKER_OPT. If it does not satisfy these conditions it is not a + // linker opt instruction and we can ignore it and return null.
+ const MCOperand &Operand = Inst.getOperand(LastOp); + if (!Operand.isExpr()) + return LabelSym; + + // Check for the variant kind VK_PPC_LINKER_OPT in this expression. + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast(Expr); + if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_LINKER_OPT) + return LabelSym; + + // Obtain the label symbol and if the instruction is PLDpc return immediately. + // The PLDpc is the first of the two instructions that are linked together by + // this linker opt and we must wait for the second instruction before the + // fixup can be added. + LabelSym = getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); + if (Inst.getOpcode() == PPC::PLDpc) + return LabelSym; + + // For all other instructions we can create the fixup. + // This fixup will look like this: + // pld symbol@got@pcrel + // pcrel###: + // .reloc pcrel###-8,R_PPC64_PCREL_OPT,.-(pcrel###-8) + // load <reg>, 0(<base>) + // The reason we put the label after the pld instruction is because it is + // possible to have a nop inserted between the label and the prefixed pld. + // This nop is possible for any prefixed instruction because such instructions + // cannot cross 64 byte boundaries. Therefore we add the label after the pld + // and use label-8 to get the address of the pld instruction. + // The 8 is just the size of the pld.
+ const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext()); + const MCExpr *Eight = MCConstantExpr::create(8, getContext()); + const MCExpr *SubExpr = + MCBinaryExpr::createSub(LabelExpr, Eight, getContext()); + MCSymbol *TmpLabel = getContext().createTempSymbol(); + const MCExpr *TmpExpr = MCSymbolRefExpr::create(TmpLabel, getContext()); + const MCExpr *SubExpr2 = + MCBinaryExpr::createSub(TmpExpr, SubExpr, getContext()); + + MCDataFragment *DF = static_cast(LabelSym->getFragment()); + assert(DF && "Expecting a valid data fragment."); + DF->getFixups().push_back( + MCFixup::create(LabelSym->getOffset() - 8, SubExpr2, + (MCFixupKind)PPC::fixup_ppc_linker_opt, Inst.getLoc())); + emitLabel(TmpLabel, Inst.getLoc()); + return LabelSym; +} + MCELFStreamer *llvm::createPPCELFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -92,6 +92,36 @@ return; } + // Check if the last operand is an expression with the variant kind + // VK_PPC_LINKER_OPT. If this is the case then this is a linker optimization + // relocation and the .reloc directive needs to be added. 
+ unsigned LastOp = MI->getNumOperands() - 1; + if (MI->getNumOperands() > 1) { + const MCOperand &Operand = MI->getOperand(LastOp); + if (Operand.isExpr()) { + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = + static_cast(Expr); + + if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_LINKER_OPT) { + StringRef SymbolName = SymExpr->getSymbol().getName(); + if (MI->getOpcode() == PPC::PLDpc) { + printInstruction(MI, Address, O); + O << "\n.L"; + O << SymbolName; + O << ":"; + return; + } else { + O << "\t.reloc .L"; + O << SymbolName; + O << "-8,R_PPC64_PCREL_OPT,.-(.L"; + O << SymbolName; + O << "-8)\n"; + } + } + } + } + // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); Index: llvm/lib/Target/PowerPC/PPC.h =================================================================== --- llvm/lib/Target/PowerPC/PPC.h +++ llvm/lib/Target/PowerPC/PPC.h @@ -107,6 +107,10 @@ /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL. MO_GOT_FLAG = 8, + // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a + // PC Relative linker optimization. + MO_PCREL_OPT_FLAG = 16, + /// The next are not flags but distinct values. 
MO_ACCESS_MASK = 0xf00, Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1943,7 +1943,8 @@ {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, {MO_PCREL_FLAG, "ppc-pcrel"}, - {MO_GOT_FLAG, "ppc-got"}}; + {MO_GOT_FLAG, "ppc-got"}, + {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}}; return makeArrayRef(TargetFlags); } Index: llvm/lib/Target/PowerPC/PPCMCInstLower.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -100,6 +100,8 @@ MIOpcode == PPC::BL8_NOTOC) { RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; } + if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_LINKER_OPT; } const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); Index: llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -43,6 +43,46 @@ cl::desc("Run pre-emit peephole optimizations.")); namespace { + +static bool hasPCRelativeForm(MachineInstr &Use) { + switch (Use.getOpcode()) { + default: + return false; + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + case PPC::STB: + case PPC::STB8: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + case PPC::LD: + case PPC::STD: + case PPC::LWA: + case PPC::LXSD: + case 
PPC::LXSSP: + case PPC::LXV: + case PPC::STXSD: + case PPC::STXSSP: + case PPC::STXV: + case PPC::LFD: + case PPC::LFS: + case PPC::STFD: + case PPC::STFS: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + return true; + } +} + class PPCPreEmitPeephole : public MachineFunctionPass { public: static char ID; @@ -172,6 +212,77 @@ return !InstrsToErase.empty(); } + bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Look for the initial GOT indirect load. + if (BBI->getOpcode() != PPC::PLDpc) + continue; + + MachineInstr &DefMI = *BBI; + + const MachineOperand &LoadedAddressReg = BBI->getOperand(0); + // Needs to be a register. + if (!LoadedAddressReg.isReg()) + continue; + + const MachineOperand &SymbolOp = BBI->getOperand(1); + + // Make sure that this is a GOT indirect symbol. + if (!SymbolOp.isGlobal() || + !(SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG)) + continue; + + Register Reg = LoadedAddressReg.getReg(); + MachineInstrBundleIterator UseFound; + for (auto SearchBBI = ++BBI; SearchBBI != MBB.instr_end(); + ++SearchBBI) { + if (SearchBBI->readsRegister(Reg, TRI) && + SearchBBI->killsRegister(Reg, TRI) && + hasPCRelativeForm(*SearchBBI)) { + UseFound = SearchBBI; + break; + } + + // If we read/write the register and it is not the kind of use we are + // looking for then just fail and leave the search loop. + if (SearchBBI->readsRegister(Reg, TRI) || + SearchBBI->modifiesRegister(Reg, TRI)) + break; + } + if (!UseFound.isValid()) + continue; + + // The first operand of the use is going to be either the element we + // load in a load or the element we store in a store. + const MachineOperand FirstOp = UseFound->getOperand(0); + // We have found a potential use. Search through the instructions + // between the def and the use to see if it is valid to mark this as a + // linker opt. 
+ bool IsStillValid = true; + for (auto SearchBBI = BBI; SearchBBI != UseFound; ++SearchBBI) { + if (SearchBBI->readsRegister(FirstOp.getReg(), TRI) || + SearchBBI->modifiesRegister(FirstOp.getReg(), TRI)) { + IsStillValid = false; + break; + } + } + + if (IsStillValid) { + static uint64_t UseCounter = 0; + UseCounter++; + MachineFunction *MF = MBB.getParent(); + MCContext &Context = MF->getContext(); + MCSymbol *Symbol = + Context.getOrCreateSymbol(Twine("pcrel") + Twine(UseCounter)); + MachineOperand LabelNum = MachineOperand::CreateMCSymbol( + Symbol, PPCII::MO_PCREL_OPT_FLAG); + DefMI.addOperand(*MF, LabelNum); + UseFound->addOperand(*MF, LabelNum); + } + } + return false; + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. @@ -192,6 +303,7 @@ SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) { Changed |= removeRedundantLIs(MBB, TRI); + Changed |= addLinkerOpt(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == PPC::UNENCODED_NOP) { Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -64,6 +64,8 @@ ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl localCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -147,6 +149,8 @@ ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: bl externCall@notoc ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel2: +; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw 
r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -209,6 +213,8 @@ ; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel3: +; CHECK-S-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -251,6 +257,8 @@ ; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel4: +; CHECK-S-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -316,6 +324,8 @@ ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel5: +; CHECK-S-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 Index: llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -19,6 +19,8 @@ ; CHECK-LABEL: ReadGlobalVarChar: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, valChar@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-NEXT: lbz r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -43,6 +45,8 @@ ; CHECK-LABEL: ReadGlobalVarShort: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, valShort@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel2: +; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-NEXT: lha r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -67,6 +71,8 @@ ; CHECK-LABEL: ReadGlobalVarInt: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, valInt@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) ; CHECK-NEXT: lwa 
r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -90,6 +96,8 @@ ; CHECK-LABEL: ReadGlobalVarUnsigned: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) ; CHECK-NEXT: lwa r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -113,6 +121,8 @@ ; CHECK-LABEL: ReadGlobalVarLong: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, valLong@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel5: +; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) ; CHECK-NEXT: lwa r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -137,6 +147,8 @@ ; CHECK-LABEL: ReadGlobalPtr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ptr@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel6: +; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: blr entry: @@ -148,7 +160,9 @@ ; CHECK-LABEL: WriteGlobalPtr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ptr@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel7: ; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: stw r4, 0(r3) ; CHECK-NEXT: blr @@ -171,6 +185,8 @@ ; CHECK-LABEL: ReadGlobalArray: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, array@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel8: +; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) ; CHECK-NEXT: lwa r3, 12(r3) ; CHECK-NEXT: blr entry: @@ -194,6 +210,8 @@ ; CHECK-LABEL: ReadGlobalStruct: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, structure@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel9: +; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) ; CHECK-NEXT: lwa r3, 4(r3) ; CHECK-NEXT: blr entry: @@ -218,6 +236,8 @@ ; CHECK: .localentry ReadFuncPtr, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel10: +; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr Index: 
llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll @@ -0,0 +1,390 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s + +; On future CPU with PC Relative addressing enabled, it is possible for the +; linker to optimize GOT indirect accesses. In order for the linker to do this +; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation. +; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation +; correctly. + +@input8 = external local_unnamed_addr global i8, align 1 +@output8 = external local_unnamed_addr global i8, align 1 +@input16 = external local_unnamed_addr global i16, align 2 +@output16 = external local_unnamed_addr global i16, align 2 +@input32 = external global i32, align 4 +@output32 = external local_unnamed_addr global i32, align 4 +@input64 = external local_unnamed_addr global i64, align 8 +@output64 = external local_unnamed_addr global i64, align 8 +@input128 = external local_unnamed_addr global i128, align 16 +@output128 = external local_unnamed_addr global i128, align 16 +@inputf32 = external local_unnamed_addr global float, align 4 +@outputf32 = external local_unnamed_addr global float, align 4 +@inputf64 = external local_unnamed_addr global double, align 8 +@outputf64 = external local_unnamed_addr global double, align 8 +@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16 +@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16 +@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16 +@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16 +@ArrayIn = external global [10 x i32], align 4 +@ArrayOut = external local_unnamed_addr global [10 x i32], 
align 4 +@IntPtrIn = external local_unnamed_addr global i32*, align 8 +@IntPtrOut = external local_unnamed_addr global i32*, align 8 +@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8 +@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8 + +define dso_local void @ReadWrite8() local_unnamed_addr { +; CHECK-LABEL: ReadWrite8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @input8, align 1 + store i8 %0, i8* @output8, align 1 + ret void +} + +define dso_local void @ReadWrite16() local_unnamed_addr { +; CHECK-LABEL: ReadWrite16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel2: +; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* @input16, align 2 + store i16 %0, i16* @output16, align 2 + ret void +} + +define dso_local void @ReadWrite32() local_unnamed_addr { +; CHECK-LABEL: ReadWrite32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel3: +; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) +; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @input32, align 4 + store i32 %0, i32* @output32, align 4 + ret void +} + +define dso_local void @ReadWrite64() local_unnamed_addr { +; CHECK-LABEL: ReadWrite64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel4: +; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1 +; CHECK-NEXT: .reloc 
.Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* @input64, align 8 + store i64 %0, i64* @output64, align 8 + ret void +} + +define dso_local void @ReadWrite128() local_unnamed_addr { +; CHECK-LABEL: ReadWrite128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input128@got@pcrel(0), 1 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, output128@got@pcrel(0), 1 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load i128, i128* @input128, align 16 + store i128 %0, i128* @output128, align 16 + ret void +} + +define dso_local void @ReadWritef32() local_unnamed_addr { +; CHECK-LABEL: ReadWritef32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel5: +; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) +; CHECK-NEXT: lfs f0, 0(r3) +; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1 +; CHECK-NEXT: xsaddsp f0, f0, f1 +; CHECK-NEXT: stfs f0, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load float, float* @inputf32, align 4 + %add = fadd float %0, 0x400851EB80000000 + store float %add, float* @outputf32, align 4 + ret void +} + +define dso_local void @ReadWritef64() local_unnamed_addr { +; CHECK-LABEL: ReadWritef64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel6: +; CHECK-NEXT: plfd f1, .LCPI6_0@PCREL(0), 1 +; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) +; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1 +; CHECK-NEXT: xsadddp f0, f0, f1 +; CHECK-NEXT: stfd f0, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load double, double* @inputf64, align 8 + %add = fadd double %0, 6.800000e+00 + store double %add, double* @outputf64, align 8 + ret void +} + +define dso_local void @ReadWriteVi32() local_unnamed_addr { +; CHECK-LABEL: ReadWriteVi32: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 45 +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, outputVi32@got@pcrel(0), 1 +; CHECK-NEXT: xxinsertw vs0, vs1, 8 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecins = insertelement <4 x i32> %0, i32 45, i32 1 + store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16 + ret void +} + +define dso_local void @ReadWriteVi64() local_unnamed_addr { +; CHECK-LABEL: ReadWriteVi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputVi64@got@pcrel(0), 1 +; CHECK-NEXT: lxvx vs0, 0, r3 +; CHECK-NEXT: pld r3, outputVi64@got@pcrel(0), 1 +; CHECK-NEXT: stxvx vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16 + store <2 x i64> %0, <2 x i64>* @outputVi64, align 16 + ret void +} + +define dso_local void @ReadWriteArray() local_unnamed_addr { +; CHECK-LABEL: ReadWriteArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel7: +; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) +; CHECK-NEXT: lwz r3, 28(r3) +; CHECK-NEXT: addi r3, r3, 42 +; CHECK-NEXT: stw r3, 8(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4 + %add = add nsw i32 %0, 42 + store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4 + ret void +} + +define dso_local void @ReadWriteSameArray() local_unnamed_addr { +; CHECK-LABEL: ReadWriteSameArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: lwz r4, 12(r3) +; CHECK-NEXT: addi r4, r4, 8 +; CHECK-NEXT: stw r4, 24(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4 + %add = add nsw i32 %0, 8 
+ store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4 + ret void +} + +define dso_local void @ReadWriteIntPtr() local_unnamed_addr { +; CHECK-LABEL: ReadWriteIntPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel8: +; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: ld r4, 0(r4) +; CHECK-NEXT: lwz r5, 216(r3) +; CHECK-NEXT: lwz r3, 48(r3) +; CHECK-NEXT: add r3, r3, r5 +; CHECK-NEXT: stw r3, 136(r4) +; CHECK-NEXT: blr +entry: + %0 = load i32*, i32** @IntPtrIn, align 8 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 54 + %1 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add nsw i32 %2, %1 + %3 = load i32*, i32** @IntPtrOut, align 8 + %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34 + store i32 %add, i32* %arrayidx2, align 4 + ret void +} + +define dso_local void @ReadWriteFuncPtr() local_unnamed_addr { +; CHECK-LABEL: ReadWriteFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel9: +; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8 + store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8 + ret void +} + +define dso_local void @FuncPtrCopy() local_unnamed_addr { +; CHECK-LABEL: FuncPtrCopy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1 +; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: blr +entry: + store void (...)* @Callee, void (...)** @FuncPtrOut, align 8 + ret void +} + +declare void 
@Callee(...) + +define dso_local void @FuncPtrCall() local_unnamed_addr { +; CHECK-LABEL: FuncPtrCall: +; CHECK: .localentry FuncPtrCall, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel10: +; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8 + tail call void %0() + ret void +} + +define dso_local signext i32 @ReadVecElement() local_unnamed_addr { +; CHECK-LABEL: ReadVecElement: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel11: +; CHECK-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) +; CHECK-NEXT: lwa r3, 4(r3) +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecext = extractelement <4 x i32> %0, i32 1 + ret i32 %vecext +} + +define dso_local signext i32 @VecMultiUse() local_unnamed_addr { +; CHECK-LABEL: VecMultiUse: +; CHECK: .localentry VecMultiUse, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: pld r30, inputVi32@got@pcrel(0), 1 +; CHECK-NEXT: lwz r29, 4(r30) +; CHECK-NEXT: bl Callee@notoc +; CHECK-NEXT: lwz r3, 8(r30) +; CHECK-NEXT: add r29, r3, r29 +; CHECK-NEXT: bl Callee@notoc +; CHECK-NEXT: lwz r3, 0(r30) +; CHECK-NEXT: add r3, r29, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 
+; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + tail call void bitcast (void (...)* @Callee to void ()*)() + %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %2 = extractelement <4 x i32> %1, i32 2 + %3 = extractelement <4 x i32> %0, i64 1 + %4 = add nsw i32 %2, %3 + tail call void bitcast (void (...)* @Callee to void ()*)() + %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16 + %vecext2 = extractelement <4 x i32> %5, i32 0 + %add3 = add nsw i32 %4, %vecext2 + ret i32 %add3 +} + +define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: UseAddr: +; CHECK: .localentry UseAddr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: pld r4, ArrayIn@got@pcrel(0), 1 +; CHECK-NEXT: lwz r5, 16(r4) +; CHECK-NEXT: add r30, r5, r3 +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: bl getAddr@notoc +; CHECK-NEXT: add r3, r30, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0)) + %add1 = add nsw i32 %add, %call + ret i32 %add1 +} + +declare signext i32 @getAddr(i32*) local_unnamed_addr + +define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr { +; CHECK-LABEL: AddrTaken32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1 +; CHECK-NEXT: blr +entry: + ret i32* @input32 +} + + Index: llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll +++ llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll @@ -42,12 +42,15 @@ ; CHECK-S-LABEL: getElementExtern4: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwa r3, 16(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: : ; CHECK-O: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 ; CHECK-O-NEXT: 0000000000000040: R_PPC64_GOT_PCREL34 array1 -; CHECK-O-NEXT: 12 00 63 e8 lwa 3, 16(3) +; CHECK-O-NEXT: 0000000000000040: R_PPC64_PCREL_OPT *ABS*+0x8 +; CHECK-O: 12 00 63 e8 lwa 3, 16(3) ; CHECK-O-NEXT: 20 00 80 4e blr entry: %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 4), align 4 @@ -58,12 +61,15 @@ ; CHECK-S-LABEL: getElementExternNegative: ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel2: +; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) ; CHECK-S-NEXT: lwa r3, -4(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: : ; CHECK-O: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 ; CHECK-O-NEXT: 0000000000000060: R_PPC64_GOT_PCREL34 array1 -; CHECK-O-NEXT: fe ff 63 e8 lwa 3, -4(3) +; CHECK-O-NEXT: 0000000000000060: R_PPC64_PCREL_OPT *ABS*+0x8 +; CHECK-O: fe ff 63 e8 lwa 3, -4(3) ; CHECK-O-NEXT: 20 00 80 4e blr entry: %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 -1), align 4 Index: llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll +++ llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -47,6 +47,8 @@ ; CHECK: .localentry TailCallExtrnFuncPtr, 1 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: .Lpcrel1: +; CHECK-NEXT: .reloc 
.Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 ; CHECK-NEXT: bctr Index: llvm/test/CodeGen/PowerPC/pcrel.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pcrel.ll +++ llvm/test/CodeGen/PowerPC/pcrel.ll @@ -41,12 +41,16 @@ ; CHECK-S-LABEL: ReadGlobalVarInt ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: pld r3, valIntGlob@got@pcrel(0), 1 +; CHECK-S-NEXT: .Lpcrel1: +; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) ; CHECK-S-NEXT: lwa r3, 0(r3) ; CHECK-S-NEXT: blr ; CHECK-O-LABEL: ReadGlobalVarInt ; CHECK-O: pld 3, 0(0), 1 ; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 valIntGlob +; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8 +; CHECK-O: : ; CHECK-O-NEXT: lwa 3, 0(3) ; CHECK-O-NEXT: blr entry: