Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
===================================================================
--- llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -92,6 +92,42 @@
     return;
   }
 
+  // An instruction whose last operand is a symbol reference of kind
+  // VK_PPC_LINKER_OPT takes part in a PC-relative linker optimization.
+  // A PLDpc is printed followed by a ".L<symbol>:" label on the next line.
+  // Any other instruction gets a ".reloc" directive naming that label
+  // emitted ahead of it and then falls through to the regular printing
+  // code below.
+  if (MI->getNumOperands() > 1) {
+    const MCOperand &Operand = MI->getOperand(MI->getNumOperands() - 1);
+    if (Operand.isExpr()) {
+      const MCExpr *Expr = Operand.getExpr();
+      // Check the expression kind before downcasting so that an expression
+      // that is not a symbol reference is never treated as one.
+      const MCSymbolRefExpr *SymExpr =
+          Expr->getKind() == MCExpr::SymbolRef
+              ? static_cast<const MCSymbolRefExpr *>(Expr)
+              : nullptr;
+
+      if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_LINKER_OPT) {
+        StringRef SymbolName = SymExpr->getSymbol().getName();
+        if (MI->getOpcode() == PPC::PLDpc) {
+          printInstruction(MI, Address, O);
+          O << "\n.L";
+          O << SymbolName;
+          O << ":";
+          return;
+        } else {
+          O << "\t.reloc .L";
+          O << SymbolName;
+          O << "-8,R_PPC64_PCREL_OPT,.-(.L";
+          O << SymbolName;
+          O << "-8)\n";
+        }
+      }
+    }
+  }
+
   // Check for slwi/srwi mnemonics.
   if (MI->getOpcode() == PPC::RLWINM) {
     unsigned char SH = MI->getOperand(2).getImm();
Index: llvm/lib/Target/PowerPC/PPC.h
===================================================================
--- llvm/lib/Target/PowerPC/PPC.h
+++ llvm/lib/Target/PowerPC/PPC.h
@@ -107,6 +107,10 @@
     /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
     MO_GOT_FLAG = 8,
 
+    /// MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+    /// PC Relative linker optimization.
+    MO_PCREL_OPT_FLAG = 16,
+
     /// The next are not flags but distinct values.
     MO_ACCESS_MASK = 0xf00,
 
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2055,7 +2055,8 @@
       {MO_PLT, "ppc-plt"},
       {MO_PIC_FLAG, "ppc-pic"},
       {MO_PCREL_FLAG, "ppc-pcrel"},
-      {MO_GOT_FLAG, "ppc-got"}};
+      {MO_GOT_FLAG, "ppc-got"},
+      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}};
   return makeArrayRef(TargetFlags);
 }
 
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -498,5 +498,14 @@
   // If the PPCmatpcreladdr node is not caught by any other pattern it should be
   // caught here and turned into a paddi instruction to materialize the address.
   def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+
+  // PLDpcnocopy is lowered to PLDpc when the MCInst is created. It exists
+  // only so that the instruction can be marked isNotDuplicable, which keeps
+  // the label attached to it for the linker optimization unique.
+  let isNotDuplicable = 1, isCodeGenOnly = 1, mayLoad = 1, mayStore = 0 in {
+    def PLDpcnocopy : PPCEmitTimePseudo<(outs g8rc:$RT),
+                                        (ins memri34_pcrel:$D_RA),
+                                        "#PLDpcnocopy $RT $D_RA", []>;
+  }
 }
 
Index: llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -100,6 +100,8 @@
         MIOpcode == PPC::BL8_NOTOC) {
       RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
     }
+    if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
+      RefKind = MCSymbolRefExpr::VK_PPC_LINKER_OPT;
   }
 
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
@@ -138,7 +140,14 @@
 
 void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                         AsmPrinter &AP) {
-  OutMI.setOpcode(MI->getOpcode());
+  // A PLDpcnocopy is the same as a PLDpc except that it has a flag to
+  // indicate that it cannot be copied.
+  // Since we are lowering the instruction we no longer need the flag to
+  // indicate that this instruction cannot be copied so change it to a PLDpc.
+  if (MI->getOpcode() == PPC::PLDpcnocopy)
+    OutMI.setOpcode(PPC::PLDpc);
+  else
+    OutMI.setOpcode(MI->getOpcode());
 
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MCOperand MCOp;
Index: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -265,6 +266,48 @@
   TOCSaves[MI] = Keep;
 }
 
+// Return true if the opcode of Use is one of the load/store opcodes that
+// the PC-relative linker optimization in simplifyCode accepts as the single
+// user of a PLDpc.
+static bool hasPCRelativeForm(const MachineInstr &Use) {
+  switch (Use.getOpcode()) {
+  default:
+    return false;
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+  case PPC::LD:
+  case PPC::STD:
+  case PPC::LWA:
+  case PPC::LXSD:
+  case PPC::LXSSP:
+  case PPC::LXV:
+  case PPC::STXSD:
+  case PPC::STXSSP:
+  case PPC::STXV:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64:
+    return true;
+  }
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode(void) {
   bool Simplified = false;
@@ -320,6 +363,71 @@
       default:
         break;
 
+      case PPC::PLDpc: {
+        // Only valid with PC Relative
+        if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+          break;
+
+        Register ResultOp = MI.getOperand(0).getReg();
+        // Limit this to the case where there is exactly one use.
+        if (!MRI->hasOneUse(ResultOp))
+          break;
+
+        // Make sure that this is a GOT indirect symbol.
+        const MachineOperand &SymbolOp = MI.getOperand(1);
+        if (!SymbolOp.isGlobal() ||
+            !(SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG))
+          break;
+
+        MachineInstr &Use = *(MRI->use_instr_begin(ResultOp));
+
+        // If the only use of the PLDpc is an inline asm block stop here.
+        // It is not safe to do anything with inline asm blocks.
+        if (Use.isInlineAsm())
+          break;
+
+        // We intend to have the linker replace this use with a nop. If it has
+        // side effects we cannot safely do the replacement.
+        if (Use.hasUnmodeledSideEffects())
+          break;
+
+        // It is not safe to replace calls with nop.
+        if (Use.isCall())
+          break;
+
+        // Only looking for loads and stores.
+        if (!Use.mayLoadOrStore())
+          break;
+
+        // Not all instructions that may load or store are valid for this linker
+        // optimization.
+        if (!hasPCRelativeForm(Use))
+          break;
+
+        // The use of this register must be as part of the address and not
+        // part of operand zero of a store.
+        if (ResultOp == Use.getOperand(0).getReg())
+          break;
+
+        // A PLDpcnocopy is the same as a PLDpc except that passes that come
+        // later are not allowed to copy it. This is important here because
+        // we want the "pcrel" symbol we create to be unique and copying the
+        // instruction would make duplicates of it.
+        MI.setDesc(TII->get(PPC::PLDpcnocopy));
+        // NOTE(review): UseCounter is a function-local static, so it is shared
+        // by every MachineFunction compiled in this process and is updated
+        // without synchronization. Confirm that is acceptable if functions
+        // can be compiled concurrently.
+        static uint64_t UseCounter = 0;
+        ++UseCounter;
+        MCSymbol *Symbol = MF->getContext().getOrCreateSymbol(
+            Twine("pcrel") + Twine(UseCounter));
+        MachineOperand LabelNum =
+            MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+        MI.addOperand(*MF, LabelNum);
+        Use.addOperand(*MF, LabelNum);
+        break;
+      }
       case PPC::STD: {
         MachineFrameInfo &MFI = MF->getFrameInfo();
         if (MFI.hasVarSizedObjects() ||
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -64,6 +64,8 @@
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bl localCall@notoc
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel1:
+; CHECK-S-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -147,6 +149,8 @@
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bl externCall@notoc
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel2:
+; CHECK-S-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -209,6 +213,8 @@
 ; CHECK-S:         .localentry TailCallLocal2
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel3:
+; CHECK-S-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -251,6 +257,8 @@
 ; CHECK-S:         .localentry TailCallExtern2
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel4:
+; CHECK-S-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -316,6 +324,8 @@
 ; CHECK-S-NEXT:    mtctr r12
 ; CHECK-S-NEXT:    bctrl
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel5:
+; CHECK-S-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
Index: llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -19,6 +19,8 @@
 ; CHECK-LABEL: ReadGlobalVarChar:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valChar@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel1:
+; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:    lbz r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -31,7 +33,9 @@
 ; CHECK-LABEL: WriteGlobalVarChar:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valChar@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel2:
 ; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; CHECK-NEXT:    stb r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -43,6 +47,8 @@
 ; CHECK-LABEL: ReadGlobalVarShort:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valShort@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel3:
+; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
 ; CHECK-NEXT:    lha r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -55,7 +61,9 @@
 ; CHECK-LABEL: WriteGlobalVarShort:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valShort@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel4:
 ; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
 ; CHECK-NEXT:    sth r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -67,6 +75,8 @@
 ; CHECK-LABEL: ReadGlobalVarInt:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valInt@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel5:
+; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:    lwa r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -78,7 +88,9 @@
 ; CHECK-LABEL: WriteGlobalVarInt:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valInt@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel6:
 ; CHECK-NEXT:    li r4, 33
+; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
 ; CHECK-NEXT:    stw r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -90,6 +102,8 @@
 ; CHECK-LABEL: ReadGlobalVarUnsigned:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valUnsigned@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel7:
+; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
 ; CHECK-NEXT:    lwa r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -101,7 +115,9 @@
 ; CHECK-LABEL: WriteGlobalVarUnsigned:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valUnsigned@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel8:
 ; CHECK-NEXT:    li r4, 33
+; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
 ; CHECK-NEXT:    stw r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -113,6 +129,8 @@
 ; CHECK-LABEL: ReadGlobalVarLong:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valLong@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel9:
+; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
 ; CHECK-NEXT:    lwa r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -125,7 +143,9 @@
 ; CHECK-LABEL: WriteGlobalVarLong:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, valLong@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:    li r4, 3333
+; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
 ; CHECK-NEXT:    std r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -137,6 +157,8 @@
 ; CHECK-LABEL: ReadGlobalPtr:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel11:
+; CHECK-NEXT:    .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -148,7 +170,9 @@
 ; CHECK-LABEL: WriteGlobalPtr:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel12:
 ; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    stw r4, 0(r3)
 ; CHECK-NEXT:    blr
@@ -171,6 +195,8 @@
 ; CHECK-LABEL: ReadGlobalArray:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, array@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel13:
+; CHECK-NEXT:    .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
 ; CHECK-NEXT:    lwa r3, 12(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -182,7 +208,9 @@
 ; CHECK-LABEL: WriteGlobalArray:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, array@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel14:
 ; CHECK-NEXT:    li r4, 5
+; CHECK-NEXT:    .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
 ; CHECK-NEXT:    stw r4, 12(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -194,6 +222,8 @@
 ; CHECK-LABEL: ReadGlobalStruct:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, structure@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel15:
+; CHECK-NEXT:    .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
 ; CHECK-NEXT:    lwa r3, 4(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -205,7 +235,9 @@
 ; CHECK-LABEL: WriteGlobalStruct:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, structure@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel16:
 ; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
 ; CHECK-NEXT:    stw r4, 4(r3)
 ; CHECK-NEXT:    blr
 entry:
@@ -218,6 +250,8 @@
 ; CHECK:         .localentry ReadFuncPtr, 1
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel17:
+; CHECK-NEXT:    .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
 ; CHECK-NEXT:    ld r12, 0(r3)
 ; CHECK-NEXT:    mtctr r12
 ; CHECK-NEXT:    bctr
@@ -232,7 +266,9 @@
 ; CHECK-LABEL: WriteFuncPtr:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel18:
 ; CHECK-NEXT:    pld r4, function@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
 ; CHECK-NEXT:    std r4, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -0,0 +1,422 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s
+
+; On future CPU with PC Relative addressing enabled it is possible for the
+; linker to optimize GOT indirect accesses. In order for the linker to do this
+; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
+; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
+; correctly.
+
+@input8 = external local_unnamed_addr global i8, align 1
+@output8 = external local_unnamed_addr global i8, align 1
+@input16 = external local_unnamed_addr global i16, align 2
+@output16 = external local_unnamed_addr global i16, align 2
+@input32 = external global i32, align 4
+@output32 = external local_unnamed_addr global i32, align 4
+@input64 = external local_unnamed_addr global i64, align 8
+@output64 = external local_unnamed_addr global i64, align 8
+@input128 = external local_unnamed_addr global i128, align 16
+@output128 = external local_unnamed_addr global i128, align 16
+@inputf32 = external local_unnamed_addr global float, align 4
+@outputf32 = external local_unnamed_addr global float, align 4
+@inputf64 = external local_unnamed_addr global double, align 8
+@outputf64 = external local_unnamed_addr global double, align 8
+@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@ArrayIn = external global [10 x i32], align 4
+@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
+@IntPtrIn = external local_unnamed_addr global i32*, align 8
+@IntPtrOut = external local_unnamed_addr global i32*, align 8
+@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8
+@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
+
+define dso_local void @ReadWrite8() local_unnamed_addr {
+; CHECK-LABEL: ReadWrite8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel1:
+; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+; CHECK-NEXT:    lbz r3, 0(r3)
+; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel2:
+; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i8, i8* @input8, align 1
+  store i8 %0, i8* @output8, align 1
+  ret void
+}
+
+define dso_local void @ReadWrite16() local_unnamed_addr {
+; CHECK-LABEL: ReadWrite16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel3:
+; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel4:
+; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i16, i16* @input16, align 2
+  store i16 %0, i16* @output16, align 2
+  ret void
+}
+
+define dso_local void @ReadWrite32() local_unnamed_addr {
+; CHECK-LABEL: ReadWrite32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel5:
+; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+; CHECK-NEXT:    lwz r3, 0(r3)
+; CHECK-NEXT:    pld r4, output32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel6:
+; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+; CHECK-NEXT:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @input32, align 4
+  store i32 %0, i32* @output32, align 4
+  ret void
+}
+
+define dso_local void @ReadWrite64() local_unnamed_addr {
+; CHECK-LABEL: ReadWrite64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel7:
+; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    pld r4, output64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel8:
+; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i64, i64* @input64, align 8
+  store i64 %0, i64* @output64, align 8
+  ret void
+}
+
+; The i128 accesses use lxvx/stxvx, which are not in the set of instructions
+; accepted for the optimization, so no R_PPC64_PCREL_OPT is expected.
+define dso_local void @ReadWrite128() local_unnamed_addr {
+; CHECK-LABEL: ReadWrite128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input128@got@pcrel(0), 1
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, output128@got@pcrel(0), 1
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i128, i128* @input128, align 16
+  store i128 %0, i128* @output128, align 16
+  ret void
+}
+
+define dso_local void @ReadWritef32() local_unnamed_addr {
+; CHECK-LABEL: ReadWritef32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputf32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel9:
+; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+; CHECK-NEXT:    lfs f0, 0(r3)
+; CHECK-NEXT:    pld r3, outputf32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel10:
+; CHECK-NEXT:    xsaddsp f0, f0, f1
+; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+; CHECK-NEXT:    stfs f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load float, float* @inputf32, align 4
+  %add = fadd float %0, 0x400851EB80000000
+  store float %add, float* @outputf32, align 4
+  ret void
+}
+
+define dso_local void @ReadWritef64() local_unnamed_addr {
+; CHECK-LABEL: ReadWritef64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel11:
+; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
+; CHECK-NEXT:    lfd f0, 0(r3)
+; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel12:
+; CHECK-NEXT:    xsadddp f0, f0, f1
+; CHECK-NEXT:    .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
+; CHECK-NEXT:    stfd f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load double, double* @inputf64, align 8
+  %add = fadd double %0, 6.800000e+00
+  store double %add, double* @outputf64, align 8
+  ret void
+}
+
+; Vector accesses use lxvx/stxvx so no R_PPC64_PCREL_OPT is expected.
+define dso_local void @ReadWriteVi32() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteVi32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    li r4, 45
+; CHECK-NEXT:    mtfprwz f1, r4
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, outputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    xxinsertw vs0, vs1, 8
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
+  store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16
+  ret void
+}
+
+; Vector accesses use lxvx/stxvx so no R_PPC64_PCREL_OPT is expected.
+define dso_local void @ReadWriteVi64() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteVi64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi64@got@pcrel(0), 1
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, outputVi64@got@pcrel(0), 1
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16
+  store <2 x i64> %0, <2 x i64>* @outputVi64, align 16
+  ret void
+}
+
+define dso_local void @ReadWriteArray() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteArray:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel13:
+; CHECK-NEXT:    .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
+; CHECK-NEXT:    lwz r3, 28(r3)
+; CHECK-NEXT:    pld r4, ArrayOut@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel14:
+; CHECK-NEXT:    addi r3, r3, 42
+; CHECK-NEXT:    .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
+; CHECK-NEXT:    stw r3, 8(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4
+  %add = add nsw i32 %0, 42
+  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4
+  ret void
+}
+
+; The address of ArrayIn has two uses (a load and a store) so no
+; R_PPC64_PCREL_OPT is expected.
+define dso_local void @ReadWriteSameArray() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteSameArray:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r4, 12(r3)
+; CHECK-NEXT:    addi r4, r4, 8
+; CHECK-NEXT:    stw r4, 24(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4
+  %add = add nsw i32 %0, 8
+  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4
+  ret void
+}
+
+define dso_local void @ReadWriteIntPtr() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteIntPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, IntPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel15:
+; CHECK-NEXT:    .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    lwz r4, 216(r3)
+; CHECK-NEXT:    lwz r3, 48(r3)
+; CHECK-NEXT:    pld r5, IntPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel16:
+; CHECK-NEXT:    add r3, r3, r4
+; CHECK-NEXT:    .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
+; CHECK-NEXT:    ld r5, 0(r5)
+; CHECK-NEXT:    stw r3, 136(r5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32*, i32** @IntPtrIn, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %0, i64 54
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
+  %2 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %2, %1
+  %3 = load i32*, i32** @IntPtrOut, align 8
+  %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34
+  store i32 %add, i32* %arrayidx2, align 4
+  ret void
+}
+
+define dso_local void @ReadWriteFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: ReadWriteFuncPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel17:
+; CHECK-NEXT:    .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    pld r4, FuncPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel18:
+; CHECK-NEXT:    .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8
+  store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8
+  ret void
+}
+
+define dso_local void @FuncPtrCopy() local_unnamed_addr {
+; CHECK-LABEL: FuncPtrCopy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel19:
+; CHECK-NEXT:    pld r4, Callee@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  store void (...)* @Callee, void (...)** @FuncPtrOut, align 8
+  ret void
+}
+
+declare void @Callee(...)
+
+define dso_local void @FuncPtrCall() local_unnamed_addr {
+; CHECK-LABEL: FuncPtrCall:
+; CHECK:         .localentry FuncPtrCall, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel20:
+; CHECK-NEXT:    .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
+; CHECK-NEXT:    ld r12, 0(r3)
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8
+  tail call void %0()
+  ret void
+}
+
+define dso_local signext i32 @ReadVecElement() local_unnamed_addr {
+; CHECK-LABEL: ReadVecElement:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel21:
+; CHECK-NEXT:    .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
+; CHECK-NEXT:    lwa r3, 4(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecext = extractelement <4 x i32> %0, i32 1
+  ret i32 %vecext
+}
+
+; The address of inputVi32 has more than one use so no R_PPC64_PCREL_OPT is
+; expected.
+define dso_local signext i32 @VecMultiUse() local_unnamed_addr {
+; CHECK-LABEL: VecMultiUse:
+; CHECK:         .localentry VecMultiUse, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    pld r30, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r29, 4(r30)
+; CHECK-NEXT:    bl Callee@notoc
+; CHECK-NEXT:    lwz r3, 8(r30)
+; CHECK-NEXT:    add r29, r3, r29
+; CHECK-NEXT:    bl Callee@notoc
+; CHECK-NEXT:    lwz r3, 0(r30)
+; CHECK-NEXT:    add r3, r29, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  tail call void bitcast (void (...)* @Callee to void ()*)()
+  %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %2 = extractelement <4 x i32> %1, i32 2
+  %3 = extractelement <4 x i32> %0, i64 1
+  %4 = add nsw i32 %2, %3
+  tail call void bitcast (void (...)* @Callee to void ()*)()
+  %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecext2 = extractelement <4 x i32> %5, i32 0
+  %add3 = add nsw i32 %4, %vecext2
+  ret i32 %add3
+}
+
+; The address of ArrayIn is used by a load and also passed to a call so no
+; R_PPC64_PCREL_OPT is expected.
+define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: UseAddr:
+; CHECK:         .localentry UseAddr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    pld r4, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r5, 16(r4)
+; CHECK-NEXT:    add r30, r5, r3
+; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    bl getAddr@notoc
+; CHECK-NEXT:    add r3, r30, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0))
+  %add1 = add nsw i32 %add, %call
+  ret i32 %add1
+}
+
+declare signext i32 @getAddr(i32*) local_unnamed_addr
+
+; The address is returned and never used by a load or store so no
+; R_PPC64_PCREL_OPT is expected.
+define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr {
+; CHECK-LABEL: AddrTaken32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
+; CHECK-NEXT:    blr
+entry:
+  ret i32* @input32
+}
+
+
Index: llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
@@ -42,6 +42,8 @@
 ; CHECK-S-LABEL: getElementExtern4:
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    pld r3, array1@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel1:
+; CHECK-S-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-S-NEXT:    lwa r3, 16(r3)
 ; CHECK-S-NEXT:    blr
 ; CHECK-O-LABEL: <getElementExtern4>:
@@ -58,6 +60,8 @@
 ; CHECK-S-LABEL: getElementExternNegative:
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    pld r3, array1@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel2:
+; CHECK-S-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; CHECK-S-NEXT:    lwa r3, -4(r3)
 ; CHECK-S-NEXT:    blr
 ; CHECK-O-LABEL: <getElementExternNegative>:
Index: llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -16,7 +16,9 @@
 ; CHECK-LABEL: AssignFuncPtr:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel1:
 ; CHECK-NEXT:    pld r4, Function@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:    std r4, 0(r3)
 ; CHECK-NEXT:    pstd r4, FuncLocal@PCREL(0), 1
 ; CHECK-NEXT:    blr
@@ -47,6 +49,8 @@
 ; CHECK:         .localentry TailCallExtrnFuncPtr, 1
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel2:
+; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; CHECK-NEXT:    ld r12, 0(r3)
 ; CHECK-NEXT:    mtctr r12
 ; CHECK-NEXT:    bctr