Index: llvm/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/include/llvm/BinaryFormat/ELF.h +++ llvm/include/llvm/BinaryFormat/ELF.h @@ -393,12 +393,6 @@ unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT; return ((1 << Val) >> 2) << 2; } -static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) { - unsigned Val = - (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4) - : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0))); - return Val << STO_PPC64_LOCAL_BIT; -} // ELF Relocation types for PPC64 enum { Index: llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def =================================================================== --- llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -96,6 +96,7 @@ #undef R_PPC64_TPREL16_HIGHA #undef R_PPC64_DTPREL16_HIGH #undef R_PPC64_DTPREL16_HIGHA +#undef R_PPC64_REL24_NOTOC #undef R_PPC64_IRELATIVE #undef R_PPC64_REL16 #undef R_PPC64_REL16_LO @@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113) ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114) ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115) +ELF_RELOC(R_PPC64_REL24_NOTOC, 116) ELF_RELOC(R_PPC64_IRELATIVE, 248) ELF_RELOC(R_PPC64_REL16, 249) ELF_RELOC(R_PPC64_REL16_LO, 250) Index: llvm/include/llvm/MC/MCExpr.h =================================================================== --- llvm/include/llvm/MC/MCExpr.h +++ llvm/include/llvm/MC/MCExpr.h @@ -283,6 +283,7 @@ VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha VK_PPC_TLSLD, // symbol@tlsld VK_PPC_LOCAL, // symbol@local + VK_PPC_NOTOC, // symbol@notoc VK_COFF_IMGREL32, // symbol@imgrel (image-relative) Index: llvm/lib/MC/MCExpr.cpp =================================================================== --- llvm/lib/MC/MCExpr.cpp +++ llvm/lib/MC/MCExpr.cpp @@ -318,6 +318,7 @@ case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return 
"local"; + case VK_PPC_NOTOC: return "notoc"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_PCREL: return "PCREL"; case VK_Hexagon_LO16: return "LO16"; @@ -431,6 +432,7 @@ .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO) .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) + .Case("notoc", VK_PPC_NOTOC) .Case("gdgot", VK_Hexagon_GD_GOT) .Case("gdplt", VK_Hexagon_GD_PLT) .Case("iegot", VK_Hexagon_IE_GOT) Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -40,6 +40,7 @@ return Value & 0xfffc; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return Value & 0x3fffffc; case PPC::fixup_ppc_half16: return Value & 0xffff; @@ -65,6 +66,7 @@ case PPC::fixup_ppc_brcond14abs: case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return 4; case FK_Data_8: return 8; @@ -91,6 +93,7 @@ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 6, 24, 0 }, { "fixup_ppc_brcond14abs", 16, 14, 0 }, @@ -101,6 +104,7 @@ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 2, 24, 0 }, { "fixup_ppc_brcond14abs", 2, 14, 0 }, Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp =================================================================== --- 
llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -94,8 +94,14 @@ case MCSymbolRefExpr::VK_PPC_LOCAL: Type = ELF::R_PPC_LOCAL24PC; break; + case MCSymbolRefExpr::VK_PPC_NOTOC: + Type = ELF::R_PPC64_REL24_NOTOC; + break; } break; + case PPC::fixup_ppc_br24_notoc: + Type = ELF::R_PPC64_REL24_NOTOC; + break; case PPC::fixup_ppc_brcond14: case PPC::fixup_ppc_brcond14abs: Type = ELF::R_PPC_REL14; @@ -431,6 +437,7 @@ return false; case ELF::R_PPC_REL24: + case ELF::R_PPC64_REL24_NOTOC: // If the target symbol has a local entry point, we must keep the // target symbol to preserve that information for the linker. // The "other" values are stored in the last 6 bits of the second byte. Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -19,6 +19,10 @@ // 24-bit PC relative relocation for direct branches like 'b' and 'bl'. fixup_ppc_br24 = FirstTargetFixupKind, + // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where + // the caller does not use the TOC. + fixup_ppc_br24_notoc, + /// 14-bit PC relative relocation for conditional branches. fixup_ppc_brcond14, Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -48,7 +48,9 @@ // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_br24)); + ((MI.getOpcode() == PPC::BL8_NOTOC) + ? 
(MCFixupKind)PPC::fixup_ppc_br24_notoc + : (MCFixupKind)PPC::fixup_ppc_br24))); return 0; } Index: llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp =================================================================== --- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -179,13 +179,9 @@ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); - int64_t Res; - if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) - report_fatal_error(".localentry expression must be absolute."); - - unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); - if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) - report_fatal_error(".localentry expression cannot be encoded."); + // encodePPC64LocalEntryOffset will report an error if it cannot + // encode LocalOffset. + unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset); unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; @@ -230,6 +226,35 @@ D->setOther(Other); return true; } + + unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) { + MCAssembler &MCA = getStreamer().getAssembler(); + int64_t Offset; + if (!LocalOffset->evaluateAsAbsolute(Offset, MCA)) + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), ".localentry expression must be absolute."); + + switch (Offset) { + default: + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), + ".localentry expression is not a valid power of 2."); + case 0: + return 0; + case 1: + return 1 << ELF::STO_PPC64_LOCAL_BIT; + case 4: + return 2 << ELF::STO_PPC64_LOCAL_BIT; + case 8: + return 3 << ELF::STO_PPC64_LOCAL_BIT; + case 16: + return 4 << ELF::STO_PPC64_LOCAL_BIT; + case 32: + return 5 << ELF::STO_PPC64_LOCAL_BIT; + case 64: + return 6 << ELF::STO_PPC64_LOCAL_BIT; + } + } }; class PPCTargetMachOStreamer : public PPCTargetStreamer { Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
=================================================================== --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -504,6 +504,8 @@ return GetJTISymbol(MO.getIndex()); case MachineOperand::MO_BlockAddress: return GetBlockAddressSymbol(MO.getBlockAddress()); + case MachineOperand::MO_ExternalSymbol: + return OutContext.getOrCreateSymbol(MO.getSymbolName()); default: llvm_unreachable("Unexpected operand type to get symbol."); } @@ -555,6 +557,27 @@ case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(SM, *MI); + // Special handling for BL8_NOTOC. When we output the usual BL8_NOP we end up + // with 'bl symbol'. For BL8_NOTOC we need 'bl symbol@notoc' instead. This + // case adds the @notoc as required. + case PPC::BL8_NOTOC: { + const MachineOperand &MO = MI->getOperand(0); + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO); + const MCExpr *NoTOCFuncSym = MCSymbolRefExpr::create( + MOSymbol, MCSymbolRefExpr::VK_PPC_NOTOC, OutContext); + MCInstBuilder Builder = MCInstBuilder(PPC::BL8_NOTOC); + Builder.addExpr(NoTOCFuncSym); + MCOperand MCOp; + + // Start with 1 as operand 0 has already been added. + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, *this)) + Builder.addOperand(MCOp); + } + + EmitToStreamer(*OutStreamer, Builder); + return; + } case PPC::MoveGOTtoLR: { // Transform %lr = MoveGOTtoLR // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 @@ -831,6 +854,40 @@ EmitToStreamer(*OutStreamer, TmpInst); return; } + // When using PC Relative addressing tail calls also need to be marked with + // the @notoc relocation. If we don't do this the linker will complain that + // there is a missing nop after the tail call. By adding the @notoc we are + // telling the linker that it does not need to restore R2 after this call and + // so it does not need a nop after it. 
+ case PPC::TAILB: + case PPC::TAILBCTR: + case PPC::TAILBA: + case PPC::TAILB8: + case PPC::TAILBCTR8: + case PPC::TAILBA8: { + // Currently PCRelative is only supported under very specific conditions. + if (!Subtarget->isPPC64() || !Subtarget->isELFv2ABI() || + !Subtarget->hasPCRelativeMemops()) + break; + + const MachineOperand &MO = MI->getOperand(0); + assert(MO.getType() != MachineOperand::MO_ExternalSymbol && + "External symbol for tail call is unsupported.\n"); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); + const MCExpr *NoTOCTailCallSym = MCSymbolRefExpr::create( + MOSymbol, MCSymbolRefExpr::VK_PPC_NOTOC, OutContext); + MCInstBuilder Builder = MCInstBuilder(MI->getOpcode()); + Builder.addExpr(NoTOCTailCallSym); + MCOperand MCOp; + + // Start with 1 as operand 0 has already been added. + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, *this)) + Builder.addOperand(MCOp); + } + EmitToStreamer(*OutStreamer, Builder); + return; + } case PPC::LDtocL: { // Transform %xd = LDtocL @sym, %xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); @@ -1524,6 +1581,29 @@ if (TS) TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp); + } else if (Subtarget->isELFv2ABI()) { + // When generating the entry point for a function we have three scenarios + // based on whether or not that function uses R2 and whether or not that + // function makes calls (or is a leaf function). + // 1) A leaf function that does not use R2. In this case st_other=0 and both + // the local and global entry points for the function are the same. + // No special entry point code is required. + // 2) A function uses the TOC pointer R2. This function may or may not have + // calls. In this case st_other=[2,6] and the global and local entry + // points are different. Code to correctly setup the TOC pointer in R2 + // is put between the global and local entry points. 
This case is + // covered by the if statement above. + // 3) A function does not use the TOC pointer R2 but does have calls. + // In this case st_other=1 since we do not know whether or not any + // of the callees clobber R2. This case is dealt with in this else if + // block. + if (MF->getFrameInfo().hasCalls()) { + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); + if (TS) + TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), + MCConstantExpr::create(1, OutContext)); + } } } Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -169,9 +169,11 @@ /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls and 32-bit/64-bit AIX calls. + /// CALL_NOTOC is a call where the caller does not use the TOC. CALL, CALL_NOP, + CALL_NOTOC, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1367,6 +1367,7 @@ case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; + case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -5053,6 +5054,11 @@ return PPCISD::BCTRL; } + if (Subtarget.isUsingPCRelativeCalls()) { + assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); + return PPCISD::CALL_NOTOC; + } + // The ABIs that maintain a TOC pointer accross calls need to have a nop // immediately following the call instruction if the caller and callee may // have different TOC bases. At link time if the linker determines the calls @@ -5062,8 +5068,8 @@ // will rewrite the nop to be a load of the TOC pointer from the linkage area // into gpr2. if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) - return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL - : PPCISD::CALL_NOP; + return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL + : PPCISD::CALL_NOP; return PPCISD::CALL; } @@ -5353,7 +5359,7 @@ // no way to mark dependencies as implicit here. // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. 
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && - !CFlags.IsPatchPoint) + !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls()) Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls @@ -5379,7 +5385,8 @@ unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { - if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || + Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); unsigned CallOpc = @@ -11158,7 +11165,8 @@ if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.is64BitELFABI() && - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT && + !Subtarget.isUsingPCRelativeCalls()) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code Index: llvm/lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -140,6 +140,15 @@ (outs), (ins abscalltarget:$func), "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; + let Predicates = [PCRelativeMemops] in { + // BL8_NOTOC means that the caller does not use the TOC pointer and if + // it does use R2 then it is just a caller saved register. Therefore it is + // safe to emit only the bl and not the nop for this instruction. The + // linker will not try to restore R2 after the call. 
+ def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), + (ins calltarget:$func), + "bl $func", IIC_BrB, []>; + } } let Uses = [CTR8, RM] in { let isPredicable = 1 in @@ -194,6 +203,11 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)), + (BL8_NOTOC tglobaladdr:$dst)>; +def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)), + (BL8_NOTOC texternalsym:$dst)>; + // Calls for AIX def : Pat<(PPCcall (i64 mcsym:$dst)), (BL8 mcsym:$dst)>; Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -255,6 +255,9 @@ def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -993,6 +996,7 @@ def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; def HasFPU : Predicate<"PPCSubTarget->hasFPU()">; +def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
Index: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -57,6 +57,8 @@ "Number of pairs of rotate left, clear left/right collapsed"); STATISTIC(NumEXTSWAndSLDICombined, "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); +STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been " + "found when PC relative NOTOC is being used."); static cl::opt<bool> FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -99,6 +101,11 @@ // Initialize class variables. void initialize(MachineFunction &MFParm); + // Perform peepholes that cannot be skipped. Returns true on any change. + // Some peephole simplifications are required for correctness and will not + // be skipped even if skipFunction(MF.getFunction()) returns true. + bool unskipableSimplifyCode(void); + // Perform peepholes. bool simplifyCode(void); @@ -124,9 +131,10 @@ // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { + initialize(MF); + bool Changed = unskipableSimplifyCode(); if (skipFunction(MF.getFunction())) - return false; - initialize(MF); - return simplifyCode(); + return Changed; + return simplifyCode() || Changed; } }; @@ -260,6 +268,31 @@ TOCSaves[MI] = Keep; } +bool PPCMIPeephole::unskipableSimplifyCode(void) { + bool Changed = false; + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == PPC::BL8_NOTOC) { + // At this point the BL8_NOTOC instruction is not really safe because it + // assumes that the caller does not need the TOC. It will be safe + // later once the full PC relative implementation is complete but it is + // not now. + // Here we are looking for X2. Since this is Pre-RA the only uses of X2 + // would indicate the use of the TOC. We want to detect all uses of the + // TOC. Once the work is done we should not see any uses of the TOC. + // TODO: Once the implementation is complete this should be turned into + // an assert. 
+ if (!MF->getRegInfo().use_empty(PPC::X2)) { + MI.setDesc(TII->get(PPC::BL8_NOP)); + ++NumX2FoundForPCRel; + Changed = true; + } + } + } + } + return Changed; +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -153,7 +153,8 @@ return CSR_SRV464_TLS_PE_SaveList; // On PPC64, we might need to save r2 (but only if it is not reserved). - bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); + bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) && + !Subtarget.isUsingPCRelativeCalls(); // Cold calling convention CSRs. if (MF->getFunction().getCallingConv() == CallingConv::Cold) { Index: llvm/lib/Target/PowerPC/PPCScheduleP9.td =================================================================== --- llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,7 +42,7 @@ // Do not support QPX (Quad Processing eXtension), SPE (Signal Procesing // Engine) or prefixed instructions on Power 9. - let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs]; + let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops]; } Index: llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -327,6 +327,7 @@ bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + bool isUsingPCRelativeCalls() const; /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. 
Index: llvm/lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -224,3 +224,8 @@ bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } + +bool PPCSubtarget::isUsingPCRelativeCalls() const { + return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() && + CodeModel::Medium == getTargetMachine().getCodeModel(); +} Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage.ll @@ -0,0 +1,41 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future --filetype=obj < %s | \ +; RUN: llvm-objdump -dr - | FileCheck %s --check-prefix=CHECK-O + + +; CHECK-S-LABEL: caller +; CHECK-S: bl callee@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: caller +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee +; CHECK-O: blr +define dso_local signext i32 @caller() local_unnamed_addr { +entry: + %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)() + ret i32 %call +} + +declare signext i32 @callee(...) local_unnamed_addr + + +; Some calls can be considered External Symbols. 
+; CHECK-S-LABEL: ExternalSymbol +; CHECK-S: bl memcpy@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: ExternalSymbol +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy +; CHECK-O: blr +define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + Index: llvm/test/MC/PowerPC/ppc64-localentry-error1.s =================================================================== --- llvm/test/MC/PowerPC/ppc64-localentry-error1.s +++ llvm/test/MC/PowerPC/ppc64-localentry-error1.s @@ -7,5 +7,5 @@ sym: .localentry sym, 123 -# CHECK: LLVM ERROR: .localentry expression cannot be encoded. +# CHECK: error: .localentry expression is not a valid power of 2. Index: llvm/test/MC/PowerPC/ppc64-localentry-error2.s =================================================================== --- llvm/test/MC/PowerPC/ppc64-localentry-error2.s +++ llvm/test/MC/PowerPC/ppc64-localentry-error2.s @@ -8,5 +8,5 @@ sym: .localentry sym, remote_sym -# CHECK: LLVM ERROR: .localentry expression must be absolute. +# CHECK: error: .localentry expression must be absolute.