diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -393,12 +393,6 @@ unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT; return ((1 << Val) >> 2) << 2; } -static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) { - unsigned Val = - (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4) - : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0))); - return Val << STO_PPC64_LOCAL_BIT; -} // ELF Relocation types for PPC64 enum { diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -96,6 +96,7 @@ #undef R_PPC64_TPREL16_HIGHA #undef R_PPC64_DTPREL16_HIGH #undef R_PPC64_DTPREL16_HIGHA +#undef R_PPC64_REL24_NOTOC #undef R_PPC64_IRELATIVE #undef R_PPC64_REL16 #undef R_PPC64_REL16_LO @@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113) ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114) ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115) +ELF_RELOC(R_PPC64_REL24_NOTOC, 116) ELF_RELOC(R_PPC64_IRELATIVE, 248) ELF_RELOC(R_PPC64_REL16, 249) ELF_RELOC(R_PPC64_REL16_LO, 250) diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -284,6 +284,7 @@ VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha VK_PPC_TLSLD, // symbol@tlsld VK_PPC_LOCAL, // symbol@local + VK_PPC_NOTOC, // symbol@notoc VK_COFF_IMGREL32, // symbol@imgrel (image-relative) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -319,6 +319,7 @@ case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return "local"; + case VK_PPC_NOTOC: return "notoc"; case VK_COFF_IMGREL32: return 
"IMGREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; @@ -432,6 +433,7 @@ .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO) .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) + .Case("notoc", VK_PPC_NOTOC) .Case("gdgot", VK_Hexagon_GD_GOT) .Case("gdplt", VK_Hexagon_GD_PLT) .Case("iegot", VK_Hexagon_IE_GOT) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -39,6 +39,7 @@ return Value & 0xfffc; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return Value & 0x3fffffc; case PPC::fixup_ppc_half16: return Value & 0xffff; @@ -62,6 +63,7 @@ case PPC::fixup_ppc_brcond14abs: case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return 4; case FK_Data_8: return 8; @@ -88,6 +90,7 @@ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 6, 24, 0 }, { "fixup_ppc_brcond14abs", 16, 14, 0 }, @@ -98,6 +101,7 @@ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 2, 24, 0 }, { "fixup_ppc_brcond14abs", 2, 14, 0 }, @@ -151,6 +155,7 @@ return Kind >= FirstLiteralRelocationKind; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: // If the target symbol has a local entry point we must not attempt // to resolve the fixup directly. 
Emit a relocation and leave // resolution of the final target address to the linker. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -86,6 +86,7 @@ llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: @@ -97,6 +98,9 @@ case MCSymbolRefExpr::VK_PPC_LOCAL: Type = ELF::R_PPC_LOCAL24PC; break; + case MCSymbolRefExpr::VK_PPC_NOTOC: + Type = ELF::R_PPC64_REL24_NOTOC; + break; } break; case PPC::fixup_ppc_brcond14: @@ -431,6 +435,7 @@ return false; case ELF::R_PPC_REL24: + case ELF::R_PPC64_REL24_NOTOC: // If the target symbol has a local entry point, we must keep the // target symbol to preserve that information for the linker. // The "other" values are stored in the last 6 bits of the second byte. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -19,6 +19,10 @@ // 24-bit PC relative relocation for direct branches like 'b' and 'bl'. fixup_ppc_br24 = FirstTargetFixupKind, + // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where + // the caller does not use the TOC. + fixup_ppc_br24_notoc, + /// 14-bit PC relative relocation for conditional branches. 
fixup_ppc_brcond14, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -48,7 +48,9 @@ // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_br24)); + ((MI.getOpcode() == PPC::BL8_NOTOC) + ? (MCFixupKind)PPC::fixup_ppc_br24_notoc + : (MCFixupKind)PPC::fixup_ppc_br24))); return 0; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -179,13 +179,9 @@ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); - int64_t Res; - if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) - report_fatal_error(".localentry expression must be absolute."); - - unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); - if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) - report_fatal_error(".localentry expression cannot be encoded."); + // encodePPC64LocalEntryOffset will report an error if it cannot + // encode LocalOffset. 
+ unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset); unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; @@ -230,6 +226,31 @@ D->setOther(Other); return true; } + + unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) { + MCAssembler &MCA = getStreamer().getAssembler(); + int64_t Offset; + if (!LocalOffset->evaluateAsAbsolute(Offset, MCA)) + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), ".localentry expression must be absolute."); + + switch (Offset) { + default: + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), + ".localentry expression is not a valid power of 2."); + case 0: + return 0; + case 1: + return 1 << ELF::STO_PPC64_LOCAL_BIT; + case 4: + case 8: + case 16: + case 32: + case 64: + return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT; + } + } }; class PPCTargetMachOStreamer : public PPCTargetStreamer { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1460,14 +1460,16 @@ // // This ensures we have r2 set up correctly while executing the function // body, no matter which entry point is called. - if (Subtarget->isELFv2ABI() - // Only do all that if the function uses r2 in the first place. - && !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo(); + const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) || + !MF->getRegInfo().use_empty(PPC::R2); + // Only do all that if the function uses R2 as the TOC pointer + // in the first place. We don't need the global entry point if the + // function uses R2 as an allocatable register. + if (Subtarget->isELFv2ABI() && UsesX2OrR2 && PPCFI->usesTOCBasePtr()) { // Note: The logic here must be synchronized with the code in the // branch-selection pass which sets the offset of the first block in the // function. This matters because it affects the alignment. 
- const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); - MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(); OutStreamer->emitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = @@ -1519,6 +1521,35 @@ if (TS) TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp); + } else if (Subtarget->isELFv2ABI()) { + // When generating the entry point for a function we have a few scenarios + // based on whether or not that function uses R2 and whether or not that + // function makes calls (or is a leaf function). + // 1) A leaf function that does not use R2 (or treats it as callee-saved + // and preserves it). In this case st_other=0 and both + // the local and global entry points for the function are the same. + // No special entry point code is required. + // 2) A function uses the TOC pointer R2. This function may or may not have + // calls. In this case st_other=[2,6] and the global and local entry + // points are different. Code to correctly set up the TOC pointer in R2 + // is put between the global and local entry points. This case is + // covered by the if statement above. + // 3) A function does not use the TOC pointer R2 but does have calls. + // In this case st_other=1 since we do not know whether or not any + // of the callees clobber R2. This case is dealt with in this else if + // block. + // 4) The function does not use the TOC pointer but R2 is used inside + // the function. In this case st_other=1 once again. + // 5) This function uses inline asm. We mark R2 as reserved if the function + // has inline asm so we have to assume that it may be used.
+ if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() || + (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); + if (TS) + TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), + MCConstantExpr::create(1, OutContext)); + } } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -165,9 +165,11 @@ /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls and 32-bit/64-bit AIX calls. + /// CALL_NOTOC is a call where the caller does not use the TOC. CALL, CALL_NOP, + CALL_NOTOC, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1404,6 +1404,7 @@ case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; + case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -4689,6 +4690,16 @@ SelectionDAG& DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; + // FIXME: Tail calls are currently disabled when using PC Relative addressing. + // The issue is that PC Relative is only partially implemented and so there + // is currently a mix of functions that require the TOC and functions that do + // not require it. If we have A calls B calls C and both A and B require the + // TOC and C does not and is marked as clobbering R2 then it is not safe for + // B to tail call C.
Since we do not have the information of whether or not + // a function needs to use the TOC here in this function we need to be + // conservatively safe and disable all tail calls for now. + if (Subtarget.isUsingPCRelativeCalls()) return false; + if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. @@ -5085,6 +5096,17 @@ return PPCISD::BCTRL; } + // FIXME: At this moment indirect calls are treated ahead of the + // PC Relative condition because binaries can still contain a possible + // mix of functions that use a TOC and functions that do not use a TOC. + // Once the PC Relative feature is complete this condition should be moved + // up ahead of the indirect calls and should return a PPCISD::BCTRL for + // that case. + if (Subtarget.isUsingPCRelativeCalls()) { + assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); + return PPCISD::CALL_NOTOC; + } + // The ABIs that maintain a TOC pointer accross calls need to have a nop // immediately following the call instruction if the caller and callee may // have different TOC bases. At link time if the linker determines the calls @@ -5094,8 +5116,8 @@ // will rewrite the nop to be a load of the TOC pointer from the linkage area // into gpr2. if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) - return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL - : PPCISD::CALL_NOP; + return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL + : PPCISD::CALL_NOP; return PPCISD::CALL; } @@ -5372,7 +5394,7 @@ // no way to mark dependencies as implicit here. // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && - !CFlags.IsPatchPoint) + !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls()) Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls @@ -5398,7 +5420,8 @@ unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, ImmutableCallSite CS) const { - if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || + Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); unsigned CallOpc = @@ -11373,7 +11396,8 @@ if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.is64BitELFABI() && - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT && + !Subtarget.isUsingPCRelativeCalls()) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -140,6 +140,15 @@ (outs), (ins abscalltarget:$func), "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; + let Predicates = [PCRelativeMemops] in { + // BL8_NOTOC means that the caller does not use the TOC pointer and if + // it does use R2 then it is just a caller saved register. Therefore it is + // safe to emit only the bl and not the nop for this instruction. The + // linker will not try to restore R2 after the call. 
+ def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), + (ins calltarget:$func), + "bl $func", IIC_BrB, []>; + } } let Uses = [CTR8, RM] in { let isPredicable = 1 in @@ -194,6 +203,11 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)), + (BL8_NOTOC tglobaladdr:$dst)>; +def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)), + (BL8_NOTOC texternalsym:$dst)>; + // Calls for AIX def : Pat<(PPCcall (i64 mcsym:$dst)), (BL8 mcsym:$dst)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -252,6 +252,9 @@ def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -994,6 +997,7 @@ def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; def HasFPU : Predicate<"PPCSubTarget->hasFPU()">; +def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -81,7 +81,12 @@ if (MO.getTargetFlags() == PPCII::MO_PLT) RefKind = MCSymbolRefExpr::VK_PLT; - const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const MachineInstr *MI = MO.getParent(); + + if (MI->getOpcode() == PPC::BL8_NOTOC) + RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; + + const MachineFunction *MF = MI->getMF(); const Module *M = MF->getFunction().getParent(); const PPCSubtarget *Subtarget = &(MF->getSubtarget()); const TargetMachine &TM = Printer.TM; diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -57,6 +57,8 @@ "Number of pairs of rotate left, clear left/right collapsed"); STATISTIC(NumEXTSWAndSLDICombined, "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); +STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been " + "found when PC relative NOTOC is being used."); static cl::opt FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -99,6 +101,11 @@ // Initialize class variables. void initialize(MachineFunction &MFParm); + // Perform peepholes that cannot be skipped. + // Some peephole simplifications are required for correctness and will not + // be skipped even if skipFunction(MF.getFunction()) returns true. + void unskipableSimplifyCode(void); + // Perform peepholes. bool simplifyCode(void); @@ -124,9 +131,14 @@ // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { + initialize(MF); + // FIXME: This introduces another complete traversal of the instructions + // in the function in the common case (function is not skipped). 
Although + // this is less than ideal for compile time, this code will go away once + // our PC-Rel implementation is complete. + unskipableSimplifyCode(); if (skipFunction(MF.getFunction())) return false; - initialize(MF); return simplifyCode(); } }; @@ -260,6 +272,41 @@ TOCSaves[MI] = Keep; } +void PPCMIPeephole::unskipableSimplifyCode(void) { + // If this function has no uses of R2 there is nothing to do here. + if(MF->getRegInfo().use_empty(PPC::X2)) + return; + + // This is only for PCRelative calls. + if (!MF->getSubtarget().isUsingPCRelativeCalls()) { + return; + } + + // This function has R2 so we need to mark an implicit def for it. + PPCFunctionInfo *FuncInfo = MF->getInfo(); + FuncInfo->setUsesTOCBasePtr(); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == PPC::BL8_NOTOC) { + // At this point the BL8_NOTOC instruction is not really safe because it + // assumes that the caller does not need the TOC. It will be safe + // later once the full PC relative implementation is complete but it is + // not now. + // Here we are looking for X2. Since this is Pre-RA the only uses of X2 + // would indicate the use of the TOC. We want to detect all uses of the + // TOC. Once the work is done we should not see any uses of the TOC. + // TODO: Once the implementation is complete this should be turned into + // an assert + Register Reg = MF->getSubtarget().getTOCPointerRegister(); + MachineOperand MO = MachineOperand::CreateReg(Reg, false, true); + MI.addOperand(*MF, MO); + MI.setDesc(TII->get(PPC::BL8_NOP)); + ++NumX2FoundForPCRel; + } + } + } +} + // Perform peephole optimizations. 
bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -153,7 +153,14 @@ return CSR_SRV464_TLS_PE_SaveList; // On PPC64, we might need to save r2 (but only if it is not reserved). - bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); + // We do not need to treat R2 as callee-saved when using PC-Relative calls + // because any direct uses of R2 will cause it to be reserved. If the function + // is a leaf or the only uses of R2 are implicit uses for calls, the calls + // will use the @notoc relocation which will cause this function to set the + // st_other bit to 1, thereby communicating to its caller that it arbitrarily + // clobbers the TOC. + bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) && + !Subtarget.isUsingPCRelativeCalls(); // Cold calling convention CSRs. if (MF->getFunction().getCallingConv() == CallingConv::Cold) { diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -41,8 +41,8 @@ let CompleteModel = 1; // Do not support QPX (Quad Processing eXtension), SPE (Signal Procesing - // Engine) or prefixed instructions on Power 9. - let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs]; + // Engine), prefixed instructions on Power 9 or PC relative mem ops. 
+ let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops]; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -333,6 +333,7 @@ bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + bool isUsingPCRelativeCalls() const; /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -227,3 +227,8 @@ bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } + +bool PPCSubtarget::isUsingPCRelativeCalls() const { + return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() && + CodeModel::Medium == getTargetMachine().getCodeModel(); +} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -0,0 +1,176 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S + +@global_int = common dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local signext i32 @NoTOC() local_unnamed_addr { +; CHECK-S-LABEL: NoTOC: +; CHECK-S-NOT: .localentry +; CHECK-S: li r3, 42 +; CHECK-S-NEXT: blr +entry: + ret i32 42 +} + +define dso_local signext i32 @AsmClobberX2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX2: +; CHECK-S: .localentry AsmClobberX2, 1 +; CHECK-S: add r3, r4, r3 +; CHECK-S: #APP +; CHECK-S-NEXT: nop +; 
CHECK-S-NEXT: #NO_APP +; CHECK-S: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r2}"() + ret i32 %add +} + +; FIXME: This is actually a test case that shows a bug. On power9 and earlier +; this test should not compile. On later CPUs (like this test) the @toc +; should be replaced with @pcrel and we won't need R2 and so the problem +; goes away. +define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX2WithTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l +; CHECK-S: .localentry AsmClobberX2WithTOC, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: #APP +; CHECK-S-NEXT: li r2, 0 +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: addis r5, r2, global_int@toc@ha +; CHECK-S-NEXT: lwz r5, global_int@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: add r3, r3, r5 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "li 2, 0", "~{r2}"() + %0 = load i32, i32* @global_int, align 4 + %add1 = add nsw i32 %add, %0 + ret i32 %add1 +} + +define dso_local signext i32 @AsmClobberX5(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX5: +; CHECK-S: .localentry AsmClobberX5, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: #APP +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r5}"() + ret i32 %add +} + +; Clobber all GPRs except R2. 
+define dso_local signext i32 @AsmClobberNotR2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberNotR2: +; CHECK-S: .localentry AsmClobberNotR2, 1 +; CHECK-S: add r3, r4, r3 +; CHECK-S: stw r3, -148(r1) # 4-byte Folded Spill +; CHECK-S-NEXT: #APP +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload +; CHECK-S: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r0},~{r1},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() + ret i32 %add +} + +; Increase register pressure enough to force the register allocator to +; make use of R2. +define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) local_unnamed_addr { +; CHECK-S-LABEL: X2IsCallerSaved: +; CHECK-S: .localentry X2IsCallerSaved, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: add r11, r4, r3 +; CHECK-S-NEXT: subf r29, r9, r8 +; CHECK-S-NEXT: add r9, r10, r9 +; CHECK-S-NEXT: subf r10, r3, r10 +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: mullw r3, r3, r11 +; CHECK-S-NEXT: mullw r3, r3, r5 +; CHECK-S-NEXT: subf r12, r5, r4 +; CHECK-S-NEXT: mullw r3, r3, r6 +; CHECK-S-NEXT: add r0, r6, r5 +; CHECK-S-NEXT: mullw r3, r3, r12 +; CHECK-S-NEXT: mullw r3, r3, r0 +; CHECK-S-NEXT: mullw r3, r3, r7 +; CHECK-S-NEXT: subf r2, r7, r6 +; CHECK-S-NEXT: mullw r3, r3, r8 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: add r30, r8, r7 +; CHECK-S-NEXT: mullw r3, r3, r2 +; CHECK-S-NEXT: mullw r3, r3, r30 +; CHECK-S-NEXT: mullw r3, r3, r29 +; CHECK-S-NEXT: mullw r3, r3, r9 +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: ld r29, -24(r1) # 8-byte Folded 
Reload +; CHECK-S-NEXT: mullw r3, r3, r10 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %sub = sub nsw i32 %b, %c + %add1 = add nsw i32 %d, %c + %sub2 = sub nsw i32 %d, %e + %add3 = add nsw i32 %f, %e + %sub4 = sub nsw i32 %f, %g + %add5 = add nsw i32 %h, %g + %sub6 = sub nsw i32 %h, %a + %mul = mul i32 %b, %a + %mul7 = mul i32 %mul, %add + %mul8 = mul i32 %mul7, %c + %mul9 = mul i32 %mul8, %d + %mul10 = mul i32 %mul9, %sub + %mul11 = mul i32 %mul10, %add1 + %mul12 = mul i32 %mul11, %e + %mul13 = mul i32 %mul12, %f + %mul14 = mul i32 %mul13, %sub2 + %mul15 = mul i32 %mul14, %add3 + %mul16 = mul i32 %mul15, %sub4 + %mul17 = mul i32 %mul16, %add5 + %mul18 = mul i32 %mul17, %sub6 + ret i32 %mul18 +} + + +define dso_local signext i32 @UsesX2AsTOC() local_unnamed_addr { +; CHECK-S-LABEL: UsesX2AsTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep6@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep6@l +; CHECK-S: .localentry UsesX2AsTOC, .Lfunc_lep6-.Lfunc_gep6 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: addis r3, r2, global_int@toc@ha +; CHECK-S-NEXT: lwa r3, global_int@toc@l(r3) +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @global_int, align 4 + ret i32 %0 +} + + +define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr { +; CHECK-S-LABEL: UsesX2AsConstPoolTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l +; CHECK-S: .localentry UsesX2AsConstPoolTOC, .Lfunc_lep7-.Lfunc_gep7 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-S-NEXT: lfd f1, .LCPI7_0@toc@l(r3) +; CHECK-S-NEXT: blr +entry: + ret double 0x404124A4EBDD334C +} + + diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs 
-mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names --filetype=obj < %s | \ +; RUN: llvm-objdump -dr - | FileCheck %s --check-prefix=CHECK-O + + +; CHECK-S-LABEL: caller +; CHECK-S: bl callee@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: caller +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee +; CHECK-O: blr +define dso_local signext i32 @caller() local_unnamed_addr { +entry: + %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)() + ret i32 %call +} + +declare signext i32 @callee(...) local_unnamed_addr + + +; Some calls can be considered External Symbols. +; CHECK-S-LABEL: ExternalSymbol +; CHECK-S: bl memcpy@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: ExternalSymbol +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy +; CHECK-O: blr +define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -0,0 +1,521 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S + +@globalVar = common dso_local local_unnamed_addr global i32 0, align 4 +@externGlobalVar = external local_unnamed_addr global i32, align 4 +@indirectCall = common dso_local local_unnamed_addr global i32 (i32)* null, align 8 +
+; This function needs to remain as noinline. +; The compiler needs to know this function is local but must be forced to call +; it. The only thing we really need to check here is that st_other=0 and +; so we make sure that there is no .localentry. +define dso_local signext i32 @localCall(i32 signext %a) local_unnamed_addr #0 { +; CHECK-S-LABEL: localCall: +; CHECK-S-NOT: .localentry +; CHECK-S: addi r3, r3, 5 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %a, 5 + ret i32 %add +} + +define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocal1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep1@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep1@l +; CHECK-S: .localentry DirectCallLocal1, .Lfunc_lep1-.Lfunc_gep1 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @localCall(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocal2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l +; CHECK-S: .localentry DirectCallLocal2, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu
r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @localCall(i32 signext %add) + %0 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallLocalNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocalNoGlobal: +; CHECK-S: .localentry DirectCallLocalNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @localCall(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallExtern1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep4@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep4@l +; CHECK-S: .localentry DirectCallExtern1, .Lfunc_lep4-.Lfunc_gep4 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; 
CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @externCall(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +declare signext i32 @externCall(i32 signext) local_unnamed_addr + +define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallExtern2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep5@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep5@l +; CHECK-S: .localentry DirectCallExtern2, .Lfunc_lep5-.Lfunc_gep5 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @externCall(i32 signext %add) + %0 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallExternNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallExternNoGlobal: 
+; CHECK-S: .localentry DirectCallExternNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @externCall(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocal1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l +; CHECK-S: .localentry TailCallLocal1, .Lfunc_lep7-.Lfunc_gep7 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @globalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @localCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocal2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep8@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep8@l +; CHECK-S: .localentry TailCallLocal2, .Lfunc_lep8-.Lfunc_gep8 +; 
CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @externGlobalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @localCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocalNoGlobal: +; CHECK-S: .localentry TailCallLocalNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @localCall(i32 signext %a) + ret i32 %call +} + +define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExtern1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep10@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep10@l +; CHECK-S: .localentry TailCallExtern1, .Lfunc_lep10-.Lfunc_gep10 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: 
nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @globalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @externCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExtern2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep11@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep11@l +; CHECK-S: .localentry TailCallExtern2, .Lfunc_lep11-.Lfunc_gep11 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @externGlobalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @externCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExternNoGlobal: +; CHECK-S: .localentry TailCallExternNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @externCall(i32 signext %a) + ret i32 %call +} + +define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: 
IndirectCall1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep13@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep13@l +; CHECK-S: .localentry IndirectCall1, .Lfunc_lep13-.Lfunc_gep13 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha +; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r12 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8 + %call = tail call signext i32 %0(i32 signext %add) + %1 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %1, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCall2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep14@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep14@l +; CHECK-S: .localentry IndirectCall2, .Lfunc_lep14-.Lfunc_gep14 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha +; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r12 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, 
.LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8 + %call = tail call signext i32 %0(i32 signext %add) + %1 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %1, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCall3: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep15@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep15@l +; CHECK-S: .localentry IndirectCall3, .Lfunc_lep15-.Lfunc_gep15 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r5 +; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 %call_param(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCallNoGlobal: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep16@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep16@l +; CHECK-S: .localentry IndirectCallNoGlobal, 
.Lfunc_lep16-.Lfunc_gep16 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mtctr r5 +; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 %call_param(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCallOnly: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep17@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep17@l +; CHECK-S: .localentry IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: mtctr r4 +; CHECK-S-NEXT: mr r12, r4 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 %call_param(i32 signext %a) + ret i32 %call +} + +attributes #0 = { noinline } + diff --git a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s --- a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s +++ b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s @@ -1,11 +1,11 @@ -# RUN: not --crash llvm-mc -triple 
powerpc64-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s sym: .localentry sym, 123 -# CHECK: LLVM ERROR: .localentry expression cannot be encoded. +# CHECK: error: .localentry expression is not a valid power of 2. diff --git a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s --- a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s +++ b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s @@ -1,12 +1,12 @@ -# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s .globl remote_sym sym: .localentry sym, remote_sym -# CHECK: LLVM ERROR: .localentry expression must be absolute. +# CHECK: error: .localentry expression must be absolute.