Index: include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- include/llvm/CodeGen/MachineFrameInfo.h +++ include/llvm/CodeGen/MachineFrameInfo.h @@ -290,6 +290,9 @@ /// stack objects like arguments so we can't treat them as immutable. bool HasTailCall = false; + /// True if this function contains an indirect call. + bool HasIndirectCall = false; + /// Not null, if shrink-wrapping found a better place for the prologue. MachineBasicBlock *Save = nullptr; /// Not null, if shrink-wrapping found a better place for the epilogue. @@ -544,6 +547,10 @@ bool hasTailCall() const { return HasTailCall; } void setHasTailCall() { HasTailCall = true; } + /// Returns true if the function contains an indirect call. + bool hasIndirectCall() const { return HasIndirectCall; } + void setHasIndirectCall() { HasIndirectCall = true; } + /// Computes the maximum size of a callframe and the AdjustsStack property. /// This only works for targets defining /// TargetInstrInfo::getCallFrameSetupOpcode(), getCallFrameDestroyOpcode(), Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -145,6 +145,11 @@ def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", "HasPartwordAtomics", "true", "Enable l[bh]arx and st[bh]cx.">; +def FeatureSaveTOCIndirect : + SubtargetFeature<"save-toc-indirect", + "HasSaveTOCIndirect", "true", + "TOC reg is saved in prologue when there are indirect calls">; + def FeatureInvariantFunctionDescriptors : SubtargetFeature<"invariant-function-descriptors", "HasInvariantFunctionDescriptors", "true", Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -415,6 +415,13 @@ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } +/// 
MustSaveTOC - Return true if this function requires that we save the +/// TOC on the stack in the prologue rather than before each indirect call. +static bool MustSaveTOC(const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MF.getSubtarget().hasSaveTOCIndirect() && + MFI.hasIndirectCall(); +} /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, @@ -727,6 +734,8 @@ // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); + bool MustSaveTOC = FI->mustSaveTOC(); + const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? @@ -738,6 +747,7 @@ unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; + unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) @@ -972,6 +982,13 @@ HasSTUX = true; } + if (MustSaveTOC) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(TOCReg, getKillRegState(true)) + .addImm(24) + .addReg(SPReg); + } + if (!HasRedZone) { assert(!isPPC64 && "A red zone is always available on PPC64"); if (HasSTUX) { @@ -1173,6 +1190,8 @@ if (PPC::CRBITRCRegClass.contains(Reg)) continue; + if (Reg == PPC::X2 || Reg == PPC::R2) + continue; // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. 
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) @@ -1582,11 +1601,13 @@ FI->setMustSaveLR(MustSaveLR(MF, LR)); SavedRegs.reset(LR); + MachineFrameInfo &MFI = MF.getFrameInfo(); + FI->setMustSaveTOC(MustSaveTOC(MF) && !MFI.hasVarSizedObjects()); + // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); - MachineFrameInfo &MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. if (!FPSI && needsFP(MF)) { @@ -1655,6 +1676,9 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); + PPCFunctionInfo *FI = MF.getInfo(); + bool MustSaveTOC = FI->mustSaveTOC(); + // If the function is shrink-wrapped, and if the function has a tail call, the // tail call might not be in the new RestoreBlock, so real branch instruction // won't be generated by emitEpilogue(), because shrink-wrap has chosen new @@ -1691,7 +1715,9 @@ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::GPRCRegClass.contains(Reg)) { + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) { + ; + } else if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1933,6 +1959,8 @@ return false; MachineFunction *MF = MBB.getParent(); + PPCFunctionInfo *FI = MF->getInfo(); + bool MustSaveTOC = FI->mustSaveTOC(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL; bool CRSpilled = false; @@ -1948,7 +1976,6 @@ // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; - // Add the callee-saved register as live-in; it's killed at the spill. MBB.addLiveIn(Reg); @@ -1957,6 +1984,12 @@ continue; } + // The actual spill will happen in the prologue. + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) { + continue; + } + + // Insert the spill to the stack frame. 
if (IsCRField) { PPCFunctionInfo *FuncInfo = MF->getInfo(); @@ -2074,6 +2107,9 @@ return false; MachineFunction *MF = MBB.getParent(); + PPCFunctionInfo *FI = MF->getInfo(); + bool MustSaveTOC = FI->mustSaveTOC(); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); bool CR2Spilled = false; bool CR3Spilled = false; @@ -2097,6 +2133,10 @@ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) continue; + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) { + continue; + } + if (Reg == PPC::CR2) { CR2Spilled = true; // The spill slot is associated only with CR2, which is the Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -111,6 +111,7 @@ static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); +STATISTIC(NumSetHasIndirectCall, "Number of indirect calls"); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); @@ -4990,6 +4991,8 @@ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { + DAG.getMachineFunction().getFrameInfo().setHasIndirectCall(); + NumSetHasIndirectCall++; // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function @@ -5953,14 +5956,20 @@ !isa(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. setUsesTOCBasePtr(DAG); - SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); - // TOC save area offset. 
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore( - Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); + // Don't emit a store of TOC before indirect call if using option + // -msave-toc-indirect and the function doesn't have dynamic allocations + // on the stack. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!(Subtarget.hasSaveTOCIndirect() && !MFI.hasVarSizedObjects())) { + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); + } // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. Index: lib/Target/PowerPC/PPCMachineFunctionInfo.h =================================================================== --- lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -45,6 +45,11 @@ /// PEI. bool MustSaveLR; + /// MustSaveTOC - Indicates whether TOC should be saved in prologue for option + /// -msave-toc-indirect. This is only valid after the initial scan of the + /// function by PEI. + bool MustSaveTOC; + /// Does this function have any stack spills. 
bool HasSpills = false; @@ -147,6 +152,13 @@ void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + /// MustSaveTOC - This is set when a function contains indirect calls and + /// no dynamic stack allocations. If using option -msave-toc-indirect, + /// the TOC will be saved once in the prologue rather than before each + /// indirect function call. + void setMustSaveTOC(bool U) { MustSaveTOC = U; } + bool mustSaveTOC() const { return MustSaveTOC; } + void setHasSpills() { HasSpills = true; } bool hasSpills() const { return HasSpills; } Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -126,6 +126,7 @@ bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; + bool HasSaveTOCIndirect; bool HasPartwordAtomics; bool HasDirectMove; bool HasHTM; @@ -262,6 +263,9 @@ bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } + bool hasSaveTOCIndirect() const { + return HasSaveTOCIndirect; + } bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool hasDirectMove() const { return HasDirectMove; } Index: test/CodeGen/PowerPC/save_toc_indirect.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/save_toc_indirect.ll @@ -0,0 +1,28 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mattr=+save-toc-indirect < %s | FileCheck %s --check-prefix=TOC_INDIRECT +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +define signext i32 @test(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; TOC_INDIRECT-LABEL: test: +; TOC_INDIRECT: # BB#0: # %entry +; TOC_INDIRECT: std 2, 24(1) +; TOC_INDIRECT: bctrl +; TOC_INDIRECT: ld 2, 24(1) +; TOC_INDIRECT-NOT: std 2, 24(1) +; TOC_INDIRECT: bctrl +; TOC_INDIRECT: ld 
2, 24(1) + +; CHECK-LABEL: test: +; CHECK: # BB#0: # %entry +; CHECK: std 2, 24(1) +; CHECK: bctrl +; CHECK: ld 2, 24(1) +; CHECK: std 2, 24(1) +; CHECK: bctrl +; CHECK: ld 2, 24(1) + +entry: + %call = tail call signext i32 %Func(i32 signext %i) + %call1 = tail call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + ret i32 %add2 +} Index: test/CodeGen/PowerPC/save_toc_indirect2.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/save_toc_indirect2.ll @@ -0,0 +1,46 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mattr=+save-toc-indirect < %s | FileCheck %s --check-prefix=TOC_INDIRECT + +define signext i32 @test(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) { +; TOC_INDIRECT-LABEL: test: +; TOC_INDIRECT: # BB#0: # %entry +; TOC_INDIRECT: std 2, {{[0-9]+}}(1) +; TOC_INDIRECT: bctrl +; TOC_INDIRECT: ld 2, {{[0-9]+}}(1) +; TOC_INDIRECT: std 2, {{[0-9]+}}(1) +; TOC_INDIRECT: bctrl +; TOC_INDIRECT: ld 2, {{[0-9]+}}(1) +; TOC_INDIRECT: std 2, {{[0-9]+}}(1) +; TOC_INDIRECT: bctrl +; TOC_INDIRECT: ld 2, {{[0-9]+}}(1) +entry: + %call = tail call signext i32 %Func(i32 signext %i) + %call1 = tail call signext i32 %Func2(i32 signext %i) + %add2 = add nsw i32 %call1, %call + %conv = sext i32 %add2 to i64 + %0 = alloca i8, i64 %conv, align 16 + %1 = bitcast i8* %0 to i32* + %cmp24 = icmp sgt i32 %add2, 0 + br i1 %cmp24, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %wide.trip.count = zext i32 %add2 to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %call7 = call signext i32 @UseAlloca(i32* nonnull %1) + %add8 = add nsw i32 %call7, %add2 + ret i32 %add8 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %2 = add nsw i64 %indvars.iv, %conv + %3 = trunc i64 %2 to i32 + %call6 = tail call signext i32 
%Func(i32 signext %3) + %arrayidx = getelementptr inbounds i32, i32* %1, i64 %indvars.iv + store i32 %call6, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +declare signext i32 @UseAlloca(i32*)