Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -823,6 +823,44 @@ assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + // This optimization has a number of guards. At this point we are being very + // cautious and we do not try to do this when we have a fast call or + // we are using PIC base or we are using a frame pointer or a base pointer. + // It would be possible to turn on this optimization under these conditions + // as well but it would require further modifications to the prologue and + // epilogue. For example, if we want to turn on this optimization for + // functions that use frame pointers we would have to take into consideration + // the fact that spills to the stack may be using r30 instead of r1. + // If the frame index requires scavenging there is the possibility that we may + // require a spill in the prologue in which case it is unsafe to move the + // stack pointer update. + // Aside from that we need to have a non-zero frame and we need to have a + // non-large frame size. Notice that we did not use !isLargeFrame but we used + // isInt<16>(FrameSize) instead. 
This is important because this guard has to + // be identical to the one in the epilogue and in the epilogue the variable + // is defined as bool isLargeFrame = !isInt<16>(FrameSize); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool RequiresScavenging = TRI->requiresFrameIndexScavenging(MF); + if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && + !HasBP && isInt<16>(FrameSize) && !RequiresScavenging) { + const std::vector &Info = MFI.getCalleeSavedInfo(); + for (unsigned i=0; igetDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1339,6 +1383,25 @@ unsigned RBReg = SPReg; unsigned SPAdd = 0; + // Check if we can move the stack update instruction up the epilogue + // past the callee saves. This will allow the move to LR instruction + // to be executed before the restores of the callee saves which means + // that the callee saves can hide the latency from the MTLR instruction. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + bool RequiresScavenging = TRI->requiresFrameIndexScavenging(MF); + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && + !HasBP && !isLargeFrame && !RequiresScavenging) { + const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo(); + for (unsigned i=0; i(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1497,7 +1560,7 @@ .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail // call optimization Index: lib/Target/PowerPC/PPCRegisterInfo.h =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.h +++ lib/Target/PowerPC/PPCRegisterInfo.h @@ -90,9 +90,7 @@ return true; } - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -297,6 +297,55 @@ } } +bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector &Info = MFI.getCalleeSavedInfo(); + bool RequiresScavenging = false; + + // If the saved info is invalid we have to default to true for safety. + if (!MFI.isCalleeSavedInfoValid()) + return true; + + // The saved info is valid so it can be traversed. + // Checking for registers that need saving that do not have load or store + // forms where the address offset is an immediate. + for (unsigned i=0; i