Index: lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.h +++ lib/Target/PowerPC/PPCFrameLowering.h @@ -39,19 +39,33 @@ * * This method will return true if an available register was found (including * R0). If no available registers are found, the method returns false and sets - * ScratchRegister to R0, as per the recommendation in the ABI. + * SR1 to R0, as per the recommendation in the ABI. + * + * For cases where a second register is required (such as when the stack needs + * to be realigned) the function will check for and set two registers. By + * default these will be R0 and R12. * * \param[in] MBB The machine basic block to find an available register for * \param[in] UseAtEnd Specify whether the scratch register will be used at * the end of the basic block (i.e., will the scratch * register kill a register defined in the basic block) - * \param[out] ScratchRegister The scratch register to use - * \return true if a scratch register was found. false of a scratch register - * was not found and R0 is being used as the default. + * \param[in] TwoUniqueRegsRequired Specify whether this basic block will + * require two scratch registers. + * \param[out] SR1 The scratch register to use + * \param[out] SR2 The second scratch register. If this pointer + * is not null, the function will attempt to + * set it to an available register regardless + * of whether there is a hard requirement for + * two scratch registers. + * \return true if the required number of registers was found. + * false if the required number of scratch register were not available + * and R0 is being used as the default. */ bool findScratchRegister(MachineBasicBlock *MBB, bool UseAtEnd, - unsigned *ScratchRegister) const; + bool TwoUniqueRegsRequired = false, + unsigned *SR1 = nullptr, + unsigned *SR2 = nullptr) const; public: PPCFrameLowering(const PPCSubtarget &STI); Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -556,16 +556,26 @@ } } -bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, - bool UseAtEnd, - unsigned *ScratchRegister) const { +bool +PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, + bool UseAtEnd, + bool TwoUniqueRegsRequired, + unsigned *SR1, + unsigned *SR2) const { RegScavenger RS; - unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; + + // Set the defaults for the two scratch registers. + if (SR1) + *SR1 = R0; - if (ScratchRegister) - *ScratchRegister = R0; + if (SR2) { + assert (SR1 && "Asking for the second scratch register but not the first?"); + *SR2 = R12; + } - // If MBB is an entry or exit block, use R0 as the scratch register + // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. if ((UseAtEnd && MBB->isReturnBlock()) || (!UseAtEnd && (&MBB->getParent()->front() == MBB))) return true; @@ -573,8 +583,8 @@ RS.enterBasicBlock(MBB); if (UseAtEnd && !MBB->empty()) { - // The scratch register will be used at the end of the block, so must consider - // all registers used within the block + // The scratch register will be used at the end of the block, so must + // consider all registers used within the block MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); // If no terminator, back iterator up to previous instruction. @@ -584,21 +594,55 @@ if (MBBI != MBB->begin()) RS.forward(MBBI); } - - if (!RS.isRegUsed(R0)) + + // If R0 is available and we don't require another one, we're all good. + if (!RS.isRegUsed(R0) && !TwoUniqueRegsRequired) return true; - unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass - : &PPC::GPRCRegClass); - + // Since we may be in a situation where two scratch registers would be + // advantageous but not required, we do not want to blindly return R12 as + // the second scratch register. Conservatively assume we do not have two + // available scratch registers and then try to find a pair. This is because + // a block may have only one available scratch register but still be a + // valid candidate for shrink wrapping (i.e. CR needs to be saved but no + // requirement for realigning a large stack frame exists for the function). + if (SR2) + *SR2 = R0; + + // Get the list of callee-saved registers for the target. + const PPCRegisterInfo *RegInfo = + static_cast(Subtarget.getRegisterInfo()); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); + + // Get all the available registers in the block. + BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : + &PPC::GPRCRegClass); + + // We shouldn't use callee-saved registers as scratch registers as they may be + // available when looking for a candidate block for shrink wrapping but not + // available when the actual prologue/epilogue is being emitted because they + // were added as live-in to the prologue block by PrologueEpilogueInserter. + for (int i = 0; CSRegs[i]; i++) + BV.reset(CSRegs[i]); + // Make sure the register scavenger was able to find an available register // If not, use R0 but return false to indicate no register was available and - // R0 must be used (as recommended by the ABI) - if (Reg == 0) + // R0 must be used (as recommended by the ABI). Also, if the function requires + // two registers, return false if we don't have enough. + if (BV.empty() || (BV.count() < 2 && TwoUniqueRegsRequired)) return false; - if (ScratchRegister) - *ScratchRegister = Reg; + // Set the first scratch register to the first available one. + if (SR1) + *SR1 = BV.find_first(); + + // If there is another one available, set the second scratch register to that. + // Note: we can only be here if we either do not require two registers or we + // do AND we have enough available. + if (SR2) { + int SecondScratchReg = BV.find_next(*SR1); + *SR2 = SecondScratchReg == -1 ? *SR1 : SecondScratchReg; + } return true; } @@ -606,13 +650,32 @@ bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast(&MBB); - return findScratchRegister(TmpMBB, false, nullptr); + /* Need to determine if this basic block is part of a function that will + require a pair of temporary registers in ordert to emit the prologue. + */ + const PPCRegisterInfo *RegInfo = + static_cast(Subtarget.getRegisterInfo()); + MachineFunction &MF = *(TmpMBB->getParent()); + bool HasBP = RegInfo->hasBasePointer(MF); + unsigned FrameSize = determineFrameLayout(MF); + bool IsLargeFrame = !isInt<16>(-FrameSize); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + // We need a scratch register for spilling LR and for spilling CR. By default, + // we use two scratch registers to hide latency. However, if only one scratch + // register is available, we can adjust for that by not overlapping the spill + // code. However, if we need to realign the stack (i.e. have a base pointer) + // and the stack frame is large, we need two scratch registers. + bool TwoUniqueRegsRequired = IsLargeFrame && HasBP && MaxAlign > 1; + + return findScratchRegister(TmpMBB, false, TwoUniqueRegsRequired); } bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast(&MBB); - return findScratchRegister(TmpMBB, true, nullptr); + return findScratchRegister(TmpMBB, true); } void PPCFrameLowering::emitPrologue(MachineFunction &MF, @@ -664,6 +727,7 @@ PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); @@ -694,6 +758,13 @@ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC); + // Frames of 32KB & larger require special handling because they cannot be + // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. + bool isLargeFrame = !isInt<16>(NegFrameSize); + + // Get stack alignments. + unsigned MaxAlign = MFI->getMaxAlignment(); + // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no // Red Zone, an asynchronous event (a form of "callee") could claim a frame & @@ -701,9 +772,14 @@ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); - findScratchRegister(&MBB, false, &ScratchReg); + bool TwoUniqueRegsRequired = MustSaveCR || + (isLargeFrame && HasBP && MaxAlign > 1); + + findScratchRegister(&MBB, false, TwoUniqueRegsRequired, &ScratchReg, &TempReg); + assert(ScratchReg && "No scratch register!"); - + bool SingleScratchReg = ScratchReg == TempReg; + int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -738,23 +814,34 @@ PBPOffset = FFI->getObjectOffset(PBPIndex); } - // Get stack alignments. - unsigned MaxAlign = MFI->getMaxAlignment(); if (HasBP && MaxAlign > 1) assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); - // Frames of 32KB & larger require special handling because they cannot be - // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. - bool isLargeFrame = !isInt<16>(NegFrameSize); + assert((isPPC64 || !MustSaveCR) && + "Prologue CR saving supported only in 64-bit mode"); + + // If we need to spill the CR and the LR but we don't have two separate + // registers available, we must spill them one at a time + if (MustSaveCR && SingleScratchReg && MustSaveLR) { + // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it could be more + // efficient to use mfocrf to selectively save just those fields. + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(TempReg, getKillRegState(true)) + .addImm(8) + .addReg(SPReg); + } if (MustSaveLR) BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Prologue CR saving supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) { // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. // If only one or two CR fields are clobbered, it could be more // efficient to use mfocrf to selectively save just those fields. @@ -792,7 +879,8 @@ .addImm(LROffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) .addImm(8) @@ -811,6 +899,7 @@ .addReg(SPReg); } + // This condition must be kept in sync with canUseAsPrologue. if (HasBP && MaxAlign > 1) { if (isPPC64) BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) @@ -828,6 +917,7 @@ .addReg(ScratchReg, RegState::Kill) .addImm(NegFrameSize); } else { + assert(!SingleScratchReg && "Only a single scratch reg available"); BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) @@ -951,7 +1041,7 @@ // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && MustSaveCRs.empty()) + && !MustSaveCR) continue; // For 64-bit SVR4 when we have spilled CRs, the spill location @@ -1005,6 +1095,7 @@ PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); @@ -1026,14 +1117,16 @@ : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 : PPC::ADD4 ); - + int LROffset = getReturnSaveOffset(); int FPOffset = 0; - findScratchRegister(&MBB, true, &ScratchReg); + findScratchRegister(&MBB, true, MustSaveCR, &ScratchReg, &TempReg); + assert(ScratchReg && "No scratch register!"); - + bool SingleScratchReg = ScratchReg == TempReg; + if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -1130,15 +1223,27 @@ } } + assert((isPPC64 || !MustSaveCR) && + "Epilogue CR restoring supported only in 64-bit mode"); + + // If we need to save both the LR and the CR and we only have one available + // scratch register, we must do them one at a time. + if (MustSaveCR && SingleScratchReg && MustSaveLR) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) + .addImm(8) + .addReg(SPReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + .addReg(TempReg, getKillRegState(i == e-1)); + } + if (MustSaveLR) BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) .addImm(LROffset) .addReg(SPReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Epilogue CR restoring supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) .addImm(8) .addReg(SPReg); @@ -1160,7 +1265,8 @@ .addImm(BPOffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1));