Index: include/llvm/Target/TargetFrameLowering.h
===================================================================
--- include/llvm/Target/TargetFrameLowering.h
+++ include/llvm/Target/TargetFrameLowering.h
@@ -201,6 +201,20 @@
     return false;
   }
 
+  /// Get the set of callee-saved registers that were saved in this basic
+  /// block. SaveBB must be a save block.
+  virtual void getSavedRegisters(SmallVectorImpl &Regs,
+                                 MachineBasicBlock &SaveBB,
+                                 const std::vector &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
+  /// Get the set of callee-saved registers that were restored in this basic
+  /// block. RestoreBB must be a restore block.
+  virtual void getRestoredRegisters(SmallVectorImpl &Regs,
+                                    MachineBasicBlock &RestoreBB,
+                                    const std::vector &CSI,
+                                    const TargetRegisterInfo *TRI) const;
+
   /// Return true if the target needs to disable frame pointer elimination.
   virtual bool noFramePointerElim(const MachineFunction &MF) const;
 
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -116,6 +116,21 @@
   void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
                            int &SPAdj);
   void insertPrologEpilogCode(MachineFunction &Fn);
+
+  void updateCalleeSavedLiveness(MachineFunction &Fn);
+  void getAllPaths(BitVector &Paths, const MachineFunction &Fn,
+                   const BitVector &Starts, const BitVector &Targets) const;
+  void addLiveInsToBlocks(MachineFunction &Fn, const BitVector &Blocks,
+                          const SmallVectorImpl &Regs) const;
+  void updateEntryPaths(MachineFunction &Fn, const BitVector &Entries,
+                        MachineBasicBlock &SaveBB,
+                        const SmallVectorImpl &Regs) const;
+  void updateExitPaths(MachineFunction &Fn, const BitVector &Returns,
+                       MachineBasicBlock &RestBB,
+                       const SmallVectorImpl &Regs) const;
+  void updatePathLiveIns(MachineFunction &Fn, BitVector &Starts,
+                         BitVector &Targets,
+                         SmallVectorImpl &Regs) const;
 };
 } // namespace
 
@@ -400,65 +415,6 @@
   MFI.setCalleeSavedInfo(CSI);
 }
 
-/// Helper function to update the liveness information for the callee-saved
-/// registers.
-static void updateLiveness(MachineFunction &MF) {
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-  // Visited will contain all the basic blocks that are in the region
-  // where the callee saved registers are alive:
-  // - Anything that is not Save or Restore -> LiveThrough.
-  // - Save -> LiveIn.
-  // - Restore -> LiveOut.
-  // The live-out is not attached to the block, so no need to keep
-  // Restore in this set.
-  SmallPtrSet Visited;
-  SmallVector WorkList;
-  MachineBasicBlock *Entry = &MF.front();
-  MachineBasicBlock *Save = MFI.getSavePoint();
-
-  if (!Save)
-    Save = Entry;
-
-  if (Entry != Save) {
-    WorkList.push_back(Entry);
-    Visited.insert(Entry);
-  }
-  Visited.insert(Save);
-
-  MachineBasicBlock *Restore = MFI.getRestorePoint();
-  if (Restore)
-    // By construction Restore cannot be visited, otherwise it
-    // means there exists a path to Restore that does not go
-    // through Save.
-    WorkList.push_back(Restore);
-
-  while (!WorkList.empty()) {
-    const MachineBasicBlock *CurBB = WorkList.pop_back_val();
-    // By construction, the region that is after the save point is
-    // dominated by the Save and post-dominated by the Restore.
-    if (CurBB == Save && Save != Restore)
-      continue;
-    // Enqueue all the successors not already visited.
-    // Those are by construction either before Save or after Restore.
-    for (MachineBasicBlock *SuccBB : CurBB->successors())
-      if (Visited.insert(SuccBB).second)
-        WorkList.push_back(SuccBB);
-  }
-
-  const std::vector &CSI = MFI.getCalleeSavedInfo();
-
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    for (MachineBasicBlock *MBB : Visited) {
-      MCPhysReg Reg = CSI[i].getReg();
-      // Add the callee-saved register as live-in.
-      // It's killed at the spill.
-      if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
-        MBB->addLiveIn(Reg);
-    }
-  }
-}
-
 /// insertCSRSpillsAndRestores - Insert spill and restore code for
 /// callee saved registers used in the function.
 ///
@@ -492,8 +448,6 @@
                                 RC, TRI);
       }
     }
-    // Update the live-in information of all the blocks up to the save point.
-    updateLiveness(Fn);
   }
 
   // Restore using target interface.
@@ -985,6 +939,8 @@
   for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
     TFI.emitEpilogue(Fn, *RestoreBlock);
 
+  updateCalleeSavedLiveness(Fn);
+
   for (MachineBasicBlock *SaveBlock : SaveBlocks)
     TFI.inlineStackProbe(Fn, *SaveBlock);
 
@@ -1007,6 +963,150 @@
       TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
 }
 
+/// Update the liveness of the callee-saved registers: registers saved in a
+/// save block become live-in along the paths from the function entries to
+/// that block, and registers restored in a restore block become live along
+/// the paths from that block to the returns.
+void PEI::updateCalleeSavedLiveness(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  MachineFrameInfo &MFI = Fn.getFrameInfo();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  const std::vector &CSI = MFI.getCalleeSavedInfo();
+
+  unsigned NumBN = Fn.getNumBlockIDs();
+  BitVector Entries(NumBN), Returns(NumBN);
+  Entries[Fn.front().getNumber()] = true;
+  for (MachineBasicBlock &MBB : Fn) {
+    unsigned N = MBB.getNumber();
+    if (MBB.isReturnBlock())
+      Returns[N] = true;
+    if (MBB.isEHFuncletEntry())
+      Entries[N] = true;
+  }
+
+  SmallVector Regs;
+
+  for (MachineBasicBlock *B : SaveBlocks) {
+    Regs.clear();
+    TFI.getSavedRegisters(Regs, *B, CSI, TRI);
+    if (!Regs.empty())
+      updateEntryPaths(Fn, Entries, *B, Regs);
+  }
+
+  for (MachineBasicBlock *B : RestoreBlocks) {
+    Regs.clear();
+    TFI.getRestoredRegisters(Regs, *B, CSI, TRI);
+    // Only propagate when this block restores at least one register, the
+    // same non-empty check the save loop above applies.
+    if (!Regs.empty())
+      updateExitPaths(Fn, Returns, *B, Regs);
+  }
+}
+
+/// Set in Paths every block that lies on a path from a block in Starts to a
+/// block in Targets. The walk down stops at targets and the walk up stops at
+/// starts. Paths must have no bits set on entry.
+void PEI::getAllPaths(BitVector &Paths, const MachineFunction &Fn,
+      const BitVector &Starts, const BitVector &Targets) const {
+  BitVector Up(Starts.size()); // Blocks reachable from Targets going upwards.
+  BitVector &Down = Paths;     // Blocks reachable from Starts going downwards.
+  SmallVector Worklist;
+
+  for (unsigned N : Starts.set_bits())
+    Worklist.push_back(N);
+  for (unsigned i = 0; i < Worklist.size(); ++i) {
+    unsigned N = Worklist[i];
+    if (Down[N])
+      continue;
+    Down[N] = true;
+    if (Targets[N])
+      continue;
+    MachineBasicBlock &B = *Fn.getBlockNumbered(N);
+    for (MachineBasicBlock *SB : B.successors())
+      Worklist.push_back(SB->getNumber());
+  }
+
+  Worklist.clear();
+  for (unsigned N : Targets.set_bits())
+    Worklist.push_back(N);
+
+  for (unsigned i = 0; i < Worklist.size(); ++i) {
+    unsigned N = Worklist[i];
+    if (Up[N])
+      continue;
+    Up[N] = true;
+    if (Starts[N])
+      continue;
+    MachineBasicBlock &B = *Fn.getBlockNumbered(N);
+    for (MachineBasicBlock *PB : B.predecessors())
+      Worklist.push_back(PB->getNumber());
+  }
+
+  Down &= Up;
+}
+
+/// Add every register in Regs as a live-in of each block set in Blocks.
+void PEI::addLiveInsToBlocks(MachineFunction &Fn, const BitVector &Blocks,
+      const SmallVectorImpl &Regs) const {
+  for (unsigned N : Blocks.set_bits()) {
+    MachineBasicBlock &B = *Fn.getBlockNumbered(N);
+    for (unsigned R : Regs)
+      B.addLiveIn(R);
+    B.sortUniqueLiveIns();
+  }
+}
+
+/// Add Regs as live-ins to all blocks on paths from the blocks in Entries to
+/// SaveBB, including SaveBB itself when it is reached.
+void PEI::updateEntryPaths(MachineFunction &Fn, const BitVector &Entries,
+      MachineBasicBlock &SaveBB, const SmallVectorImpl &Regs) const {
+  unsigned NumBN = Fn.getNumBlockIDs();
+  BitVector Paths(NumBN), Saves(NumBN);
+
+  Saves[SaveBB.getNumber()] = true;
+  getAllPaths(Paths, Fn, Entries, Saves);
+  addLiveInsToBlocks(Fn, Paths, Regs);
+}
+
+/// Add Regs as live-ins to all blocks on paths from the successors of RestBB
+/// to the return blocks (none if RestBB itself returns), then add Regs as
+/// implicit uses on the reached return instructions that do not modify them.
+void PEI::updateExitPaths(MachineFunction &Fn, const BitVector &Returns,
+      MachineBasicBlock &RestBB, const SmallVectorImpl &Regs) const {
+  unsigned NumBN = Fn.getNumBlockIDs();
+  BitVector Paths(NumBN), Restores(NumBN);
+
+  if (!Returns[RestBB.getNumber()]) {
+    // Start with the successors of the restore block because we don't want
+    // to add live-ins to the restore block itself.
+    for (MachineBasicBlock *SB : RestBB.successors())
+      Restores[SB->getNumber()] = true;
+    getAllPaths(Paths, Fn, Restores, Returns);
+    addLiveInsToBlocks(Fn, Paths, Regs);
+  } else
+    Paths[RestBB.getNumber()] = true;
+
+  // Add implicit uses to all reached return instructions.
+  Paths &= Returns;
+
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  for (unsigned N : Paths.set_bits()) {
+    MachineBasicBlock &RetB = *Fn.getBlockNumbered(N);
+    for (MachineInstr &T : RetB.terminators()) {
+      if (!T.isReturn()) // XXX or a tail call
+        continue;
+      for (unsigned R : Regs) {
+        // The returning instruction may actually be the one that does the
+        // restoring of the CS registers: a target may tail-call a stub
+        // that restores the registers and returns to the original caller.
+        // These instructions should not have the CS registers as uses.
+        if (!T.modifiesRegister(R, TRI))
+          T.addOperand(MachineOperand::CreateReg(R, false, true));
+      }
+    }
+  }
+}
+
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
 /// register references and actual offsets.
 ///
Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
===================================================================
--- lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -28,6 +28,26 @@
 TargetFrameLowering::~TargetFrameLowering() {
 }
 
+/// By default, all callee-saved registers are considered to be saved in
+/// each save block.
+void TargetFrameLowering::getSavedRegisters(SmallVectorImpl &Regs,
+      MachineBasicBlock &SaveBB,
+      const std::vector &CSI,
+      const TargetRegisterInfo *TRI) const {
+  for (const CalleeSavedInfo &I : CSI)
+    Regs.push_back(I.getReg());
+}
+
+/// By default, all callee-saved registers are considered to be restored in
+/// each restore block.
+void TargetFrameLowering::getRestoredRegisters(SmallVectorImpl &Regs,
+      MachineBasicBlock &RestoreBB,
+      const std::vector &CSI,
+      const TargetRegisterInfo *TRI) const {
+  for (const CalleeSavedInfo &I : CSI)
+    Regs.push_back(I.getReg());
+}
+
 /// The default implementation just looks at attribute "no-frame-pointer-elim".
 bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
   auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim");
Index: lib/Target/ARM/ARMFrameLowering.h
===================================================================
--- lib/Target/ARM/ARMFrameLowering.h
+++ lib/Target/ARM/ARMFrameLowering.h
@@ -41,6 +41,11 @@
                                    const std::vector &CSI,
                                    const TargetRegisterInfo *TRI) const override;
 
+  void getRestoredRegisters(SmallVectorImpl &Regs,
+                            MachineBasicBlock &RestoreBB,
+                            const std::vector &CSI,
+                            const TargetRegisterInfo *TRI) const override;
+
   bool noFramePointerElim(const MachineFunction &MF) const override;
 
   bool hasFP(const MachineFunction &MF) const override;
Index: lib/Target/ARM/ARMFrameLowering.cpp
===================================================================
--- lib/Target/ARM/ARMFrameLowering.cpp
+++ lib/Target/ARM/ARMFrameLowering.cpp
@@ -1449,6 +1449,16 @@
   return true;
 }
 
+void ARMFrameLowering::getRestoredRegisters(SmallVectorImpl &Regs,
+      MachineBasicBlock &RestoreBB,
+      const std::vector &CSI,
+      const TargetRegisterInfo *TRI) const {
+  for (const CalleeSavedInfo &I : CSI)
+    if (I.getReg() != ARM::LR)
+      Regs.push_back(I.getReg());
+}
+
+
 // FIXME: Make generic?
 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
                                        const ARMBaseInstrInfo &TII) {
Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -160,6 +160,8 @@
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
     bool CombineMovBx(MachineBasicBlock &MBB);
+
+    void copyUnmodifiedImplicitOps(MachineInstr &Dst, const MachineInstr &Src);
   };
   char ARMLoadStoreOpt::ID = 0;
 }
@@ -1851,6 +1853,19 @@
   return Changed;
 }
 
+void ARMLoadStoreOpt::copyUnmodifiedImplicitOps(MachineInstr &Dst,
+                                                const MachineInstr &Src) {
+  // Do not copy implicit uses from the source instruction that are
+  // modified by the target instruction.
+  for (const MachineOperand &Op : Src.operands()) {
+    if (!Op.isReg() || !Op.isImplicit())
+      continue;
+    if (Op.isUse() && Dst.modifiesRegister(Op.getReg(), TRI))
+      continue;
+    Dst.addOperand(Op);
+  }
+}
+
 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
 /// into the preceding stack restore so it directly restore the value of LR
 /// into pc.
@@ -1888,7 +1903,7 @@
               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
       PrevMI.setDesc(TII->get(NewOpc));
       MO.setReg(ARM::PC);
-      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
+      copyUnmodifiedImplicitOps(PrevMI, *MBBI);
       MBB.erase(MBBI);
       return true;
     }
@@ -1909,10 +1924,10 @@
 
   for (auto Use : Prev->uses())
     if (Use.isKill()) {
-      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+      auto NewMI = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
           .addReg(Use.getReg(), RegState::Kill)
-          .add(predOps(ARMCC::AL))
-          .copyImplicitOps(*MBBI);
+          .add(predOps(ARMCC::AL));
+      copyUnmodifiedImplicitOps(*NewMI, *MBBI);
       MBB.erase(MBBI);
       MBB.erase(Prev);
       return true;
Index: lib/Target/X86/X86FrameLowering.h
===================================================================
--- lib/Target/X86/X86FrameLowering.h
+++ lib/Target/X86/X86FrameLowering.h
@@ -92,6 +92,16 @@
                                    const std::vector &CSI,
                                    const TargetRegisterInfo *TRI) const override;
 
+  void getSavedRegisters(SmallVectorImpl &Regs,
+                         MachineBasicBlock &SaveBB,
+                         const std::vector &CSI,
+                         const TargetRegisterInfo *TRI) const override;
+
+  void getRestoredRegisters(SmallVectorImpl &Regs,
+                            MachineBasicBlock &RestoreBB,
+                            const std::vector &CSI,
+                            const TargetRegisterInfo *TRI) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -2049,6 +2049,38 @@
   return true;
 }
 
+void X86FrameLowering::getSavedRegisters(SmallVectorImpl &Regs,
+      MachineBasicBlock &SaveBB,
+      const std::vector &CSI,
+      const TargetRegisterInfo *TRI) const {
+  if (!SaveBB.isEHFuncletEntry() || !STI.is32Bit() || !STI.isOSWindows())
+    return TargetFrameLowering::getSavedRegisters(Regs, SaveBB, CSI, TRI);
+}
+
+void X86FrameLowering::getRestoredRegisters(SmallVectorImpl &Regs,
+      MachineBasicBlock &RestoreBB,
+      const std::vector &CSI,
+      const TargetRegisterInfo *TRI) const {
+  assert(!RestoreBB.empty());
+  MachineInstr &LastI = RestoreBB.back();
+  if (isFuncletReturnInstr(LastI) && STI.isOSWindows()) {
+    // Don't restore CSRs in 32-bit EH funclets. Matches
+    // spillCalleeSavedRegisters.
+    if (STI.is32Bit())
+      return;
+    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
+    // funclets. emitEpilogue transforms these to normal jumps.
+    if (LastI.getOpcode() == X86::CATCHRET) {
+      const Function *Func = RestoreBB.getParent()->getFunction();
+      bool IsSEH = isAsynchronousEHPersonality(
+          classifyEHPersonality(Func->getPersonalityFn()));
+      if (IsSEH)
+        return;
+    }
+  }
+  return TargetFrameLowering::getRestoredRegisters(Regs, RestoreBB, CSI, TRI);
+}
+
 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
Index: test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
===================================================================
--- test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
+++ test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -395,14 +395,14 @@
 ; CHECK: bdnz .[[LOOP_LABEL]]
 ;
 ; Epilogue code.
-; CHECK: li 3, 0
+; CHECK-DAG: li 3, 0
 ; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
 ; CHECK: nop
 ; CHECK: blr
 ;
 ; CHECK: [[ELSE_LABEL]]
-; CHECK-NEXT: slwi 3, 4, 1
-; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+; CHECK-DAG: slwi 3, 4, 1
+; DISABLE-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
 ; CHECK-NEXT: blr
 ;
 define i32 @inlineAsm(i32 %cond, i32 %N) {