Index: include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- include/llvm/CodeGen/MachineFrameInfo.h +++ include/llvm/CodeGen/MachineFrameInfo.h @@ -31,15 +31,18 @@ class CalleeSavedInfo { unsigned Reg; int FrameIdx; + bool Restored; public: explicit CalleeSavedInfo(unsigned R, int FI = 0) - : Reg(R), FrameIdx(FI) {} + : Reg(R), FrameIdx(FI), Restored(true) {} // Accessors. unsigned getReg() const { return Reg; } int getFrameIdx() const { return FrameIdx; } void setFrameIdx(int FI) { FrameIdx = FI; } + bool isRestored() const { return Restored; } + void setRestored(bool R) { Restored = R; } }; /// The MachineFrameInfo class represents an abstract stack frame until @@ -664,6 +667,7 @@ int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); /// Returns a reference to call saved info vector for the current function. + std::vector &getCalleeSavedInfo() { return CSInfo; } const std::vector &getCalleeSavedInfo() const { return CSInfo; } Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -193,10 +193,12 @@ /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee /// saved registers and returns true if it isn't possible / profitable to do /// so by issuing a series of load instructions via loadRegToStackSlot(). + /// If it returns true, and any of the registers in CSI is not restored, + /// it sets the corresponding Restored flag in CSI to false. /// Returns false otherwise. 
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { return false; } Index: lib/CodeGen/BranchFolding.h =================================================================== --- lib/CodeGen/BranchFolding.h +++ lib/CodeGen/BranchFolding.h @@ -138,6 +138,10 @@ MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; + /// Calculate the set of physical registers available at the exit + /// from MBB. + void computeLiveOuts(LivePhysRegs &LiveOut, const MachineBasicBlock &MBB); + bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB, Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -237,6 +238,20 @@ return MadeChange; } +void BranchFolder::computeLiveOuts(LivePhysRegs &LiveOut, + const MachineBasicBlock &MBB) { + LiveOut.init(*TRI); + LiveOut.addLiveIns(MBB); + + SmallVector,8> Clobbers; + for (const MachineInstr &MI : MBB) { + LiveOut.stepForward(MI, Clobbers); + for (auto C : Clobbers) + if (C.second->isReg() && C.second->isDead()) + LiveOut.removeReg(C.first); + } +} + //===----------------------------------------------------------------------===// // Tail Merging of Blocks //===----------------------------------------------------------------------===// @@ -372,6 +387,20 @@ if (UpdateLiveIns) { NewDest->clearLiveIns(); computeLiveIns(LiveRegs, *MRI, *NewDest); + + LivePhysRegs LiveOut; + for (MachineBasicBlock *Pred : 
NewDest->predecessors()) { + computeLiveOuts(LiveOut, *Pred); + for (MachineBasicBlock::RegisterMaskPair P : NewDest->liveins()) { + // "Available" means "unused". + if (!LiveOut.available(*MRI, P.PhysReg)) + continue; + MachineBasicBlock::iterator At = Pred->getFirstTerminator(); + const DebugLoc &dl = Pred->findDebugLoc(At); + BuildMI(*Pred, At, dl, TII->get(TargetOpcode::IMPLICIT_DEF), + P.PhysReg); + } + } } ++NumTailMerge; Index: lib/CodeGen/LivePhysRegs.cpp =================================================================== --- lib/CodeGen/LivePhysRegs.cpp +++ lib/CodeGen/LivePhysRegs.cpp @@ -192,7 +192,8 @@ const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) { for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) - addReg(Info.getReg()); + if (Info.isRestored()) + addReg(Info.getReg()); } } } Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -488,7 +488,7 @@ /// Insert restore code for the callee-saved registers used in the function. 
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, - ArrayRef CSI) { + std::vector &CSI) { MachineFunction &Fn = *RestoreBlock.getParent(); const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -533,7 +533,7 @@ if (!F->hasFnAttribute(Attribute::Naked)) { MFI.setCalleeSavedInfoValid(true); - ArrayRef CSI = MFI.getCalleeSavedInfo(); + std::vector &CSI = MFI.getCalleeSavedInfo(); if (!CSI.empty()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) { insertCSRSaves(*SaveBlock, CSI); Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -50,7 +50,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; /// \brief Can this function use the red zone for local allocations. 
Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1084,7 +1084,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); Index: lib/Target/ARM/ARMFrameLowering.h =================================================================== --- lib/Target/ARM/ARMFrameLowering.h +++ lib/Target/ARM/ARMFrameLowering.h @@ -38,7 +38,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool noFramePointerElim(const MachineFunction &MF) const override; @@ -69,7 +69,7 @@ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, unsigned LdmOpc, + std::vector &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -1021,7 +1021,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), @@ -1092,9 +1092,18 @@ .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet && MI != MBB.end()) { - 
MIB.copyImplicitOps(*MI); - MI->eraseFromParent(); + if (DeleteRet) { + if (MI != MBB.end()) { + MIB.copyImplicitOps(*MI); + MI->eraseFromParent(); + } + // If LR is not restored, mark it in CSI. + for (CalleeSavedInfo &I : CSI) { + if (I.getReg() != ARM::LR) + continue; + I.setRestored(false); + break; + } } MI = MIB; } else if (Regs.size() == 1) { @@ -1423,7 +1432,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/ARM/Thumb1FrameLowering.h =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.h +++ lib/Target/ARM/Thumb1FrameLowering.h @@ -36,7 +36,7 @@ const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -780,7 +780,7 @@ bool Thumb1FrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/AVR/AVRFrameLowering.h =================================================================== --- lib/Target/AVR/AVRFrameLowering.h +++ lib/Target/AVR/AVRFrameLowering.h @@ -30,7 +30,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool hasReservedCallFrame(const MachineFunction &MF) const 
override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; Index: lib/Target/AVR/AVRFrameLowering.cpp =================================================================== --- lib/Target/AVR/AVRFrameLowering.cpp +++ lib/Target/AVR/AVRFrameLowering.cpp @@ -275,7 +275,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) { return false; Index: lib/Target/Hexagon/HexagonFrameLowering.h =================================================================== --- lib/Target/Hexagon/HexagonFrameLowering.h +++ lib/Target/Hexagon/HexagonFrameLowering.h @@ -48,7 +48,7 @@ } bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, const std::vector &CSI, + MachineBasicBlock::iterator MI, std::vector &CSI, const TargetRegisterInfo *TRI) const override { return true; } Index: lib/Target/MSP430/MSP430FrameLowering.h =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.h +++ lib/Target/MSP430/MSP430FrameLowering.h @@ -40,7 +40,7 @@ const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool hasFP(const MachineFunction &MF) const override; Index: lib/Target/MSP430/MSP430FrameLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.cpp +++ lib/Target/MSP430/MSP430FrameLowering.cpp @@ -206,7 +206,7 @@ bool MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/Mips/Mips16FrameLowering.h 
=================================================================== --- lib/Target/Mips/Mips16FrameLowering.h +++ lib/Target/Mips/Mips16FrameLowering.h @@ -33,7 +33,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; Index: lib/Target/Mips/Mips16FrameLowering.cpp =================================================================== --- lib/Target/Mips/Mips16FrameLowering.cpp +++ lib/Target/Mips/Mips16FrameLowering.cpp @@ -143,7 +143,7 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { // // Registers RA,S0,S1 are the callee saved registers and they will be restored Index: lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.h +++ lib/Target/PowerPC/PPCFrameLowering.h @@ -106,7 +106,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; /// targetHandlesStackFrameRounding - Returns true if the target is Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2067,7 +2067,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. 
Index: lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.h +++ lib/Target/SystemZ/SystemZFrameLowering.h @@ -35,7 +35,7 @@ const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBII, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, Index: lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.cpp +++ lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -220,7 +220,7 @@ bool SystemZFrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -89,7 +89,7 @@ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool hasFP(const MachineFunction &MF) const override; Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -1999,7 +1999,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/XCore/XCoreFrameLowering.h =================================================================== --- 
lib/Target/XCore/XCoreFrameLowering.h +++ lib/Target/XCore/XCoreFrameLowering.h @@ -38,7 +38,7 @@ const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const override; MachineBasicBlock::iterator Index: lib/Target/XCore/XCoreFrameLowering.cpp =================================================================== --- lib/Target/XCore/XCoreFrameLowering.cpp +++ lib/Target/XCore/XCoreFrameLowering.cpp @@ -452,7 +452,7 @@ bool XCoreFrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, + std::vector &CSI, const TargetRegisterInfo *TRI) const{ MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); Index: test/CodeGen/Hexagon/branchfolder-insert-impdef.mir =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/branchfolder-insert-impdef.mir @@ -0,0 +1,43 @@ +# RUN: llc -march=hexagon -run-pass branch-folder %s -o - -verify-machineinstrs | FileCheck %s + +# Branch folding will perform tail merging of bb.1 and bb.2, and bb.2 will +# become the common tail. The use of R0 in bb.2 is undef while the +# corresponding use in bb.1 is not. The common tail will have the undef +# flag removed, which will cause R0 to become a live-in to bb.2. The problem +# is that R0 is not live-out from all predecessors of bb.2, namely is not +# live-out from bb.0. To remedy that, the branch folder should add an +# IMPLICIT_DEF to that block. 
+ +# CHECK-LABEL: bb.0: +# CHECK: %r0 = IMPLICIT_DEF +# CHECK-LABEL: bb.1: +# CHECK-LABEL: bb.2: +# CHECK: liveins: %r0 +# CHECK: PS_storerhabs 0, %r0 +# CHECK: PS_jmpret + +--- +name: fred +tracksRegLiveness: true + +body: | + bb.0: + successors: %bb.1, %bb.2 + J2_jumpt undef %p0, %bb.2, implicit-def %pc + J2_jump %bb.1, implicit-def %pc + + bb.1: + successors: %bb.3 + %r0 = L2_loadruh_io undef %r1, 0 + PS_storerhabs 0, killed %r0 + J2_jump %bb.3, implicit-def %pc + + bb.2: + successors: %bb.3 + PS_storerhabs 0, undef %r0 + J2_jump %bb.3, implicit-def %pc + + bb.3: + PS_jmpret killed %r31, implicit-def %pc +... + Index: test/CodeGen/Hexagon/livephysregs-lane-masks2.mir =================================================================== --- test/CodeGen/Hexagon/livephysregs-lane-masks2.mir +++ test/CodeGen/Hexagon/livephysregs-lane-masks2.mir @@ -13,13 +13,13 @@ body: | bb.0: - liveins: %p2, %r0 + liveins: %p0:0x1, %p2, %r0 successors: %bb.1, %bb.2 J2_jumpt killed %p2, %bb.1, implicit-def %pc J2_jump %bb.2, implicit-def %pc bb.1: - liveins: %r0, %r19 + liveins: %p0:0x1, %r0, %r19 successors: %bb.3 %r2 = A2_tfrsi 4 %r1 = COPY %r19 @@ -28,7 +28,7 @@ J2_jump %bb.3, implicit-def %pc bb.2: - liveins: %r0, %r18 + liveins: %p0:0x1, %r0, %r18 successors: %bb.3 %r2 = A2_tfrsi 5 %r1 = L2_loadrh_io %r18, 0