Index: include/llvm/CodeGen/LivePhysRegs.h =================================================================== --- include/llvm/CodeGen/LivePhysRegs.h +++ include/llvm/CodeGen/LivePhysRegs.h @@ -159,12 +159,18 @@ return OS; } -/// \brief Computes the live-in list for \p MBB assuming all of its successors -/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p -/// LiveRegs; This is just here to avoid repeated heap allocations when calling -/// this multiple times in a pass. -void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, - MachineBasicBlock &MBB); +/// \brief Computes registers live-in to \p MBB assuming all of its successors +/// live-in lists are up-to-date. Puts the result into the given LivePhysReg +/// instance \p LiveRegs. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB); + +/// Adds registers contained in \p LiveRegs to the block live-in list of \p MBB. +/// Does not add reserved registers. +void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); + +/// Convenience function combining computeLiveIns() and addLiveIns(). +void computeAndAddLiveIns(LivePhysRegs &LiveRegs, + MachineBasicBlock &MBB); } // end namespace llvm Index: lib/CodeGen/BranchFolding.h =================================================================== --- lib/CodeGen/BranchFolding.h +++ lib/CodeGen/BranchFolding.h @@ -146,8 +146,8 @@ /// Delete the instruction OldInst and everything after it, replacing it /// with an unconditional branch to NewDest. - void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest); + void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock &NewDest); /// Given a machine basic block and an iterator into it, split the MBB so /// that the part before the iterator falls into the part starting at the @@ -182,8 +182,8 @@ unsigned &commonTailIndex); /// Create merged DebugLocs of identical instructions across SameTails and - /// assign it to the instruction in common tail. - void MergeCommonTailDebugLocs(unsigned commonTailIndex); + /// assign it to the instruction in common tail; merge MMOs and undef flags. + void mergeCommonTails(unsigned commonTailIndex); bool OptimizeBranches(MachineFunction &MF); Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -365,15 +366,37 @@ return TailLen; } -void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest) { - TII->ReplaceTailWithBranchTo(OldInst, NewDest); - +void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock &NewDest) { if (UpdateLiveIns) { - NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *MRI, *NewDest); + // OldInst should always point to an instruction. + MachineBasicBlock &OldMBB = *OldInst->getParent(); + LiveRegs.clear(); + LiveRegs.addLiveOuts(OldMBB); + // Move backward to the place where will insert the jump. + MachineBasicBlock::iterator I = OldMBB.end(); + do { + --I; + LiveRegs.stepBackward(*I); + } while (I != OldInst); + + // Merging the tails may have switched some undef operand to non-undef ones. + // Add IMPLICIT_DEFS into the predecessor as necessary to have a definition + // of the register. + for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) { + // We computed the liveins with computeLiveIn earlier and should only see + // full registers: + assert(P.LaneMask == LaneBitmask::getAll() && + "Can only handle full register."); + MCPhysReg Reg = P.PhysReg; + if (!LiveRegs.available(*MRI, Reg)) + continue; + DebugLoc DL; + BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg); + } } + TII->ReplaceTailWithBranchTo(OldInst, &NewDest); ++NumTailMerge; } @@ -408,7 +431,7 @@ MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *MRI, *NewMBB); + computeAndAddLiveIns(LiveRegs, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -766,43 +789,6 @@ return true; } -void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) { - MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - - std::vector NextCommonInsts(SameTails.size()); - for (unsigned int i = 0 ; i != SameTails.size() ; ++i) { - if (i != commonTailIndex) - NextCommonInsts[i] = SameTails[i].getTailStartPos(); - else { - assert(SameTails[i].getTailStartPos() == MBB->begin() && - "MBB is not a common tail only block"); - } - } - - for (auto &MI : *MBB) { - if (MI.isDebugValue()) - continue; - DebugLoc DL = MI.getDebugLoc(); - for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { - if (i == commonTailIndex) - continue; - - auto &Pos = NextCommonInsts[i]; - assert(Pos != SameTails[i].getBlock()->end() && - "Reached BB end within common tail"); - while (Pos->isDebugValue()) { - ++Pos; - assert(Pos != SameTails[i].getBlock()->end() && - "Reached BB end within common tail"); - } - assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); - DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); - NextCommonInsts[i] = ++Pos; - } - MI.setDebugLoc(DL); - } -} - static void mergeOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock &MBBCommon) { @@ -853,6 +839,67 @@ } } +void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + std::vector NextCommonInsts(SameTails.size()); + for (unsigned int i = 0 ; i != SameTails.size() ; ++i) { + if (i != commonTailIndex) { + NextCommonInsts[i] = SameTails[i].getTailStartPos(); + mergeOperations(SameTails[i].getTailStartPos(), *MBB); + } else { + assert(SameTails[i].getTailStartPos() == MBB->begin() && + "MBB is not a common tail only block"); + } + } + + for (auto &MI : *MBB) { + if (MI.isDebugValue()) + continue; + DebugLoc DL = MI.getDebugLoc(); + for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { + if (i == commonTailIndex) + continue; + + auto &Pos = NextCommonInsts[i]; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + while (Pos->isDebugValue()) { + ++Pos; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + } + assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); + DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); + NextCommonInsts[i] = ++Pos; + } + MI.setDebugLoc(DL); + } + + if (UpdateLiveIns) { + LivePhysRegs NewLiveIns(*TRI); + computeLiveIns(NewLiveIns, *MBB); + + // The flag merging may lead to some register uses no longer using the + // flag, add IMPLICIT_DEFs in the predecessors as necessary. + for (MachineBasicBlock *Pred : MBB->predecessors()) { + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(*Pred); + MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); + for (unsigned Reg : NewLiveIns) { + if (!LiveRegs.available(*MRI, Reg)) + continue; + DebugLoc DL; + BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF), + Reg); + } + } + + MBB->clearLiveIns(); + addLiveIns(*MBB, NewLiveIns); + } +} + // See if any of the blocks in MergePotentials (which all have SuccBB as a // successor, or all have no successor if it is null) can be tail-merged. // If there is a successor, any blocks in MergePotentials that are not @@ -955,8 +1002,9 @@ // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); - // Merge debug locations across identical instructions for common tail. - MergeCommonTailDebugLocs(commonTailIndex); + // Merge debug locations, MMOs and undef flags across identical instructions + // for common tail. + mergeCommonTails(commonTailIndex); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. @@ -967,10 +1015,8 @@ continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); - // Merge operations (MMOs, undef flags) - mergeOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. - ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].getMPIter()); } Index: lib/CodeGen/BranchRelaxation.cpp =================================================================== --- lib/CodeGen/BranchRelaxation.cpp +++ lib/CodeGen/BranchRelaxation.cpp @@ -259,7 +259,7 @@ // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB); + computeAndAddLiveIns(LiveRegs, *NewBB); ++NumSplit; @@ -348,7 +348,7 @@ // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB); + computeAndAddLiveIns(LiveRegs, NewBB); } // We now have an appropriate fall-through block in place (either naturally or Index: lib/CodeGen/LivePhysRegs.cpp =================================================================== --- lib/CodeGen/LivePhysRegs.cpp +++ lib/CodeGen/LivePhysRegs.cpp @@ -218,16 +218,22 @@ } void llvm::computeLiveIns(LivePhysRegs &LiveRegs, - const MachineRegisterInfo &MRI, - MachineBasicBlock &MBB) { + const MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - assert(MBB.livein_empty()); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) LiveRegs.stepBackward(MI); +} - for (unsigned Reg : LiveRegs) { +void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) { + assert(MBB.livein_empty() && "Expected empty live-in list"); + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (MCPhysReg Reg : LiveRegs) { if (MRI.isReserved(Reg)) continue; // Skip the register if we are about to add one of its super registers. @@ -243,3 +249,9 @@ MBB.addLiveIn(Reg); } } + +void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs, + MachineBasicBlock &MBB) { + computeLiveIns(LiveRegs, MBB); + addLiveIns(MBB, LiveRegs); +} Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -672,16 +672,15 @@ MI.eraseFromParent(); // Recompute livein lists. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass around the loop to get loop carried registers right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } @@ -766,16 +765,15 @@ MI.eraseFromParent(); // Recompute liveness bottom up. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass in the loop to get the loop carried dependencies right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } Index: lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -845,16 +845,15 @@ MI.eraseFromParent(); // Recompute livein lists. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass around the loop to get loop carried registers right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } @@ -965,16 +964,15 @@ MI.eraseFromParent(); // Recompute livein lists. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass around the loop to get loop carried registers right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } Index: test/CodeGen/Hexagon/branchfolder-insert-impdef.mir =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/branchfolder-insert-impdef.mir @@ -0,0 +1,87 @@ +# RUN: llc -march=hexagon -run-pass branch-folder %s -o - -verify-machineinstrs | FileCheck %s + +# Branch folding will perform tail merging of bb.1 and bb.2, and bb.2 will +# become the common tail. The use of R0 in bb.2 is while the +# corresponding use in bb.1 is not. The common tail will have the +# flag removed, which will cause R0 to become a live-in to bb.2. The problem +# is that R0 is not live-out from all predecessors of bb.2, namely is not +# live-out from bb.0. To remedy that, the branch folder should add an +# IMPLICIT_DEF to that block. + +# CHECK-LABEL: name: func0 +# CHECK-LABEL: bb.0: +# CHECK: %r0 = IMPLICIT_DEF +# CHECK-LABEL: bb.1: +# CHECK-LABEL: bb.2: +# CHECK: liveins: %r0 +# CHECK: PS_storerhabs 0, %r0 +# CHECK: PS_jmpret + +--- +name: func0 +tracksRegLiveness: true + +body: | + bb.0: + liveins: %r31 + successors: %bb.1, %bb.2 + J2_jumpt undef %p0, %bb.2, implicit-def %pc + J2_jump %bb.1, implicit-def %pc + + bb.1: + liveins: %r31 + successors: %bb.3 + %r0 = L2_loadruh_io undef %r1, 0 + PS_storerhabs 0, killed %r0 + J2_jump %bb.3, implicit-def %pc + + bb.2: + liveins: %r31 + successors: %bb.3 + PS_storerhabs 0, undef %r0 + J2_jump %bb.3, implicit-def %pc + + bb.3: + liveins: %r31 + PS_jmpret killed %r31, implicit-def %pc +... +--- +# CHECK-LABEL: name: func1 +# CHECK-LABEL: bb.1: +# CHECK: %r0 = IMPLICIT_DEF +# CHECK-LABEL: bb.2: +# CHECK-LABEL: bb.3: +# CHECK: liveins: %r0 +# CHECK: PS_storerhabs 0, killed %r0 +# CHECK: PS_jmpret + +name: func1 +tracksRegLiveness: true + +body: | + bb.0: + liveins: %r31 + successors: %bb.1, %bb.2 + J2_jumpt undef %p0, %bb.2, implicit-def %pc + J2_jump %bb.1, implicit-def %pc + + bb.1: + liveins: %r31 + successors: %bb.3 + %r1 = A2_tfrsi 1 + PS_storerhabs 0, undef %r0 + %r0 = A2_tfrsi 1 + J2_jump %bb.3, implicit-def %pc + + bb.2: + liveins: %r31 + successors: %bb.3 + %r0 = L2_loadruh_io undef %r1, 0 + PS_storerhabs 0, killed %r0 + %r0 = A2_tfrsi 1 + J2_jump %bb.3, implicit-def %pc + + bb.3: + liveins: %r31 + PS_jmpret killed %r31, implicit undef %r0, implicit-def %pc +... Index: test/CodeGen/Hexagon/livephysregs-lane-masks2.mir =================================================================== --- test/CodeGen/Hexagon/livephysregs-lane-masks2.mir +++ test/CodeGen/Hexagon/livephysregs-lane-masks2.mir @@ -13,13 +13,13 @@ body: | bb.0: - liveins: %p2, %r0 + liveins: %p0:0x1, %p2, %r0 successors: %bb.1, %bb.2 J2_jumpt killed %p2, %bb.1, implicit-def %pc J2_jump %bb.2, implicit-def %pc bb.1: - liveins: %r0, %r19 + liveins: %p0:0x1, %r0, %r19 successors: %bb.3 %r2 = A2_tfrsi 4 %r1 = COPY %r19 @@ -28,7 +28,7 @@ J2_jump %bb.3, implicit-def %pc bb.2: - liveins: %r0, %r18 + liveins: %p0:0x1, %r0, %r18 successors: %bb.3 %r2 = A2_tfrsi 5 %r1 = L2_loadrh_io %r18, 0