Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -110,6 +111,12 @@ static cl::opt JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); +static cl::opt +TailDupPlacement("tail-dup-placement", + cl::desc("Perform tail duplication during placement. " + "Creates more fallthrough opportunites in " + "outline branches."), + cl::init(true), cl::Hidden); namespace { class BlockChain; @@ -239,6 +246,13 @@ /// \brief A handle to the post dominator tree. MachineDominatorTree *MDT; + /// \brief Duplicator used to duplicate tails during placement. + /// + /// Placement decisions can open up new tail duplication opportunities, but + /// since tail duplication affects placement decisions of later blocks, it + /// must be done inline. + TailDuplicator TailDup; + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate /// all terminators of the MachineFunction. SmallPtrSet UnavoidableBlocks; @@ -626,11 +640,30 @@ // after this block. MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + // Placing an actual successor may have changed tail duplication + // opportunities check for that now. + if (TailDupPlacement && BestSucc) { + DEBUG(dbgs() << "Redoing tail duplication for BestSucc#" + << BestSucc->getNumber() << "\n"); + bool IsSimple = TailDup.isSimpleBB(BestSucc); + bool IgnoreFallthrough = true; + // Simple blocks should just fallthrough, so only worry about non-simple + // ones. + if (!IsSimple && TailDup.shouldTailDuplicate(F, IsSimple, + *BestSucc, IgnoreFallthrough)) { + SmallVector Cond; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + if (!TII->AnalyzeBranch(*BestSucc, TBB, FBB, Cond)) + TailDup.tailDuplicateAndUpdate(F, IsSimple, BestSucc, BB); + } + } + // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. - if (!BestSucc) + if (!BestSucc) { BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + } if (!BestSucc) { BestSucc = @@ -1388,6 +1421,10 @@ TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); MDT = &getAnalysis(); + auto MMI = getAnalysisIfAvailable(); + if (TailDupPlacement) + TailDup.initMF(F, MMI, MBPI); + assert(BlockToChain.empty()); buildCFGChains(F); Index: lib/CodeGen/TailDuplication.cpp =================================================================== --- lib/CodeGen/TailDuplication.cpp +++ lib/CodeGen/TailDuplication.cpp @@ -17,21 +17,16 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -56,72 +51,20 @@ static cl::opt TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); -typedef std::vector > AvailableValsTy; - namespace { - /// Perform tail duplication. + /// Perform tail duplication. Delegates to TailDuplicator class TailDuplicatePass : public MachineFunctionPass { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineBranchProbabilityInfo *MBPI; - MachineModuleInfo *MMI; - MachineRegisterInfo *MRI; - std::unique_ptr RS; - bool PreRegAlloc; - - // A list of virtual registers for which to update SSA form. - SmallVector SSAUpdateVRs; - - // For each virtual register in SSAUpdateVals keep a list of source virtual - // registers. - DenseMap SSAUpdateVals; + TailDuplicator Duplicator; public: static char ID; explicit TailDuplicatePass() : - MachineFunctionPass(ID), PreRegAlloc(false) {} + MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - private: - void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, - MachineBasicBlock *BB); - void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap &LocalVRMap, - SmallVectorImpl > &Copies, - const DenseSet &UsedByPhi, - bool Remove); - void DuplicateInstruction(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - MachineFunction &MF, - DenseMap &LocalVRMap, - const DenseSet &UsedByPhi); - void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVectorImpl &TDBBs, - SmallSetVector &Succs); - bool TailDuplicateBlocks(MachineFunction &MF); - bool shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, MachineBasicBlock &TailBB); - bool isSimpleBB(MachineBasicBlock *TailBB); - bool canCompletelyDuplicateBB(MachineBasicBlock &BB); - bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVectorImpl &TDBBs, - const DenseSet &RegsUsedByPhi, - SmallVectorImpl &Copies); - bool TailDuplicate(MachineBasicBlock *TailBB, - bool IsSimple, - MachineFunction &MF, - SmallVectorImpl &TDBBs, - SmallVectorImpl &Copies); - bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, - bool IsSimple, - MachineFunction &MF); - - void RemoveDeadBlock(MachineBasicBlock *MBB); }; char TailDuplicatePass::ID = 0; @@ -136,19 +79,13 @@ if (skipOptnoneFunction(*MF.getFunction())) return false; - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - MMI = getAnalysisIfAvailable(); - MBPI = &getAnalysis(); + auto MMI = getAnalysisIfAvailable(); + auto MBPI = &getAnalysis(); - PreRegAlloc = MRI->isSSA(); - RS.reset(); - if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) - RS.reset(new RegScavenger()); + Duplicator.initMF(MF, MMI, MBPI); bool MadeChange = false; - while (TailDuplicateBlocks(MF)) + while (Duplicator.tailDuplicateBlocks(MF)) MadeChange = true; return MadeChange; @@ -159,6 +96,26 @@ MachineFunctionPass::getAnalysisUsage(AU); } +namespace llvm { + +void TailDuplicator::initMF(MachineFunction &MF, + const MachineModuleInfo *MMIin, + const MachineBranchProbabilityInfo *MBPIin) { + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + MMI = MMIin; + MBPI = MBPIin; + + assert (MBPI != nullptr && "Machine Branch Probability Info required"); + + PreRegAlloc = MRI->isSSA(); + RS.reset(); + + if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS.reset(new RegScavenger()); +} + static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -209,16 +166,17 @@ /// Tail duplicate the block and cleanup. bool -TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, - bool IsSimple, - MachineFunction &MF) { +TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, + bool IsSimple, + MachineBasicBlock *MBB, + MachineBasicBlock *ForcedLayoutPred) { // Save the successors list. SmallSetVector Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector TDBBs; SmallVector Copies; - if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) + if (!TailDuplicate(MBB, ForcedLayoutPred, IsSimple, MF, TDBBs, Copies)) return false; ++NumTails; @@ -313,7 +271,7 @@ /// Look for small blocks that are unconditionally branched to and do not fall /// through. Tail-duplicate their instructions into their predecessors to /// eliminate (dynamic) branches. -bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { +bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { @@ -329,10 +287,11 @@ bool IsSimple = isSimpleBB(MBB); - if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + bool IgnoreFallthrough = false; + if (!shouldTailDuplicate(MF, IsSimple, *MBB, IgnoreFallthrough)) continue; - MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); + MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB, nullptr); } if (PreRegAlloc && TailDupVerify) @@ -376,8 +335,8 @@ } /// Add a definition and source virtual registers pair for SSA update. -void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, - MachineBasicBlock *BB) { +void TailDuplicator::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { DenseMap::iterator LI= SSAUpdateVals.find(OrigReg); if (LI != SSAUpdateVals.end()) LI->second.push_back(std::make_pair(BB, NewReg)); @@ -391,7 +350,7 @@ /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the /// source register that's contributed by PredBB and update SSA update map. -void TailDuplicatePass::ProcessPHI( +void TailDuplicator::ProcessPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap &LocalVRMap, SmallVectorImpl > &Copies, @@ -422,7 +381,8 @@ /// Duplicate a TailBB instruction to PredBB and update /// the source operands due to earlier PHI translation. -void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, +void +TailDuplicator::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, @@ -463,9 +423,9 @@ /// have gained new predecessors. Update the PHI instructions in them /// accordingly. void -TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVectorImpl &TDBBs, - SmallSetVector &Succs) { +TailDuplicator::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVectorImpl &TDBBs, + SmallSetVector &Succs) { for (SmallSetVector::iterator SI = Succs.begin(), SE = Succs.end(); SI != SE; ++SI) { MachineBasicBlock *SuccBB = *SI; @@ -547,12 +507,17 @@ /// Determine if it is profitable to duplicate this block. bool -TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, - MachineBasicBlock &TailBB) { +TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, + MachineBasicBlock &TailBB, + bool IgnoreFallthrough) { // Only duplicate blocks that end with unconditional branches. - if (TailBB.canFallThrough()) + // IgnoreFallthrough is set when considering duplication during layout. + // Because the ultimate layout may change, it is better to consider + // duplicating blocks that can't fall through. + if (TailBB.canFallThrough() && !IgnoreFallthrough) { return false; + } // Don't try to tail-duplicate single-block loops. if (TailBB.isSuccessor(&TailBB)) @@ -649,7 +614,7 @@ /// True if this BB has only one unconditional jump. bool -TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { +TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { if (TailBB->succ_size() != 1) return false; if (TailBB->pred_empty()) @@ -671,7 +636,7 @@ } bool -TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { +TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { for (MachineBasicBlock *PredBB : BB.predecessors()) { if (PredBB->succ_size() > 1) return false; @@ -688,7 +653,7 @@ } bool -TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, +TailDuplicator::duplicateSimpleBB(MachineBasicBlock *TailBB, SmallVectorImpl &TDBBs, const DenseSet &UsedByPhi, SmallVectorImpl &Copies) { @@ -767,11 +732,12 @@ /// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, - bool IsSimple, - MachineFunction &MF, - SmallVectorImpl &TDBBs, - SmallVectorImpl &Copies) { +TailDuplicator::TailDuplicate(MachineBasicBlock *TailBB, + MachineBasicBlock *ForcedLayoutPred, + bool IsSimple, + MachineFunction &MF, + SmallVectorImpl &TDBBs, + SmallVectorImpl &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet UsedByPhi; @@ -790,6 +756,10 @@ PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; + DEBUG(dbgs() << "Considering duping BB#" << TailBB->getNumber() + << " into BB#" << PredBB->getNumber() << "\n" + << *PredBB << "\n"); + assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); // EH edges are ignored by AnalyzeBranch. @@ -803,8 +773,17 @@ if (!PredCond.empty()) continue; // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + bool IsLayoutSuccessor = false; + if (ForcedLayoutPred) + IsLayoutSuccessor = (ForcedLayoutPred == PredBB); + else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + IsLayoutSuccessor = true; + if (IsLayoutSuccessor) { + DEBUG(dbgs() << "Not tail-duplicating into PredBB#" << PredBB->getNumber() + << " From Succ#" << TailBB->getNumber() + << " because fallthrough\n"); continue; + } DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); @@ -866,6 +845,7 @@ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); + // PredBB->updateTerminator(); Changed = true; ++NumTailDups; @@ -873,13 +853,15 @@ // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this - // block into the prior block. + // block into the prior block. Don't do this when ForcedLayoutPred is + // non-null, as it can break layout to remove blocks. MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. - if (PrevBB->succ_size() == 1 && + if (ForcedLayoutPred == nullptr && + PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { @@ -982,7 +964,7 @@ /// Remove the specified dead machine basic block from the function, updating /// the CFG. -void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { +void TailDuplicator::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); @@ -993,3 +975,5 @@ // Remove the block. MBB->eraseFromParent(); } + +} // End llvm namespace Index: test/CodeGen/AArch64/machine_cse.ll =================================================================== --- test/CodeGen/AArch64/machine_cse.ll +++ test/CodeGen/AArch64/machine_cse.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s +; The test has a false positive when tail duplication fires during layout. ; marked as external to prevent possible optimizations @a = external global i32