Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -399,6 +399,9 @@ /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); + + /// Branch Coalescing - combine basic blocks guarded by the same branch + extern char &BranchCoalescingID; } // End llvm namespace /// Target machine pass initializer for passes with dependencies. Use with Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -77,6 +77,7 @@ void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); +void initializeBranchCoalescingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); Index: lib/CodeGen/BranchCoalescing.cpp =================================================================== --- /dev/null +++ lib/CodeGen/BranchCoalescing.cpp @@ -0,0 +1,758 @@ +//===-- BranchCoalescing.cpp - Coalesce blocks with the same condition ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Coalesce basic blocks guarded by the same branch condition into a single +/// basic block. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "coal-branch" + +static cl::opt + EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, + cl::desc("enable coalescing of duplicate branches")); + +STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced"); +STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged"); +STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); + +//===----------------------------------------------------------------------===// +// BranchCoalescing +//===----------------------------------------------------------------------===// +/// +/// Improve scheduling by coalescing branches that depend on the same condition. +/// This pass looks for blocks that are guarded by the same branch condition +/// and attempts to merge the blocks together. Such opportunities arise from +/// the expansion of select statements in the IR. 
+/// +/// For example, consider the following LLVM IR: +/// +/// %test = icmp eq i32 %x, 0 +/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 +/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 +/// +/// This IR expands to the following machine code on PowerPC: +/// +/// BB#0: derived from LLVM BB %entry +/// Live Ins: %F1 %F3 %X6 +/// +/// %vreg0 = COPY %F1; F8RC:%vreg0 +/// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4 +/// %vreg8 = LXSDX %ZERO8, %vreg7, %RM; +/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 +/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5 +/// Successors according to CFG: BB#1(?%) BB#2(?%) +/// +/// BB#1: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#0 +/// Successors according to CFG: BB#2(?%) +/// +/// BB#2: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#0 BB#1 +/// %vreg9 = PHI %vreg8, <BB#1>, %vreg0, <BB#0>; +/// F8RC:%vreg9,%vreg8,%vreg0 +/// +/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5 +/// Successors according to CFG: BB#3(?%) BB#4(?%) +/// +/// BB#3: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#2 +/// Successors according to CFG: BB#4(?%) +/// +/// BB#4: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#2 BB#3 +/// %vreg13 = PHI %vreg12, <BB#3>, %vreg2, <BB#2>; +/// F8RC:%vreg13,%vreg12,%vreg2 +/// +/// BLR8 %LR8, %RM, %F1 +/// +/// When this pattern is detected, branch coalescing will try to collapse +/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3. 
+/// +/// If all conditions are met, IR should collapse to: +/// +/// BB#0: derived from LLVM BB %entry +/// Live Ins: %F1 %F3 %X6 +/// +/// %vreg0 = COPY %F1; F8RC:%vreg0 +/// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4 +/// %vreg8 = LXSDX %ZERO8, %vreg7, %RM; +/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 +/// +/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5 +/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%) +/// BB#4(0x55555554 / 0x80000000 = 66.67%) +/// +/// BB#1: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#0 +/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%) +/// +/// BB#4: derived from LLVM BB %entry +/// Predecessors according to CFG: BB#0 BB#1 +/// %vreg9 = PHI %vreg8, <BB#1>, %vreg0, <BB#0>; +/// F8RC:%vreg9,%vreg8,%vreg0 +/// %vreg13 = PHI %vreg12, <BB#1>, %vreg2, <BB#0>; +/// F8RC:%vreg13,%vreg12,%vreg2 +/// +/// BLR8 %LR8, %RM, %F1 +/// +/// Branch coalescing does not split blocks; it moves everything in the same +/// direction, ensuring it does not break use/definition semantics. +/// +/// PHI nodes and their corresponding use instructions are moved to the successor +/// block if there are no uses within the successor block's PHI nodes. PHI +/// node ordering cannot be assumed. +/// +/// Non-PHI instructions can be moved up to the predecessor basic block or down +/// to the successor basic block following any PHI instructions. Whether they move +/// up or down depends on whether the register(s) defined in the instructions +/// are used in the current block or in any PHI instructions at the beginning of +/// the successor block. 
+ +namespace { + +class BranchCoalescing : public MachineFunctionPass { + struct CoalescingCandidateInfo { + MachineBasicBlock *BranchBlock; //< Block containing the branch + MachineBasicBlock *BranchTargetBlock; //< Block branched to + MachineBasicBlock *FallThroughBlock; //< Fall-through if branch not taken + SmallVector Cond; + bool MustMoveDown; + bool MustMoveUp; + + CoalescingCandidateInfo(); + void clear(); + }; + + MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + + void initialize(MachineFunction &F); + bool analyzeBranch(CoalescingCandidateInfo &Cand); + bool identicalOperands(ArrayRef OperandList1, + ArrayRef OperandList2) const; + bool validateCandidates(CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion) const; + + static bool isBranchCoalescingEnabled() { + return EnableBranchCoalescing != cl::BOU_FALSE; + } + +public: + static char ID; + + BranchCoalescing() : MachineFunctionPass(ID) { + initializeBranchCoalescingPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Branch Coalescing"; } + + bool mergeCandidates(CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion); + bool canMoveTo(const MachineInstr &MI, const MachineBasicBlock &MBB, + bool MoveToBeginning) const; + bool canMerge(CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion) const; + void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB, + MachineBasicBlock *TargetRegionMBB); + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. 
+ +char BranchCoalescing::ID = 0; +char &llvm::BranchCoalescingID = BranchCoalescing::ID; + +INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing", + "Branch Coalescing", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing", + false, false) + +BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() + : BranchBlock(nullptr), BranchTargetBlock(nullptr), + FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {} + +void BranchCoalescing::CoalescingCandidateInfo::clear() { + BranchBlock = nullptr; + BranchTargetBlock = nullptr; + FallThroughBlock = nullptr; + Cond.clear(); + MustMoveDown = false; + MustMoveUp = false; +} + +void BranchCoalescing::initialize(MachineFunction &MF) { + MDT = &getAnalysis(); + MPDT = &getAnalysis(); + TII = MF.getSubtarget().getInstrInfo(); + MRI = &MF.getRegInfo(); +} + +/// +/// Analyze the branch statement to determine if it can be coalesced. This +/// method analyses the branch statement for the given candidate to determine +/// if it can be coalesced. If the branch can be coalesced, then the +/// BranchTargetBlock and the FallThroughBlock are recorded in the specified +/// Candidate. 
+/// +///\param[in,out] Cand The coalescing candidate to analyze +///\return true if and only if the branch can be coalesced, false otherwise +/// +bool BranchCoalescing::analyzeBranch(CoalescingCandidateInfo &Cand) { + DEBUG(dbgs() << "Analyzing branch for block " << Cand.BranchBlock->getNumber() + << ": "); + MachineBasicBlock *FalseMBB = nullptr; + + if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB, + Cand.Cond)) { + DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n"); + return false; + } + + for (auto &I : Cand.BranchBlock->terminators()) { + DEBUG(dbgs() << "Looking at terminator : " << I << "\n"); + if (!I.isBranch()) + continue; + + if (I.getNumOperands() != I.getNumExplicitOperands()) { + DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I + << "\n"); + return false; + } + } + + // For now only consider triangles (i.e, BranchTargetBlock is set, + // FalseMBBim is null) + if (!Cand.BranchTargetBlock || (Cand.BranchTargetBlock && FalseMBB)) { + DEBUG(dbgs() << "Does not form a triangle - skip\n"); + return false; + } + + if (Cand.BranchTargetBlock == Cand.BranchBlock) { + DEBUG(dbgs() << "Branch to the same block - skip\n"); + return false; + } + + // Only consider simple control flow for now. 
In other words, only try to + // coalesce the branch-taken block (i.e., BranchTargetBlock) if it + // post-dominates the current block + if (!MPDT->dominates(Cand.BranchTargetBlock, Cand.BranchBlock)) { + DEBUG(dbgs() << "Complex control flow - skip\n"); + return false; + } + + if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) { + DEBUG(dbgs() << "EH Pad - skip\n"); + return false; + } + + // Ensure there are only two successors + if (Cand.BranchBlock->succ_size() != 2) { + DEBUG(dbgs() << "Does not have 2 successors - skip\n"); + return false; + } + + // Sanity check - the block must be able to fall through + assert(Cand.BranchBlock->canFallThrough() && + "Expecting the block to fall through!"); + + // Record the fall through block + for (MachineBasicBlock *Succ : Cand.BranchBlock->successors()) + if (Succ != Cand.BranchTargetBlock) { + assert(Succ && "Expecting a valid fall-through block\n"); + + if (!Succ->empty()) { + DEBUG(dbgs() << "Fall-through block contains code -- skip\n"); + return false; + } + + if (!Succ->isSuccessor(Cand.BranchTargetBlock)) { + DEBUG(dbgs() + << "Successor of fall through block is not branch taken block\n"); + return false; + } + Cand.FallThroughBlock = Succ; + } + + DEBUG(dbgs() << "Valid Candidate\n"); + return true; +} + +/// +/// Determine if the two operand lists are identical +/// +/// \param[in] OpList1 operand list +/// \param[in] OpList2 operand list +/// \return true if and only if the operands lists are identical +/// +bool BranchCoalescing::identicalOperands( + ArrayRef OpList1, ArrayRef OpList2) const { + + if (OpList1.size() != OpList2.size()) { + DEBUG(dbgs() << "Operand list is different size\n"); + return false; + } + + for (unsigned i = 0; i < OpList1.size(); ++i) { + const MachineOperand &Op1 = OpList1[i]; + const MachineOperand &Op2 = OpList2[i]; + + DEBUG(dbgs() << "Op1: " << Op1 << "\n" + << "Op2: " << Op2 << "\n"); + + if (Op1.isIdenticalTo(Op2)) { + DEBUG(dbgs() << "Op1 and Op2 are 
identical!\n"); + continue; + } + + // If the operands are not identical, but are registers, check to see if the + // definition of the register produces the same value. If they produce the + // same value, consider them to be identical. + if (Op1.isReg() && Op2.isReg() && + TargetRegisterInfo::isVirtualRegister(Op1.getReg()) && + TargetRegisterInfo::isVirtualRegister(Op2.getReg())) { + MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg()); + MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg()); + if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) { + DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def + << " produce the same value!\n"); + } else { + DEBUG(dbgs() << "Operands produce different values\n"); + return false; + } + } else { + DEBUG(dbgs() << "The operands are not provably identical.\n"); + return false; + } + } + return true; +} + +/// +/// Moves ALL PHI instructions in SourceMBB into TargetMBB and update them to +/// refer to the new block. PHI instructions in SourceMBB are placed at the +/// beginning of TargetMBB, before existing PHI instructions. 
+/// +/// \param[in] SourceMBB block to move PHI instructions from +/// \param[in] TargetMBB block to move PHI instructions to +/// +void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, + MachineBasicBlock *TargetMBB) { + + MachineBasicBlock::iterator MI = SourceMBB->begin(); + MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI(); + + if (MI == ME) { + DEBUG(dbgs() << "SourceMBB contain no PHI instructions.\n"); + return; + } + + // Always to move to top of TargetMBB + MachineBasicBlock::iterator InsertLoc = TargetMBB->begin(); + for (MachineBasicBlock::iterator Iter = MI; Iter != ME; Iter++) { + MachineInstr &PHIInst = *Iter; + for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = PHIInst.getOperand(i); + if (MO.getMBB() == SourceMBB) + MO.setMBB(TargetMBB); + } + } + // Move all PHI instructions in SourceMBB to TargetMBB + TargetMBB->splice(InsertLoc, SourceMBB, MI, ME); +} + +/// +/// Determine if the specified instruction can be moved to the TargetMBB. +/// If MoveToBeginning is set to true, function checks if MI can be moved to +/// the begining of the TargetMBB following PHI instructions. +/// If MoveToBeginning is set to false, checks if MI can be moved to the end +/// of the TargetMBB, immediately before the first terminator. +/// +/// An MI instruction can be moved to beginning of the TargetMBB if there are no +/// PHI's in the TargetMBB that use what MI defines. +/// +/// An MI instruction can be moved to then end of the TargetMBB if no PHI node +/// defines what MI uses within it's own MBB. +/// +/// \param[in] MI the machine instruction to move. +/// \param[in] MBB the machine basic block to move to +/// \param[in] MoveToBeginning true indicates move to the beginning of MBB, +/// false indicates move to end of MBB. 
+/// \return true if it is safe to move MI to MBB, false otherwise +/// +bool BranchCoalescing::canMoveTo(const MachineInstr &MI, + const MachineBasicBlock &TargetMBB, + bool MoveToBeginning) const { + + if (MoveToBeginning) { + DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of " + << TargetMBB.getNumber() << "\n"); + for (auto &Def : MI.defs()) { // Looking at Def + for (auto &Use : MRI->use_instructions(Def.getReg())) { + if (Use.isPHI() && Use.getParent() == &TargetMBB) { + DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n"); + return false; + } + } + } + } else { + DEBUG(dbgs() << "Checking if " << MI << " can move to end of " + << TargetMBB.getNumber() << "\n"); + for (auto &Use : MI.uses()) { + if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) { + MachineInstr *DefInst = MRI->getVRegDef(Use.getReg()); + if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) { + DEBUG(dbgs() << " *** Cannot move this instruction ***\n"); + return false; + } else { + DEBUG(dbgs() << " *** def is in another block -- safe to move!\n"); + } + } + } + } + + DEBUG(dbgs() << " Safe to move\n"); + return true; +} + +/// +/// This method checks to ensure the two coalescing candidates follows the +/// expected pattern required for coalescing. +/// +/// \param[in] SourceRegion The candidate to move statements from +/// \param[in] TargetRegion The candidate to move statements to +/// \return true if all instructions in SourceRegion.BranchBlock can be merged +/// into a block in TargetRegion; false otherwise. 
+/// +bool BranchCoalescing::validateCandidates( + CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion) const { + std::string err_msg; + + if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock) + err_msg = "Expecting SourceRegion to immediately follow TargetRegion"; + else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock)) + err_msg = "Expecting TargetRegion to dominate SourceRegion"; + else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock)) + err_msg = "Expecting SourceRegion to post-dominate TargetRegion"; + else if (!TargetRegion.FallThroughBlock->empty() || + !SourceRegion.FallThroughBlock->empty()) + err_msg = "Expecting fall-through blocks to be empty"; + + bool verify = err_msg.empty(); + DEBUG(dbgs() << err_msg << "\n"); + + assert(verify && "Invalid candidates for branch coalescing!"); + + return (verify); +} + +/// +/// This method determines whether the two coalescing candidates can be merged. +/// In order to be merged, all instructions must be able to +/// 1. Move to the beginning of the SourceRegion.BranchTargetBlock; +/// 2. Move to the end of the TargetRegion.BranchBlock. +/// Merging involves moving the instructions in the +/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock). +/// +/// The preference is to move instructions down, to the +/// beginning of the SourceRegion.BranchTargetBlock. This is not possible if any +/// register defined in SourceRegion.BranchBlock is used in a PHI node in the +/// SourceRegion.BranchTargetBlock. In this case, check whether the statement +/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately +/// before the branch statement). If it cannot move, then these blocks cannot +/// be merged. +/// +/// Note that there is no analysis for moving instructions past the fall-through +/// blocks because they are assumed to be empty. 
If they are not empty, then +/// additional safety analysis must be added here to ensure it is safe to move +/// the instructions in SourceRegion.BranchBlock past the fall-through blocks. +/// +/// \param[in] SourceRegion The candidate to move statements from +/// \param[in] TargetRegion The candidate to move statements to +/// \return true if all instructions in SourceRegion.BranchBlock can be merged +/// into a block in TargetRegion; false otherwise. +/// +bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion) const { + if (!validateCandidates(SourceRegion, TargetRegion)) + return false; + + // Walk through PHI nodes first and see if they force the merge into the + // SourceRegion.BranchTargetBlock. + for (MachineBasicBlock::iterator + I = SourceRegion.BranchBlock->instr_begin(), + E = SourceRegion.BranchBlock->getFirstNonPHI(); + I != E; ++I) { + for (auto &Def : I->defs()) + for (auto &Use : MRI->use_instructions(Def.getReg())) { + if (Use.getParent() == SourceRegion.BranchBlock) { + DEBUG(dbgs() << "PHI " << *I + << " defines register used in this " + "block -- all must move down\n"); + SourceRegion.MustMoveDown = true; + } + if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) { + DEBUG(dbgs() << "PHI " << *I << " defines register used in another " + "PHI within branch target block -- can't merge\n"); + NumPHINotMoved++; + return false; + } + } + } + + for (MachineBasicBlock::iterator + I = SourceRegion.BranchBlock->getFirstNonPHI(), + E = SourceRegion.BranchBlock->end(); + I != E; ++I) { + if (!canMoveTo(*I, *SourceRegion.BranchTargetBlock, true)) { + DEBUG(dbgs() << "Instruction " << *I + << " cannot move down - must move up!\n"); + SourceRegion.MustMoveUp = true; + } + if (!canMoveTo(*I, *TargetRegion.BranchBlock, false)) { + DEBUG(dbgs() << "Instruction " << *I + << " cannot move up - must move down!\n"); + SourceRegion.MustMoveDown = true; + } + } + + return 
(SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) ? false : true; +} + +/// Merge the instructions from SourceRegion.BranchBlock, +/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into +/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and +/// TargetRegion.FallThroughBlock respectively. +/// +/// The successors for blocks in TargetRegion will be updated to use the +/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion +/// will be removed from the function. +/// +/// A region consists of a BranchBlock, a FallThroughBlock, and a +/// BranchTargetBlock. Branch coalesce works on patterns where the +/// TargetRegion's BranchTargetBlock must also be the SourceRegions's +/// BranchBlock. +/// +/// Before mergeCandidates: +/// +/// +---------------------------+ +/// | TargetRegion.BranchBlock | +/// +---------------------------+ +/// / | +/// / +--------------------------------+ +/// | | TargetRegion.FallThroughBlock | +/// \ +--------------------------------+ +/// \ | +/// +----------------------------------+ +/// | TargetRegion.BranchTargetBlock | +/// | SourceRegion.BranchBlock | +/// +----------------------------------+ +/// / | +/// / +--------------------------------+ +/// | | SourceRegion.FallThroughBlock | +/// \ +--------------------------------+ +/// \ | +/// +----------------------------------+ +/// | SourceRegion.BranchTargetBlock | +/// +----------------------------------+ +/// +/// After mergeCandidates: +/// +/// +-----------------------------+ +/// | TargetRegion.BranchBlock | +/// | SourceRegion.BranchBlock | +/// +-----------------------------+ +/// / | +/// / +---------------------------------+ +/// | | TargetRegion.FallThroughBlock | +/// | | SourceRegion.FallThroughBlock | +/// \ +---------------------------------+ +/// \ | +/// +----------------------------------+ +/// | SourceRegion.BranchTargetBlock | +/// +----------------------------------+ +/// +/// \param[in] SourceRegion The 
candidate to move blocks from +/// \param[in] TargetRegion The candidate to move blocks to +/// +bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, + CoalescingCandidateInfo &TargetRegion) { + + if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) { + assert(0 && "Cannot have both MustMoveDown and MustMoveUp set!"); + DEBUG(dbgs() << "Cannot have both MustMoveDown and MustMoveUp set!"); + return false; + } + + if (!validateCandidates(SourceRegion, TargetRegion)) + return false; + + // Handle the BranchBlock first + // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block + moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock); + + // Move remaining instructions in SourceRegion.BranchBlock into + // TargetRegion.BranchBlock + MachineBasicBlock::iterator firstInstr = + SourceRegion.BranchBlock->getFirstNonPHI(); + MachineBasicBlock::iterator lastInstr = + SourceRegion.BranchBlock->getFirstTerminator(); + + MachineBasicBlock *Source = SourceRegion.MustMoveDown + ? SourceRegion.BranchTargetBlock + : TargetRegion.BranchBlock; + + MachineBasicBlock::iterator Target = + SourceRegion.MustMoveDown + ? SourceRegion.BranchTargetBlock->getFirstNonPHI() + : TargetRegion.BranchBlock->getFirstTerminator(); + + Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr); + + // Clean-up the control flow + // Remove SourceRegion.FallThroughBlock before transferring successors of + // SourceRegion.BranchBlock to TargetRegion.BranchBlock. + SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock); + TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs( + SourceRegion.BranchBlock); + // Update branch in TargetRegion.BranchBlock to jump to + // SourceRegion.BranchTargetBlock + // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock. 
+ TargetRegion.BranchBlock->ReplaceUsesOfBlockWith( + SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock); + // Remove the branch statement(s) in SourceRegion.BranchBlock + MachineBasicBlock::iterator I = + SourceRegion.BranchBlock->terminators().begin(); + while (I != SourceRegion.BranchBlock->terminators().end()) { + MachineInstr &CurrInst = *I; + ++I; + if (CurrInst.isBranch()) + CurrInst.eraseFromParent(); + } + + // Merge FallThroughBlock + // Move any PHIs down to the branch-taken block + + // Not necessary to merge the fall-through blocks, they should be empty! + assert(TargetRegion.FallThroughBlock->empty() && + "FallThroughBlocks should be empty!"); + + // We still need to transfer the successors though, and update the CFG + TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs( + SourceRegion.FallThroughBlock); + TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock); + + // Remove the blocks from the function. + assert(SourceRegion.BranchBlock->empty() && + "Expecting branch block to be empty!"); + SourceRegion.BranchBlock->eraseFromParent(); + + assert(SourceRegion.FallThroughBlock->empty() && + "Expecting fall-through block to be empty!\n"); + SourceRegion.FallThroughBlock->eraseFromParent(); + + NumBlocksCoalesced++; + return true; +} + +bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) { + + if (skipFunction(*MF.getFunction()) || MF.empty() || + !isBranchCoalescingEnabled()) + return false; + + bool didSomething = false; + + DEBUG(dbgs() << "******** Branch Coalescing ********\n"); + initialize(MF); + + DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); + + CoalescingCandidateInfo Cand1, Cand2; + // Walk over blocks and find candidates to merge + // Continue trying to merge with the first candidate found, as long as merging + // is successfull. 
+ for (MachineBasicBlock &MBB : MF) { + bool MergedCandidates = false; + do { + MergedCandidates = false; + Cand1.clear(); + Cand2.clear(); + + Cand1.BranchBlock = &MBB; + + // If unable to analyze the branch, then continue to next block + if (!analyzeBranch(Cand1)) + break; + + Cand2.BranchBlock = Cand1.BranchTargetBlock; + if (!analyzeBranch(Cand2)) + break; + + // Sanity check + // The branch-taken block of the second candidate should post-dominate the + // first candidate + assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) && + "Branch-taken block should post-dominate first candidate"); + + if (!identicalOperands(Cand1.Cond, Cand2.Cond)) { + DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and " + << Cand2.BranchBlock->getNumber() + << " have different branches\n"); + break; + } + if (!canMerge(Cand2, Cand1)) { + DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber() + << " and " << Cand2.BranchBlock->getNumber() << "\n"); + NumBlocksNotCoalesced++; + continue; + } + DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber() + << " and " << Cand1.BranchTargetBlock->getNumber() << "\n"); + MergedCandidates = mergeCandidates(Cand2, Cand1); + if (MergedCandidates) + didSomething = true; + + DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n"); + } while (MergedCandidates); + } + +#ifndef NDEBUG + // Verify MF is still valid after branch coalescing + if (didSomething) + MF.verify(nullptr, "Error in code produced by branch coalescing"); +#endif // NDEBUG + + DEBUG(dbgs() << "Finished Branch Coalescing\n"); + return didSomething; +} Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -4,6 +4,7 @@ Analysis.cpp AtomicExpandPass.cpp BasicTargetTransformInfo.cpp + BranchCoalescing.cpp BranchFolding.cpp BranchRelaxation.cpp BuiltinGCs.cpp Index: lib/CodeGen/CodeGen.cpp 
=================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -21,6 +21,7 @@ /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); + initializeBranchCoalescingPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -704,6 +704,10 @@ addPass(&MachineLICMID, false); addPass(&MachineCSEID, false); + + // Coalesce basic blocks with the same branch condition + addPass(&BranchCoalescingID); + addPass(&MachineSinkingID); addPass(&PeepholeOptimizerID); Index: test/CodeGen/PowerPC/branch_coalesce.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/branch_coalesce.ll @@ -0,0 +1,31 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; Function Attrs: nounwind +define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { +entry: + %test = icmp eq i32 %x, 0 + %tmp1 = select i1 %test, double %a, double 2.000000e-03 + %tmp2 = select i1 %test, double %b, double 0.000000e+00 + %tmp3 = select i1 %test, double %c, double 5.000000e-03 + + %res1 = fadd double %tmp1, %tmp2 + %result = fadd double %res1, %tmp3 + ret double %result + +; CHECK-LABEL: @testBranchCoal +; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 +; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] +; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha +; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha +; CHECK-DAG: xxlxor 2, 2, 2 +; CHECK-NOT: beq +; 
CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] +; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]] +; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] +; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] +; CHECK: .LBB[[LAB1]] +; CHECK: xsadddp 0, 1, 2 +; CHECK: xsadddp 1, 0, 3 +; CHECK: blr +} Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -1026,9 +1026,8 @@ %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2 ret ppc_fp128 %cond -; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion -; works, we end up with two blocks with the same predicate. These could be -; combined. +; The default branchCoalescing optimization merged the two same predicate blocks +; that was expanded by the late SELECT_* pseudo-instruction expansion. ; CHECK-LABEL: @testppc_fp128eq ; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8 @@ -1040,10 +1039,8 @@ ; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]] ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]] ; CHECK: fmr 11, 9 -; CHECK: .LBB[[BB1]]: -; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]] ; CHECK: fmr 12, 10 -; CHECK: .LBB[[BB2]]: +; CHECK: .LBB[[BB1]]: ; CHECK-DAG: fmr 1, 11 ; CHECK-DAG: fmr 2, 12 ; CHECK: blr Index: test/CodeGen/Thumb/select.ll =================================================================== --- test/CodeGen/Thumb/select.ll +++ test/CodeGen/Thumb/select.ll @@ -74,9 +74,9 @@ } ; CHECK-LABEL: f7: ; CHECK: blt -; CHECK: blt +; CHECK-NOT: blt ; CHECK: __ltdf2 ; CHECK-EABI-LABEL: f7: ; CHECK-EABI: __aeabi_dcmplt ; CHECK-EABI: bne -; CHECK-EABI: bne +; CHECK-EABI-NOT: bne