Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -397,6 +397,9 @@ /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); + + /// Branch Coalescing - combine basic blocks guarded by the same branch + extern char &BranchCoalescingID; } // End llvm namespace /// Target machine pass initializer for passes with dependencies. Use with Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -77,6 +77,7 @@ void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); +void initializeBranchCoalescingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); Index: lib/CodeGen/BranchCoalescing.cpp =================================================================== --- /dev/null +++ lib/CodeGen/BranchCoalescing.cpp @@ -0,0 +1,740 @@ +//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Coalesce basic blocks guarded by the same branch condition into a single +/// basic block. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coal-branch"
+
+// Tri-state switch for the pass. The pass compares this option against
+// cl::BOU_FALSE, so it only turns itself off when the flag is explicitly set
+// to false.
+static cl::opt<cl::boolOrDefault>
+    EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
+                           cl::desc("enable coalescing of duplicate branches"));
+
+STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
+STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
+STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
+
+//===----------------------------------------------------------------------===//
+// BranchCoalescing
+//===----------------------------------------------------------------------===//
+///
+/// Improve scheduling by coalescing branches that depend on the same condition.
+/// This pass looks for blocks that are guarded by the same branch condition
+/// and attempts to merge the blocks together. Such opportunities arise from
+/// the expansion of select statements in the IR.
+///
+/// For example, consider the following LLVM IR:
+///
+/// %test = icmp eq i32 %x, 0
+/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
+/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// This IR expands to the following machine code on PowerPC:
+///
+/// BB#0: derived from LLVM BB %entry
+///    Live Ins: %F1 %F3 %X6
+///
+///        %vreg0 = COPY %F1; F8RC:%vreg0
+///        %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4
+///        %vreg8 = LXSDX %ZERO8, %vreg7, %RM;
+///                    mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+///        BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
+///    Successors according to CFG: BB#1(?%) BB#2(?%)
+///
+/// BB#1: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#0
+///    Successors according to CFG: BB#2(?%)
+///
+/// BB#2: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#0 BB#1
+///        %vreg9 = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+///                    F8RC:%vreg9,%vreg8,%vreg0
+///
+///        BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+///    Successors according to CFG: BB#3(?%) BB#4(?%)
+///
+/// BB#3: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#2
+///    Successors according to CFG: BB#4(?%)
+///
+/// BB#4: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#2 BB#3
+///        %vreg13 = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
+///                    F8RC:%vreg13,%vreg12,%vreg2
+///
+///        BLR8 %LR8, %RM, %F1
+///
+/// When this pattern is detected, branch coalescing will try to collapse
+/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
+///
+/// If all conditions are met, the IR should collapse to:
+///
+/// BB#0: derived from LLVM BB %entry
+///    Live Ins: %F1 %F3 %X6
+///
+///        %vreg0 = COPY %F1; F8RC:%vreg0
+///        %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4
+///        %vreg8 = LXSDX %ZERO8, %vreg7, %RM;
+///                    mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+///
+///        BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+///    Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
+///      BB#4(0x55555554 / 0x80000000 = 66.67%)
+///
+/// BB#1: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#0
+///    Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
+///
+/// BB#4: derived from LLVM BB %entry
+///    Predecessors according to CFG: BB#0 BB#1
+///        %vreg9 = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+///                    F8RC:%vreg9,%vreg8,%vreg0
+///        %vreg13 = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
+///                    F8RC:%vreg13,%vreg12,%vreg2
+///
+///        BLR8 %LR8, %RM, %F1
+///
+/// Branch coalescing does not split blocks; it moves everything in the same
+/// direction, ensuring it does not break use/definition semantics.
+///
+/// PHI nodes and their corresponding use instructions are moved to the
+/// successor block if there are no uses within the successor block's PHI
+/// nodes. PHI node ordering cannot be assumed.
+///
+/// Non-PHI instructions can be moved up to the predecessor basic block or down
+/// to the successor basic block following any PHI instructions. Whether they
+/// move up or down depends on whether the register(s) defined in the
+/// instructions are used in the current block or in any PHI instructions at
+/// the beginning of the successor block.
+
+namespace {
+
+/// Machine function pass that merges consecutive blocks guarded by the same
+/// branch condition.
+class BranchCoalescing : public MachineFunctionPass {
+  /// One branch under consideration: the block that holds the branch, the two
+  /// blocks reachable from it, and the bookkeeping filled in while deciding
+  /// whether it can be merged.
+  struct CoalescingCandidateInfo {
+    MachineBasicBlock *BranchBlock;       ///< Block containing the branch
+    MachineBasicBlock *BranchTargetBlock; ///< Block branched to
+    MachineBasicBlock *FallThroughBlock;  ///< Fall-through if branch not taken
+    SmallVector<MachineOperand, 4> Cond;  ///< Condition operands reported by
+                                          ///< TargetInstrInfo::analyzeBranch
+    bool MustMoveDown; ///< Set by canMerge(): code cannot move up
+    bool MustMoveUp;   ///< Set by canMerge(): code cannot move down
+
+    CoalescingCandidateInfo();
+    void clear();
+  };
+
+  // Per-function state, refreshed by initialize().
+  MachineDominatorTree *MDT;
+  MachinePostDominatorTree *MPDT;
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+  void initialize(MachineFunction &F);
+  bool analyzeBranch(CoalescingCandidateInfo &Cand);
+  bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
+                         ArrayRef<MachineOperand> OperandList2) const;
+
+  /// The pass runs unless -enable-branch-coalesce was explicitly set to false.
+  static bool isBranchCoalescingEnabled() {
+    return EnableBranchCoalescing != cl::BOU_FALSE;
+  }
+
+  MachineBasicBlock::iterator getFirstPHI(MachineBasicBlock &MBB) const;
+
+public:
+  static char ID;
+
+  BranchCoalescing() : MachineFunctionPass(ID) {
+    initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Both the dominator and the post-dominator tree are needed to validate
+  /// candidates.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "Branch Coalescing"; }
+
+  void mergeCandidates(CoalescingCandidateInfo &From,
+                       CoalescingCandidateInfo &To);
+  bool canMoveTo(const MachineInstr &MI, const MachineBasicBlock &MBB,
+                 bool MoveToBeginning) const;
+  bool canMerge(CoalescingCandidateInfo &From,
+                CoalescingCandidateInfo &To) const;
+  void moveAndUpdatePHIs(MachineBasicBlock *FromMBB, MachineBasicBlock *ToMBB);
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char BranchCoalescing::ID = 0;
+char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+
+INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing",
+                      "Branch Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing",
+                    false, false)
+
+/// Start with no blocks recorded and no movement constraints.
+BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+    : BranchBlock(nullptr), BranchTargetBlock(nullptr),
+      FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
+
+/// Return the candidate to its freshly-constructed state so it can be reused
+/// for the next block.
+void BranchCoalescing::CoalescingCandidateInfo::clear() {
+  BranchBlock = nullptr;
+  BranchTargetBlock = nullptr;
+  FallThroughBlock = nullptr;
+  Cond.clear();
+  MustMoveDown = false;
+  MustMoveUp = false;
+}
+
+/// Cache the analyses and target hooks needed while processing \p MF.
+void BranchCoalescing::initialize(MachineFunction &MF) {
+  MDT = &getAnalysis<MachineDominatorTree>();
+  MPDT = &getAnalysis<MachinePostDominatorTree>();
+  TII = MF.getSubtarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+  // The STATISTIC counters are intentionally not reset here: this runs once
+  // per function, and zeroing them would discard the counts gathered for
+  // every function processed earlier.
+}
+
+///
+/// Analyze the branch statement to determine if it can be coalesced. This
+/// method analyses the branch statement for the given candidate to determine
+/// if it can be coalesced. If the branch can be coalesced, then the
+/// BranchTargetBlock and the FallThroughBlock are recorded in the specified
+/// Candidate. Note that Cand can be modified even if this method fails.
+///
+///\param[in,out] Cand The coalescing candidate to analyze
+///\return true if and only if the branch can be coalesced, false otherwise
+///
+bool BranchCoalescing::analyzeBranch(CoalescingCandidateInfo &Cand) {
+  DEBUG(dbgs() << "Analyzing branch for block " << Cand.BranchBlock->getNumber()
+               << ": ");
+  MachineBasicBlock *FalseMBB = nullptr;
+
+  if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
+                         Cand.Cond)) {
+    DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
+    return false;
+  }
+
+  for (auto &I : Cand.BranchBlock->terminators()) {
+    DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
+    if (!I.isBranch())
+      continue;
+
+    if (I.getNumOperands() != I.getNumExplicitOperands()) {
+      DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
+                   << "\n");
+      return false;
+    }
+  }
+
+  // For now only consider triangles (i.e, BranchTargetBlock is set,
+  // FalseMBB is null)
+  if (!Cand.BranchTargetBlock || FalseMBB) {
+    DEBUG(dbgs() << "Does not form a triangle - skip\n");
+    return false;
+  }
+
+  if (Cand.BranchTargetBlock == Cand.BranchBlock) {
+    DEBUG(dbgs() << "Branch to the same block - skip\n");
+    return false;
+  }
+
+  // Only consider simple control flow for now. In other words, only try to
+  // coalesce the branch-taken block (i.e., BranchTargetBlock) if it
+  // post-dominates the current block
+  if (!MPDT->dominates(Cand.BranchTargetBlock, Cand.BranchBlock)) {
+    DEBUG(dbgs() << "Complex control flow - skip\n");
+    return false;
+  }
+
+  if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
+    DEBUG(dbgs() << "EH Pad - skip\n");
+    return false;
+  }
+
+  // Ensure there are only two successors
+  if (Cand.BranchBlock->succ_size() != 2) {
+    DEBUG(dbgs() << "Does not have 2 successors - skip\n");
+    return false;
+  }
+
+  // Sanity check - the block must be able to fall through
+  assert(Cand.BranchBlock->canFallThrough() &&
+         "Expecting the block to fall through!");
+
+  // Record the fall through block: of the two successors, it is the one that
+  // is not the branch target.
+  for (MachineBasicBlock *Succ : Cand.BranchBlock->successors())
+    if (Succ != Cand.BranchTargetBlock)
+      Cand.FallThroughBlock = Succ;
+
+  assert(Cand.FallThroughBlock && "Expecting a valid fall-through block\n");
+
+  if (!Cand.FallThroughBlock->empty()) {
+    DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
+    return false;
+  }
+
+  if (!Cand.FallThroughBlock->isSuccessor(Cand.BranchTargetBlock)) {
+    DEBUG(dbgs()
+          << "Successor of fall through block is not branch taken block\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "Valid Candidate\n");
+  return true;
+}
+
+///
+/// Determine if the two operand lists are identical
+///
+/// Two operands are accepted as identical when MachineOperand::isIdenticalTo
+/// says so, or when both are virtual registers whose defining instructions
+/// produce the same value according to the target. Every other mismatch,
+/// including differing physical registers, makes the lists different.
+///
+/// \param[in] OpList1 operand list
+/// \param[in] OpList2 operand list
+/// \return true if and only if the operands lists are identical
+///
+bool BranchCoalescing::identicalOperands(
+    ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
+  if (OpList1.size() != OpList2.size()) {
+    DEBUG(dbgs() << "Operand list is different size\n");
+    return false;
+  }
+
+  for (unsigned i = 0; i < OpList1.size(); ++i) {
+    const MachineOperand &Op1 = OpList1[i];
+    const MachineOperand &Op2 = OpList2[i];
+
+    if (Op1.getType() != Op2.getType()) {
+      DEBUG(dbgs() << "Operands are different types\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << "Op1: " << Op1 << "\n"
+                 << "Op2:" << Op2 << "\n");
+
+    if (Op1.isIdenticalTo(Op2)) {
+      DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
+      continue;
+    }
+
+    // If the operands are not identical, but are virtual registers, check to
+    // see if the definition of the register produces the same value. If they
+    // produce the same value, consider them to be identical.
+    if (Op1.isReg() && Op2.isReg() &&
+        TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
+        TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+      MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
+      MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
+      if (Op1Def && Op2Def && TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
+        DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
+                     << " produce the same value!\n");
+        continue;
+      }
+      DEBUG(dbgs() << "Operands produce different values\n");
+      return false;
+    }
+
+    // Anything else (physical registers, a virtual/physical mix, or
+    // non-register operands) cannot be proven equal, so be conservative in
+    // every build mode rather than relying on an assertion.
+    DEBUG(dbgs() << "Operands are not provably identical - assume different\n");
+    return false;
+  }
+  return true;
+}
+
+///
+/// Get the first PHI instruction in the given machine basic block
+///
+/// \param[in] MBB The machine basic block to search
+/// \return The first PHI instruction, or end if no PHI instruction is found
+///
+MachineBasicBlock::iterator
+BranchCoalescing::getFirstPHI(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::instr_iterator I = MBB.instr_begin(), E = MBB.instr_end();
+  while (I != E && !I->isPHI())
+    ++I;
+  return I;
+}
+
+///
+/// Move PHI instructions in From into To. This method moves any PHI
+/// instructions in From into To and updates them to refer to the new block.
+/// They are inserted ahead of the first PHI instruction already in To, or at
+/// the very beginning of To when it has none.
+///
+/// \param[in] FromMBB Block to move PHI instructions from
+/// \param[in] ToMBB Block to move PHI instructions to
+///
+void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *FromMBB,
+                                         MachineBasicBlock *ToMBB) {
+  MachineBasicBlock::iterator InsertLoc = getFirstPHI(*ToMBB);
+
+  // getFirstPHI() returns end() when ToMBB holds no PHI; insert at the top of
+  // the block in that case.
+  if (InsertLoc == ToMBB->end())
+    InsertLoc = ToMBB->begin();
+
+  for (MachineBasicBlock::iterator MI = FromMBB->begin(),
+                                   ME = FromMBB->getFirstNonPHI();
+       MI != ME && MI->isPHI();) {
+    MachineInstr &PHIInst = *MI;
+    // Go to next instruction before moving the current instruction
+    ++MI;
+    // Walk the even-numbered operands, which are read as block references,
+    // and retarget any that still name FromMBB.
+    for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
+      MachineOperand &MO = PHIInst.getOperand(i);
+      if (MO.getMBB() == FromMBB)
+        MO.setMBB(ToMBB);
+    }
+    ToMBB->splice(InsertLoc, FromMBB, &PHIInst);
+  }
+}
+
+///
+/// Determine if the specified instruction can be moved to the specified machine
+/// basic block.
+///
+/// If MoveToBeginning is set to true, it checks whether the instruction can
+/// move to the beginning of the specified block (immediately following any PHI
+/// instructions). The move is refused when a register defined by the
+/// instruction is used by a PHI instruction located in the specified block.
+///
+/// If MoveToBeginning is set to false, it checks whether the instruction can
+/// move to the end of the specified block, immediately before the first
+/// terminator. The move is refused when a virtual register used by the
+/// instruction is defined by a PHI instruction in the instruction's current
+/// block.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] MBB the machine basic block to move to
+/// \param[in] MoveToBeginning true indicates move to the beginning of MBB,
+///            false indicates move to end of MBB.
+/// \return true if it is safe to move MI to MBB, false otherwise
+///
+bool BranchCoalescing::canMoveTo(const MachineInstr &MI,
+                                 const MachineBasicBlock &MBB,
+                                 bool MoveToBeginning) const {
+
+  if (MoveToBeginning) {
+    DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
+                 << MBB.getNumber() << "\n");
+    for (auto &Def : MI.defs()) { // Looking at Def
+      if (Def.isReg())            // could it be anything else?
+        for (auto &Use : MRI->use_instructions(Def.getReg())) {
+          if (Use.isPHI() && Use.getParent() == &MBB) {
+            DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n");
+            return false;
+          }
+        }
+    }
+  } else {
+    DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
+                 << MBB.getNumber() << "\n");
+    for (auto &Use : MI.uses()) {
+      if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+        MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
+        if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
+          DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
+          return false;
+        } else {
+          DEBUG(dbgs() << " *** def is in another block -- safe to move!\n");
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << " Safe to move\n");
+  return true;
+}
+
+///
+/// Determine if the two candidates can be merged. This method determines
+/// whether the two coalescing candidates can be merged. In order to be merged,
+/// all instructions must be able to
+/// 1. Move to the beginning of the From.BranchTargetBlock;
+/// 2. Move to the end of the To.BranchBlock.
+/// Merging involves moving the instructions in the To.BranchTargetBlock
+/// (also From.BranchBlock).
+///
+/// Moving down, to the beginning of the From.BranchTargetBlock, is not
+/// possible if any register defined in From.BranchBlock is used in a PHI node
+/// in the From.BranchTargetBlock. In this case, check whether the statement
+/// can be moved up, to the end of the To.BranchBlock (immediately before the
+/// branch statement). If it cannot move, then these blocks cannot be merged.
+///
+/// Note that there is no analysis for moving instructions past the fall-through
+/// blocks because they are assumed to be empty. If they are not empty, then
+/// additional safety analysis must be added here to ensure it is safe to move
+/// the instructions in From.BranchBlock past the fall-through blocks.
+///
+/// \param[in,out] From The candidate to move statements from; its
+///                MustMoveUp / MustMoveDown flags are updated here
+/// \param[in] To The candidate to move statements to
+/// \return true if all instructions in From.BranchBlock can be merged into a
+/// block in To; false otherwise.
+///
+bool BranchCoalescing::canMerge(CoalescingCandidateInfo &From,
+                                CoalescingCandidateInfo &To) const {
+  assert(To.BranchTargetBlock == From.BranchBlock &&
+         "Assuming blocks to be the same\n");
+
+  assert(MDT->dominates(To.BranchBlock, From.BranchBlock) &&
+         "Expecting To to dominate From");
+
+  assert(MPDT->dominates(From.BranchBlock, To.BranchBlock) &&
+         "Expecting From to post-dominate To");
+
+  assert(To.FallThroughBlock->empty() && From.FallThroughBlock->empty() &&
+         "Expecting fall-through blocks to be empty");
+
+  // A plain literal is used because __PRETTY_FUNCTION__ is a compiler
+  // extension that is not available everywhere.
+  DEBUG(dbgs() << "Entering BranchCoalescing::canMerge\n");
+
+  // Walk through PHI nodes first and see if they force the merge into the
+  // From.BranchTargetBlock.
+  for (MachineBasicBlock::iterator I = From.BranchBlock->begin(),
+                                   E = From.BranchBlock->getFirstNonPHI();
+       I != E; ++I) {
+    for (auto &Def : I->defs())
+      if (Def.isReg())
+        for (auto &Use : MRI->use_instructions(Def.getReg())) {
+          if (Use.getParent() == From.BranchBlock) {
+            DEBUG(dbgs() << "PHI " << *I << " defines register used in this "
+                                            "block -- all must move down\n");
+            From.MustMoveDown = true;
+          }
+          if (Use.isPHI() && Use.getParent() == From.BranchTargetBlock) {
+            DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
+                                            "PHI within branch target block -- "
+                                            "can't merge\n");
+            NumPHINotMoved++;
+            return false;
+          }
+        }
+  }
+
+  // Every remaining instruction is tested in both directions; failing one
+  // direction pins the whole block to the other.
+  for (MachineBasicBlock::iterator I = From.BranchBlock->getFirstNonPHI(),
+                                   E = From.BranchBlock->end();
+       I != E; ++I) {
+    if (!canMoveTo(*I, *From.BranchTargetBlock, true)) {
+      DEBUG(dbgs() << "Instruction " << *I
+                   << " cannot move down - must move up!\n");
+      From.MustMoveUp = true;
+    }
+    if (!canMoveTo(*I, *To.BranchBlock, false)) {
+      DEBUG(dbgs() << "Instruction " << *I
+                   << " cannot move up - must move down!\n");
+      From.MustMoveDown = true;
+    }
+  }
+
+  // Contradictory constraints mean the block cannot be moved as a unit.
+  return !(From.MustMoveUp && From.MustMoveDown);
+}
+
+///
+/// Merge blocks from From into blocks from To and remove From blocks from
+/// function.
+/// +/// +------------------+ +/// | To.BranchBlock | +/// +------------------+ +/// / | +/// / +-----------------------+ +/// | | To.FallThroughBlock | +/// \ +-----------------------+ +/// \ | +/// +-----------------------+ +/// | To.BranchTargetBlock | +/// | From.BranchBlock | +/// +-----------------------+ +/// / | +/// / +-------------------------+ +/// | | From.FallThroughBlock | +/// \ +-------------------------+ +/// \ | +/// +-------------------------+ +/// | From.BranchTargetBlock | +/// +-------------------------+ +/// +/// +/// +--------------------+ +/// | To.BranchBlock | +/// | From.BranchBlock | +/// +--------------------+ +/// / | +/// / +-------------------------+ +/// | | To.FallThroughBlock | +/// | | From.FallThroughBlock | +/// \ +-------------------------+ +/// \ | +/// +-------------------------+ +/// | From.BranchTargetBlock | +/// +-------------------------+ +/// +/// This method will move the instructions from From.BranchBlock, +/// From.BranchTargetBlock, and From.FallThroughBlock into +/// To.BranchBlock, To.BranchTargetBlock and To.FallThroughBlock +/// respectively. The successors for blocks in To will be updated to use the +/// successors from blocks in From. Finally, the blocks in From will be +/// removed from the function. 
+///
+/// \param[in] From The candidate to move blocks from
+/// \param[in] To The candidate to move blocks to
+///
+void BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &From,
+                                       CoalescingCandidateInfo &To) {
+  assert(To.BranchTargetBlock == From.BranchBlock &&
+         "Expecting From to immediately follow To");
+
+  assert(To.FallThroughBlock->isSuccessor(From.BranchBlock) &&
+         "Expecting From.BranchBlock to be a successor of To.FallThroughBlock");
+
+  // Handle the BranchBlock first
+  // Move any PHIs in From.BranchBlock down to the branch-taken block
+  moveAndUpdatePHIs(From.BranchBlock, From.BranchTargetBlock);
+
+  // Move the remaining non-terminator instructions out of From.BranchBlock:
+  // down into From.BranchTargetBlock when MustMoveDown is set, otherwise up
+  // into To.BranchBlock ahead of its terminators.
+  MachineBasicBlock::iterator End = From.BranchBlock->getFirstTerminator();
+  MachineBasicBlock::iterator FIL = From.BranchTargetBlock->getFirstNonPHI();
+  MachineBasicBlock::iterator TIL = To.BranchBlock->getFirstTerminator();
+
+  for (MachineBasicBlock::iterator MI = From.BranchBlock->getFirstNonPHI();
+       MI != End;) {
+    MachineInstr &CurrInst = *MI;
+
+    // Go to the next instruction before moving the current instruction
+    ++MI;
+    if (From.MustMoveDown) {
+      assert(!From.MustMoveUp &&
+             "Cannot have both MustMoveDown and MustMoveUp set!\n");
+      From.BranchTargetBlock->splice(FIL, From.BranchBlock, &CurrInst);
+    } else
+      To.BranchBlock->splice(TIL, From.BranchBlock, &CurrInst);
+  }
+
+  // Clean-up the control flow
+  // Remove From.FallThroughBlock before transferring successors of
+  // From.BranchBlock to To.BranchBlock.
+  From.BranchBlock->removeSuccessor(From.FallThroughBlock);
+  To.BranchBlock->transferSuccessorsAndUpdatePHIs(From.BranchBlock);
+  // Update branch in To.BranchBlock to jump to From.BranchTargetBlock
+  // In this case, To.BranchTargetBlock == From.BranchBlock.
+  To.BranchBlock->ReplaceUsesOfBlockWith(From.BranchBlock,
+                                         From.BranchTargetBlock);
+  // Remove the branch statement(s) in From.BranchBlock
+  MachineBasicBlock::iterator I = From.BranchBlock->terminators().begin();
+  while (I != From.BranchBlock->terminators().end()) {
+    MachineInstr &CurrInst = *I;
+    ++I;
+
+    if (CurrInst.isBranch())
+      CurrInst.eraseFromParent();
+  }
+
+  // Merge FallThroughBlock
+  // It is not necessary to move any code between the fall-through blocks
+  // because they are empty!
+  // We still need to transfer the successors though, and update the CFG
+  To.FallThroughBlock->transferSuccessorsAndUpdatePHIs(From.FallThroughBlock);
+  To.FallThroughBlock->removeSuccessor(From.BranchBlock);
+
+  // Remove the blocks from the function.
+  assert(From.BranchBlock->empty() && "Expecting branch block to be empty!");
+  From.BranchBlock->eraseFromParent();
+
+  assert(From.FallThroughBlock->empty() &&
+         "Expecting fall-through block to be empty!\n");
+  From.FallThroughBlock->eraseFromParent();
+
+  NumBlocksCoalesced++;
+}
+
+/// Pass entry point. For each block, repeatedly try to merge it with the block
+/// its branch targets, as long as both are guarded by the same condition.
+///
+/// \param[in,out] MF The machine function to transform
+/// \return true if any blocks were merged
+bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+  bool didSomething = false;
+
+  if (skipFunction(*MF.getFunction()) || MF.empty() ||
+      !isBranchCoalescingEnabled())
+    return didSomething;
+
+  DEBUG(dbgs() << "******** Branch Coalescing ********\n");
+  initialize(MF);
+
+  DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+
+  CoalescingCandidateInfo Cand1, Cand2;
+  // Walk over blocks and find candidates to merge
+  // Continue trying to merge with the first candidate found, as long as merging
+  // is successful.
+  for (MachineBasicBlock &MBB : MF) {
+    bool MergedCandidates = false;
+    do {
+      MergedCandidates = false;
+      Cand1.clear();
+      Cand2.clear();
+
+      Cand1.BranchBlock = &MBB;
+
+      // If unable to analyze the branch, then continue to next block
+      if (!analyzeBranch(Cand1))
+        break;
+
+      Cand2.BranchBlock = Cand1.BranchTargetBlock;
+      if (!analyzeBranch(Cand2))
+        break;
+
+      // Sanity check
+      // The branch-taken block of the second candidate should post-dominate the
+      // first candidate
+      assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
+             "Branch-taken block should post-dominate first candidate");
+
+      if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
+        DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
+                     << Cand2.BranchBlock->getNumber()
+                     << " have different branches\n");
+        break;
+      }
+      if (!canMerge(Cand2, Cand1)) {
+        DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
+                     << " and " << Cand2.BranchBlock->getNumber() << "\n");
+        NumBlocksNotCoalesced++;
+        // Stop working on this block. A 'continue' here would only leave the
+        // loop because the do-while condition happens to be false.
+        break;
+      }
+      DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
+                   << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
+      mergeCandidates(Cand2, Cand1);
+      MergedCandidates = true;
+      didSomething = true;
+
+      DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
+    } while (MergedCandidates);
+  }
+#ifndef NDEBUG
+  MF.verify(nullptr, "in branch coalescing");
+#endif // NDEBUG
+
+  DEBUG(dbgs() << "Finished Branch Coalescing\n");
+  return didSomething;
+}
Index: lib/CodeGen/CMakeLists.txt
===================================================================
--- lib/CodeGen/CMakeLists.txt
+++ lib/CodeGen/CMakeLists.txt
@@ -4,6 +4,7 @@
   Analysis.cpp
   AtomicExpandPass.cpp
   BasicTargetTransformInfo.cpp
+  BranchCoalescing.cpp
   BranchFolding.cpp
   BranchRelaxation.cpp
   BuiltinGCs.cpp
Index: lib/CodeGen/CodeGen.cpp
===================================================================
--- lib/CodeGen/CodeGen.cpp
+++ lib/CodeGen/CodeGen.cpp @@ -21,6 +21,7 @@ /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); + initializeBranchCoalescingPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -704,6 +704,10 @@ addPass(&MachineLICMID, false); addPass(&MachineCSEID, false); + + // Coalesce basic blocks with the same branch condition + addPass(&BranchCoalescingID); + addPass(&MachineSinkingID); addPass(&PeepholeOptimizerID); Index: test/CodeGen/PowerPC/branch_coalesce.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/branch_coalesce.ll @@ -0,0 +1,31 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; Function Attrs: nounwind +define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { +entry: + %test = icmp eq i32 %x, 0 + %tmp1 = select i1 %test, double %a, double 2.000000e-03 + %tmp2 = select i1 %test, double %b, double 0.000000e+00 + %tmp3 = select i1 %test, double %c, double 5.000000e-03 + + %res1 = fadd double %tmp1, %tmp2 + %result = fadd double %res1, %tmp3 + ret double %result + +; CHECK-LABEL: @testBranchCoal +; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 +; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] +; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha +; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha +; CHECK-DAG: xxlxor 2, 2, 2 +; CHECK-NOT: beq +; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] +; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]] +; 
CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] +; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] +; CHECK: .LBB[[LAB1]] +; CHECK: xsadddp 0, 1, 2 +; CHECK: xsadddp 1, 0, 3 +; CHECK: blr +} Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -886,9 +886,8 @@ %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2 ret ppc_fp128 %cond -; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion -; works, we end up with two blocks with the same predicate. These could be -; combined. +; The default branchCoalescing optimization merged the two same predicate blocks +; that was expanded by the late SELECT_* pseudo-instruction expansion. ; CHECK-LABEL: @testppc_fp128eq ; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8 @@ -900,10 +899,8 @@ ; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]] ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]] ; CHECK: fmr 11, 9 -; CHECK: .LBB[[BB1]]: -; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]] ; CHECK: fmr 12, 10 -; CHECK: .LBB[[BB2]]: +; CHECK: .LBB[[BB1]]: ; CHECK-DAG: fmr 1, 11 ; CHECK-DAG: fmr 2, 12 ; CHECK: blr Index: test/CodeGen/Thumb/select.ll =================================================================== --- test/CodeGen/Thumb/select.ll +++ test/CodeGen/Thumb/select.ll @@ -74,9 +74,9 @@ } ; CHECK-LABEL: f7: ; CHECK: blt -; CHECK: blt +; CHECK-NOT: blt ; CHECK: __ltdf2 ; CHECK-EABI-LABEL: f7: ; CHECK-EABI: __aeabi_dcmplt ; CHECK-EABI: bne -; CHECK-EABI: bne +; CHECK-EABI-NOT: bne