Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -282,6 +282,9 @@ /// \brief This pass lays out funclets contiguously. extern char &FuncletLayoutID; + /// \brief This pass removes identical tail BBs leaving only one of them. + extern char &RemoveEqualBBID; + /// This pass inserts the XRay instrumentation sleds if they are supported by /// the target platform. extern char &XRayInstrumentationID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -139,6 +139,7 @@ void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeForwardControlFlowIntegrityPass(PassRegistry&); void initializeFuncletLayoutPass(PassRegistry &); +void initializeRemoveEqualBBPass(PassRegistry &); void initializeFunctionImportLegacyPassPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -109,6 +109,7 @@ RegisterCoalescer.cpp RegisterPressure.cpp RegisterScavenging.cpp + RemoveEqualBB.cpp RenameIndependentSubregs.cpp RegisterUsageInfo.cpp RegUsageInfoCollector.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -35,6 +35,7 @@ initializeFinalizeMachineBundlesPass(Registry); initializeFEntryInserterPass(Registry); initializeFuncletLayoutPass(Registry); + initializeRemoveEqualBBPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); Index: lib/CodeGen/RemoveEqualBB.cpp =================================================================== --- lib/CodeGen/RemoveEqualBB.cpp +++ lib/CodeGen/RemoveEqualBB.cpp @@ -0,0 +1,275 @@ +//===-- RemoveEqualBB.cpp - Remove Equal BBs from a function ----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements removing of equal basic blocks if there are such BBs +// inside a function. Only one of such BBs leaves in th function +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +using namespace llvm; + +#define DEBUG_TYPE "remove-equal-bb" + +STATISTIC(NumEqBBsSurvived, "Number of survived (after transformation) BBs"); +STATISTIC(SizeEqBBsSurvived, "Size(in intructions) of survived BBs"); +STATISTIC(NumEqBBsErased, "Number of erased (in result of) transformation BBs"); +STATISTIC(SizeEqBBsErased, "Size(in intructions) of erased BBs"); +STATISTIC(NumEqBBCandidates, "Number of candidates for -O1 optimization"); +STATISTIC(SizeEqBBCandidates, + "Number of intructions which could be optimizad with -O1"); + +static cl::opt RemoveEqBBs("remove-equal-bbs", cl::init(false), + cl::Hidden, + cl::desc("Remove equal Basic Blocks.")); + +namespace { +class RemoveEqualBB : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + RemoveEqualBB() : MachineFunctionPass(ID) { + initializeRemoveEqualBBPass(*PassRegistry::getPassRegistry()); + } + /// \brief A handle to the loop info. + MachineLoopInfo *MLI; + + bool runOnMachineFunction(MachineFunction &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char RemoveEqualBB::ID = 0; +char &llvm::RemoveEqualBBID = RemoveEqualBB::ID; + +INITIALIZE_PASS_BEGIN(RemoveEqualBB, "remove-equal-bb", "Remove identical BBs", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockPlacement) +INITIALIZE_PASS_END(RemoveEqualBB, "remove-equal-bb", "Remove identical BBs", + false, false) + +static MachineBasicBlock *hasLayoutPredecessor(MachineBasicBlock *MBB) { + for (auto *Pred : MBB->predecessors()) { + if (Pred->isLayoutSuccessor(MBB)) + return Pred; + } + return nullptr; +} + +static bool Mark4Remove(SmallMapVector &BBsForRemove, + MachineLoopInfo *MLI, MachineBasicBlock *MBB4Remove, + MachineBasicBlock *MBB4Replace, MachineBasicBlock *X, + MachineBasicBlock *Y) { + if (MBB4Remove->hasAddressTaken() || MLI->getLoopFor(MBB4Remove)) { + DEBUG(dbgs() << "We have BB#" << MBB4Remove->getNumber(); + dbgs() << (MLI->getLoopFor(MBB4Remove) ? " inside loop\n" + : " with address taken\n");); + return X->getNumber() < Y->getNumber(); // Let's deal with it later + } + if (auto *LP = hasLayoutPredecessor(MBB4Remove)) { + // TODO: at the moment we can't remove BB with Layout Predecessor + DEBUG(dbgs() << "REQBB: BB#" << MBB4Remove->getNumber() + << " can't be removed because it has Layout Predecesser BB#" + << LP->getNumber() << "\n"); + // TODO: should we add MBB4Remove to survived? + return X->getNumber() < Y->getNumber(); + } + DEBUG(dbgs() << "Change successors of predecessors of BB#" + << MBB4Remove->getNumber() << "\n"); + auto *JTI = MBB4Remove->getParent()->getJumpTableInfo(); + + // We can't use predecessors from MBB4Remove directly because + // ReplaceUsesOfBlockWith destroys iterators + SmallVector Predecessors(MBB4Remove->pred_begin(), + MBB4Remove->pred_end()); + + for (auto *Pred : Predecessors) { + if (Pred->isSuccessor(MBB4Replace) && !Pred->isSuccessor(MBB4Remove)) + continue; // The MBB4Replace is already successor of Pred + // TODO: This should be removed or it should be assertion + if (Pred->isSuccessor(MBB4Remove)) { + DEBUG(dbgs() << "Change successors of predecessor BB#" + << Pred->getNumber() << "(instead of successor BB#" + << MBB4Remove->getNumber() << " another BB#" + << MBB4Replace->getNumber() << " will be used now.)\n"); + if (JTI) + JTI->ReplaceMBBInJumpTables(MBB4Remove, MBB4Replace); + Pred->ReplaceUsesOfBlockWith(MBB4Remove, MBB4Replace); + } else { + DEBUG(dbgs() << "!!! In function '" << MBB4Remove->getParent()->getName() + << "' predecessor BB#" << Pred->getNumber() + << " does not have successor BB#" << MBB4Remove->getNumber() + << " !!!\n Predecessor\n"; + Pred->dump(); dbgs() << "Successor\n"; MBB4Remove->dump()); + } + } + // assert(MBB4Remove->pred_empty() && "We can't have predecessors here"); + if (BBsForRemove.insert({MBB4Remove->getNumber(), MBB4Replace->getNumber()}) + .second) { + DEBUG(dbgs() << "BB#" << MBB4Remove->getNumber() + << (MBB4Remove->pred_size() ? " with" : " without"); + dbgs() << " pred(s) will be removed and replaced with BB#" + << MBB4Replace->getNumber() << "\n"); + NumEqBBsErased++; + SizeEqBBsErased += MBB4Remove->size(); + } + return X->getNumber() < Y->getNumber(); +} + +bool RemoveEqualBB::runOnMachineFunction(MachineFunction &F) { + // TODO: what's real minimum of BBs should we use? + if (!RemoveEqBBs || F.getNumBlockIDs() < 2) + return false; + + MLI = &getAnalysis(); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + // To minmize defferences in tests we'll keep current numbering + //F.RenumberBlocks(); + + // MBBs keeps pointers to all Machine BBs of the given Machine Function + std::vector MBBs; + for (auto &I : F) { + MBBs.push_back(&I); + } + // A list of BBs which will be alived after the transformation + SmallSet SurvivedBBs; + // A list of BBs which will be erased from the function after the + // transformation + SmallMapVector BBsForRemove; + + std::stable_sort(MBBs.begin(), MBBs.end(), [&](MachineBasicBlock *X, + MachineBasicBlock *Y) { + if (!X->size() || X->size() != Y->size() || + X->succ_size() != Y->succ_size()) + return X->getNumber() < Y->getNumber(); + MachineBasicBlock::instr_iterator IY = Y->instr_begin(); + MachineBasicBlock::instr_iterator IXE = X->instr_end(); + for (MachineBasicBlock::instr_iterator IX = X->instr_begin(); IX != IXE; + IX++) { + if (IX->isIdenticalTo(*IY)) { + IY++; + continue; + } + return X->getNumber() < Y->getNumber(); + } + // TODO: it's better to check successors before checking of instructions + // because most probably the number of successors is less than the number + // of instrs but we'd like to extend the suppoort of -O1 + // (optimize for size) + bool equal = true; + for (auto *S : X->successors()) { + if (!Y->isSuccessor(S)) { + equal = false; + break; + } + } + if (!equal) { + NumEqBBCandidates++; + SizeEqBBCandidates += X->size(); + DEBUG(dbgs() << "REQBB: In '" << F.getName() + << "' we have candidates for -O1: BB#" << + X->getNumber() + << " and BB#" << Y->getNumber() << "\n"); + return X->getNumber() < Y->getNumber(); + } else { + // We have 2 equal BBs. If one of them was already marked as survived then + // simply erase the second one + if (X->isEHPad() || X->isEHFuncletEntry() || X->isCleanupFuncletEntry()) + // TODO: We should support these kinds of BBs + return X->getNumber() < Y->getNumber(); + // TODO: it's possible that the second BB is better choice for surviving + // e.g. IF hasLayoutPredecessor(Y) AND NOT hasLayoutPredecessor(X) + if (SurvivedBBs.count(X->getNumber())) { + return Mark4Remove(BBsForRemove, MLI, Y, X, X, Y); + } else if (SurvivedBBs.count(Y->getNumber())) + return Mark4Remove(BBsForRemove, MLI, X, Y, X, Y); + // No one BB was marked as survived. It means if the second one was marked + // for erasing then there is + // equal better BB somewhere: we should simply to erase the second BB + // TODO: it's possible that the second BB is better choice for surviving + // than the already marked + if (auto V = BBsForRemove.lookup(X->getNumber())) + return Mark4Remove(BBsForRemove, MLI, Y, F.getBlockNumbered(V), X, Y); + if (auto V = BBsForRemove.lookup(Y->getNumber())) + return Mark4Remove(BBsForRemove, MLI, X, F.getBlockNumbered(V), X, Y); + + // We're here: it means no one of X and Y was previously marked in any way + // TODO: bool X86InstrInfo::isTailCall(const MachineInstr &Inst); + auto XTerm = X->getFirstInstrTerminator(); + // TODO: we should support CatchReturnInst + if (!(XTerm == X->instr_end() || /*isa(XTerm) ||*/ + (!XTerm->isReturn() && !XTerm->isUnconditionalBranch()))) { + // TODO: Let's start from these kinds of terminators + if (XTerm-- != X->instr_begin() && XTerm->isTerminator()) { + DEBUG(dbgs() << "\nREQBB: We have at least 2 terminators: let's " + "elaborate it later\n"; + X->dump();); + return X->getNumber() < Y->getNumber(); + } + MachineBasicBlock *XLayoutPred = hasLayoutPredecessor(X); + MachineBasicBlock *YLayoutPred = hasLayoutPredecessor(Y); + // TODO: We need to select BestBB to leave it alive but the selection + // algorythm should be re-implemented properly + if (XLayoutPred && YLayoutPred) { + DEBUG(dbgs() << "\nREQBB: Both X-BB#" << X->getNumber() + << " and Y-BB#" << Y->getNumber() + << " have Layout Predecessors: elaborate it later\n"; + X->dump(); Y->dump()); + return X->getNumber() < Y->getNumber(); + } + if (!XLayoutPred && YLayoutPred) { + std::swap(X, Y); + } + // Now we have: + // X - should be alive + // Y - should be removed + Mark4Remove(BBsForRemove, MLI, Y, X, X, Y); + if (SurvivedBBs.insert(X->getNumber()).second) { + NumEqBBsSurvived++; + SizeEqBBsSurvived += X->size(); + } + } else { + DEBUG(dbgs() << "REQBB: In BB#" << X->getNumber() + << "we have illegal terminator preventing the " + "Equal BB optimization\n";); + } + } + return false; + }); + if (BBsForRemove.size()) { + for (auto &Number : BBsForRemove) { + auto RemBB = F.getBlockNumbered(Number.first); + // TODO: it should be done later because at the moment we don't support + // BBs inside loops (see Mark4Remove) + // MLI->removeBlock(RemBB); + RemBB->eraseFromParent(); + DEBUG(dbgs() << "BB#" << Number.first << " was removed\n"); + } + // To minmize defferences in tests we'll keep current numbering + //F.RenumberBlocks(); + return true; + } + return false; +} Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -667,6 +667,8 @@ addPass(createRegUsageInfoCollector()); addPass(&FuncletLayoutID, false); + if (getOptLevel() != CodeGenOpt::None) + addPass(&RemoveEqualBBID, false); addPass(&StackMapLivenessID, false); addPass(&LiveDebugValuesID, false); Index: test/CodeGen/X86/loop-search.ll =================================================================== --- test/CodeGen/X86/loop-search.ll +++ test/CodeGen/X86/loop-search.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -remove-equal-bbs | FileCheck %s ; This test comes from PR27136 ; We should hoist loop constant invariant @@ -8,7 +8,7 @@ ; CHECK-LABEL: search: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: testl %edx, %edx -; CHECK-NEXT: jle LBB0_1 +; CHECK-NEXT: jle LBB0_3 ; CHECK-NEXT: ## BB#4: ## %for.body.preheader ; CHECK-NEXT: movslq %edx, %rax ; CHECK-NEXT: xorl %ecx, %ecx @@ -22,12 +22,7 @@ ; CHECK-NEXT: incq %rcx ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jl LBB0_5 -; ### FIXME: BB#3 and LBB0_1 should be merged -; CHECK-NEXT: ## BB#3: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## kill: %AL %AL %EAX -; CHECK-NEXT: retq -; CHECK-NEXT: LBB0_1: +; CHECK-NEXT: LBB0_3: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq @@ -35,7 +30,6 @@ ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq -; entry: %cmp5 = icmp sgt i32 %count, 0 br i1 %cmp5, label %for.body.preheader, label %cleanup