diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1593,7 +1593,7 @@ auto Phi = cast(I); auto NewPhi = PHINode::Create(Phi->getType(), Incoming.size(), - Phi->getName() + ".moved", &FirstGuardBlock->back()); + Phi->getName() + ".moved", &FirstGuardBlock->front()); for (auto In : Incoming) { Value *V = UndefValue::get(Phi->getType()); if (In == Out) { @@ -1614,7 +1614,7 @@ } } -using BBPredicates = DenseMap; +using BBPredicates = DenseMap; using BBSetVector = SetVector; // Redirects the terminator of the incoming block to the first guard @@ -1630,6 +1630,8 @@ static std::tuple redirectToHub(BasicBlock *BB, BasicBlock *FirstGuardBlock, const BBSetVector &Outgoing) { + assert(isa(BB->getTerminator()) && + "Only support branch terminator.\n"); auto Branch = cast(BB->getTerminator()); auto Condition = Branch->isConditional() ? Branch->getCondition() : nullptr; @@ -1657,31 +1659,92 @@ assert(Succ0 || Succ1); return std::make_tuple(Condition, Succ0, Succ1); } - -// Capture the existing control flow as guard predicates, and redirect -// control flow from every incoming block to the first guard block in -// the hub. +// Setup the branch instructions for guard blocks. // -// There is one guard predicate for each outgoing block OutBB. The -// predicate is a PHINode with one input for each InBB which -// represents whether the hub should transfer control flow to OutBB if -// it arrived from InBB. These predicates are NOT ORTHOGONAL. The Hub -// evaluates them in the same order as the Outgoing set-vector, and -// control branches to the first outgoing block whose predicate -// evaluates to true. -static void convertToGuardPredicates( - BasicBlock *FirstGuardBlock, BBPredicates &GuardPredicates, - SmallVectorImpl &DeletionCandidates, const BBSetVector &Incoming, - const BBSetVector &Outgoing) { +// Each guard block terminates in a conditional branch that transfers +// control to the corresponding outgoing block or the next guard +// block. The last guard block has two outgoing blocks as successors +// since the condition for the final outgoing block is trivially +static void setupBranchForGuard(SmallVectorImpl &GuardBlocks, + const BBSetVector &Outgoing, + BBPredicates &GuardPredicates) { + // To help keep the loop simple, temporarily append the last + // outgoing block to the list of guard blocks. + GuardBlocks.push_back(Outgoing.back()); + + for (int i = 0, e = GuardBlocks.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + assert(GuardPredicates.count(Out)); + BranchInst::Create(Out, GuardBlocks[i + 1], GuardPredicates[Out], + GuardBlocks[i]); + } + + // Remove the last block from the guard list. + GuardBlocks.pop_back(); +} + +/// We are using one integer to represent the block we are branching to. Then at +/// each guard block, the predicate was calcuated using a simple `icmp eq`. +static void calcPredicateUsingInteger( + const BBSetVector &Incoming, const BBSetVector &Outgoing, + SmallVectorImpl &GuardBlocks, BBPredicates &GuardPredicates) { + auto &Context = Incoming.front()->getContext(); + auto FirstGuardBlock = GuardBlocks.front(); + + auto Phi = PHINode::Create(Type::getInt32Ty(Context), Incoming.size(), + "merged.bb.idx", FirstGuardBlock); + + for (auto In : Incoming) { + Value *Condition; + BasicBlock *Succ0; + BasicBlock *Succ1; + std::tie(Condition, Succ0, Succ1) = + redirectToHub(In, FirstGuardBlock, Outgoing); + Value *IncomingId = nullptr; + if (Succ0 && Succ1) { + // target_bb_index = Condition ? index_of_succ0 : index_of_succ1. + auto Succ0Iter = find(Outgoing, Succ0); + auto Succ1Iter = find(Outgoing, Succ1); + Value *Id0 = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), Succ0Iter)); + Value *Id1 = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), Succ1Iter)); + IncomingId = SelectInst::Create(Condition, Id0, Id1, "target.bb.idx", + In->getTerminator()); + } else { + // Get the index of the non-null successor. + auto SuccIter = Succ0 ? find(Outgoing, Succ0) : find(Outgoing, Succ1); + IncomingId = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), SuccIter)); + } + Phi->addIncoming(IncomingId, In); + } + + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + auto Cmp = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, Phi, + ConstantInt::get(Type::getInt32Ty(Context), i), + Out->getName() + ".predicate", GuardBlocks[i]); + GuardPredicates[Out] = Cmp; + } +} + +/// We record the predicate of each outgoing block using a phi of boolean. +static void calcPredicateUsingBooleans( + const BBSetVector &Incoming, const BBSetVector &Outgoing, + SmallVectorImpl &GuardBlocks, BBPredicates &GuardPredicates, + SmallVectorImpl &DeletionCandidates) { auto &Context = Incoming.front()->getContext(); auto BoolTrue = ConstantInt::getTrue(Context); auto BoolFalse = ConstantInt::getFalse(Context); + auto FirstGuardBlock = GuardBlocks.front(); // The predicate for the last outgoing is trivially true, and so we // process only the first N-1 successors. for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { auto Out = Outgoing[i]; LLVM_DEBUG(dbgs() << "Creating guard for " << Out->getName() << "\n"); + auto Phi = PHINode::Create(Type::getInt1Ty(Context), Incoming.size(), StringRef("Guard.") + Out->getName(), FirstGuardBlock); @@ -1700,105 +1763,104 @@ // for Succ0 and Succ1 complement each other. If Succ0 is visited // first in the loop below, control will branch to Succ0 using the // corresponding predicate. But if that branch is not taken, then - // control must reach Succ1, which means that the predicate for - // Succ1 is always true. + // control must reach Succ1, which means that the incoming value of + // the predicate from `In` is true for Succ1. bool OneSuccessorDone = false; for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { auto Out = Outgoing[i]; - auto Phi = GuardPredicates[Out]; + PHINode *Phi = cast(GuardPredicates[Out]); if (Out != Succ0 && Out != Succ1) { Phi->addIncoming(BoolFalse, In); - continue; - } - // Optimization: When only one successor is an outgoing block, - // the predicate is always true. - if (!Succ0 || !Succ1 || OneSuccessorDone) { + } else if (!Succ0 || !Succ1 || OneSuccessorDone) { + // Optimization: When only one successor is an outgoing block, + // the incoming predicate from `In` is always true. Phi->addIncoming(BoolTrue, In); - continue; - } - assert(Succ0 && Succ1); - OneSuccessorDone = true; - if (Out == Succ0) { - Phi->addIncoming(Condition, In); - continue; + } else { + assert(Succ0 && Succ1); + if (Out == Succ0) { + Phi->addIncoming(Condition, In); + } else { + auto Inverted = invertCondition(Condition); + DeletionCandidates.push_back(Condition); + Phi->addIncoming(Inverted, In); + } + OneSuccessorDone = true; } - auto Inverted = invertCondition(Condition); - DeletionCandidates.push_back(Condition); - Phi->addIncoming(Inverted, In); } } } -// For each outgoing block OutBB, create a guard block in the Hub. The -// first guard block was already created outside, and available as the -// first element in the vector of guard blocks. +// Capture the existing control flow as guard predicates, and redirect +// control flow from \p Incoming block through the \p GuardBlocks to the +// \p Outgoing blocks. // -// Each guard block terminates in a conditional branch that transfers -// control to the corresponding outgoing block or the next guard -// block. The last guard block has two outgoing blocks as successors -// since the condition for the final outgoing block is trivially -// true. So we create one less block (including the first guard block) -// than the number of outgoing blocks. -static void createGuardBlocks(SmallVectorImpl &GuardBlocks, - Function *F, const BBSetVector &Outgoing, - BBPredicates &GuardPredicates, StringRef Prefix) { - for (int i = 0, e = Outgoing.size() - 2; i != e; ++i) { +// There is one guard predicate for each outgoing block OutBB. The +// predicate represents whether the hub should transfer control flow +// to OutBB. These predicates are NOT ORTHOGONAL. The Hub evaluates +// them in the same order as the Outgoing set-vector, and control +// branches to the first outgoing block whose predicate evaluates to true. +static void +convertToGuardPredicates(SmallVectorImpl &GuardBlocks, + SmallVectorImpl &DeletionCandidates, + const BBSetVector &Incoming, + const BBSetVector &Outgoing, const StringRef Prefix) { + const unsigned MaxNumBooleansEachUnify = 32; + BBPredicates GuardPredicates; + auto F = Incoming.front()->getParent(); + + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) GuardBlocks.push_back( BasicBlock::Create(F->getContext(), Prefix + ".guard", F)); - } - assert(GuardBlocks.size() == GuardPredicates.size()); - // To help keep the loop simple, temporarily append the last - // outgoing block to the list of guard blocks. - GuardBlocks.push_back(Outgoing.back()); - - for (int i = 0, e = GuardBlocks.size() - 1; i != e; ++i) { - auto Out = Outgoing[i]; - assert(GuardPredicates.count(Out)); - BranchInst::Create(Out, GuardBlocks[i + 1], GuardPredicates[Out], - GuardBlocks[i]); - } + // When we are using interger to record which target block to jump to, we are + // creating less live values, actually we are using one single integer to + // store the index of the target block. When we are using booleans to + // store the branching information, we need (N-1) boolean values, where N is + // the number of outgoing block. + if (Outgoing.size() <= MaxNumBooleansEachUnify) + calcPredicateUsingBooleans(Incoming, Outgoing, GuardBlocks, GuardPredicates, + DeletionCandidates); + else + calcPredicateUsingInteger(Incoming, Outgoing, GuardBlocks, GuardPredicates); - // Remove the last block from the guard list. - GuardBlocks.pop_back(); + setupBranchForGuard(GuardBlocks, Outgoing, GuardPredicates); } BasicBlock *llvm::CreateControlFlowHub( DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const BBSetVector &Incoming, const BBSetVector &Outgoing, const StringRef Prefix) { - auto F = Incoming.front()->getParent(); - auto FirstGuardBlock = - BasicBlock::Create(F->getContext(), Prefix + ".guard", F); + if (Outgoing.size() < 2) + return Outgoing.front(); SmallVector Updates; if (DTU) { for (auto In : Incoming) { - Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock}); - for (auto Succ : successors(In)) { + for (auto Succ : successors(In)) if (Outgoing.count(Succ)) Updates.push_back({DominatorTree::Delete, In, Succ}); - } } } - BBPredicates GuardPredicates; SmallVector DeletionCandidates; - convertToGuardPredicates(FirstGuardBlock, GuardPredicates, DeletionCandidates, - Incoming, Outgoing); + convertToGuardPredicates(GuardBlocks, DeletionCandidates, Incoming, Outgoing, + Prefix); - GuardBlocks.push_back(FirstGuardBlock); - createGuardBlocks(GuardBlocks, F, Outgoing, GuardPredicates, Prefix); + auto FirstGuardBlock = GuardBlocks.front(); // Update the PHINodes in each outgoing block to match the new control flow. - for (int i = 0, e = GuardBlocks.size(); i != e; ++i) { + for (int i = 0, e = GuardBlocks.size(); i != e; ++i) reconnectPhis(Outgoing[i], GuardBlocks[i], Incoming, FirstGuardBlock); - } + reconnectPhis(Outgoing.back(), GuardBlocks.back(), Incoming, FirstGuardBlock); if (DTU) { int NumGuards = GuardBlocks.size(); assert((int)Outgoing.size() == NumGuards + 1); + + for (auto In : Incoming) + Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock}); + for (int i = 0; i != NumGuards - 1; ++i) { Updates.push_back({DominatorTree::Insert, GuardBlocks[i], Outgoing[i]}); Updates.push_back( diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -114,9 +114,9 @@ // didn't exist in the original CFG. auto Def = II.first; LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n"); - auto NewPhi = PHINode::Create(Def->getType(), Incoming.size(), - Def->getName() + ".moved", - LoopExitBlock->getTerminator()); + auto NewPhi = + PHINode::Create(Def->getType(), Incoming.size(), + Def->getName() + ".moved", &LoopExitBlock->front()); for (auto In : Incoming) { LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": "); if (Def->getParent() == In || DT.dominates(Def, In)) {