Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -74,6 +74,13 @@ "post dominator, out of line."), cl::init(false), cl::Hidden); +static cl::opt PlaceLastSuccessor( + "place-last-successor", + cl::desc("When selecting a non-successor block, choose the last block to " + "have been a successor. This represents the block whose " + "predecessor was most recently placed."), + cl::init(false), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -223,11 +230,11 @@ const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter); - MachineBasicBlock * - selectBestCandidateBlock(BlockChain &Chain, - SmallVectorImpl &WorkList, - const BlockFilterSet *BlockFilter); + const BlockFilterSet *BlockFilter, + bool &HasUnmergedSuccessors); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl &WorkList, + const BlockFilterSet *BlockFilter, bool HasUnmergedSuccessors); MachineBasicBlock * getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt, @@ -343,7 +350,8 @@ MachineBasicBlock * MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { + const BlockFilterSet *BlockFilter, + bool &HasUnmergedSuccessors) { const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; @@ -370,6 +378,8 @@ continue; } + HasUnmergedSuccessors = true; + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); @@ -436,7 +446,25 @@ /// \returns The best block found, or null if none are viable. MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( BlockChain &Chain, SmallVectorImpl &WorkList, - const BlockFilterSet *BlockFilter) { + const BlockFilterSet *BlockFilter, bool HasUnmergedSuccessors) { + if (PlaceLastSuccessor && HasUnmergedSuccessors) { + // If we're just placing the last successor as the best candidate, the + // logic is super simple. We skip the already placed entries on the + // worklist and return the most recently added entry that isn't placed. + while (!WorkList.empty()) { + MachineBasicBlock *SuccBB = WorkList.pop_back_val(); + BlockChain &SuccChain = *BlockToChain.lookup(SuccBB); + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(SuccBB) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + return SuccBB; + } + + return nullptr; + } // Once we need to walk the worklist looking for a candidate, cleanup the // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of @@ -513,13 +541,16 @@ // Look for the best viable successor if there is one to place immediately // after this block. - MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + bool HasUnmergedSuccessors = false; + MachineBasicBlock *BestSucc = + selectBestSuccessor(BB, Chain, BlockFilter, HasUnmergedSuccessors); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. if (!BestSucc) - BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter, + HasUnmergedSuccessors); if (!BestSucc) { BestSucc =