Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -109,7 +109,7 @@ static cl::opt MisfetchCost( "misfetch-cost", - cl::desc("Cost that models the probablistic risk of an instruction " + cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden); @@ -310,7 +310,7 @@ MachineFunction::iterator &PrevUnplacedBlockIt, const BlockFilterSet *BlockFilter); - /// \brief Add a basic block to the work list if it is apropriate. + /// \brief Add a basic block to the work list if it is appropriate. /// /// If the optional parameter BlockFilter is provided, only MBB /// present in the set will be added to the worklist. If nullptr @@ -438,7 +438,7 @@ // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb to break top-oorder). If we exclude E that is not in BlockFilter + // HotProb to break top-order). If we exclude E that is not in BlockFilter // when calculating the probability of C->D, D will be selected and we // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); @@ -466,7 +466,6 @@ /// The helper function returns the branch probability that is adjusted /// or normalized over the new total \p AdjustedSumProb. - static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb) { @@ -484,7 +483,7 @@ /// When the option OutlineOptionalBranches is on, this method /// checks if the fallthrough candidate block \p Succ (of block /// \p BB) also has other unscheduled predecessor blocks which -/// are also successors of \p BB (forming triagular shape CFG). +/// are also successors of \p BB (forming triangular shape CFG). /// If none of such predecessors are small, it returns true. /// The caller can choose to select \p Succ as the layout successors /// so that \p Succ's predecessors (optional branches) can be @@ -550,13 +549,13 @@ BranchProbability SuccProb, BranchProbability RealSuccProb, BlockChain &Chain, const BlockFilterSet *BlockFilter) { - // This is no global conflict, just return false. + // There isn't a better layout when there are no unscheduled predecessors. if (SuccChain.UnscheduledPredecessors == 0) return false; // There are two basic scenarios here: // ------------------------------------- - // Case 1: triagular shape CFG: + // Case 1: triangular shape CFG (if-then): // BB // | \ // | \ @@ -565,11 +564,13 @@ // Succ // In this case, we are evaluating whether to select edge -> Succ, e.g. // set Succ as the layout successor of BB. Picking Succ as BB's - // successor breaks the CFG constraints. With this layout, Pred BB + // successor breaks the CFG constraints (FIXME: define these constraints). + // With this layout, Pred BB // is forced to be outlined, so the overall cost will be cost of the // branch taken from BB to Pred, plus the cost of back taken branch - // from Pred to Succ, as well as the additional cost asssociated + // from Pred to Succ, as well as the additional cost associated // with the needed unconditional jump instruction from Pred To Succ. + // The cost of the topological order layout is the taken branch cost // from BB to Succ, so to make BB->Succ a viable candidate, the following // must hold: @@ -579,12 +580,12 @@ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e., // prob(BB->Succ) > 2 * prob(BB->Pred) // - // When real profile data is available, we can precisely compute the the - // probabililty threshold that is needed for edge BB->Succ to be considered. - // With out profile data, the heuristic requires the branch bias to be + // When real profile data is available, we can precisely compute the + // probability threshold that is needed for edge BB->Succ to be considered. + // Without profile data, the heuristic requires the branch bias to be // a lot larger to make sure the signal is very strong (e.g. 80% default). // ----------------------------------------------------------------- - // Case 2: diamond like CFG: + // Case 2: diamond like CFG (if-then-else): // S // / \ // | \ @@ -592,18 +593,38 @@ // \ / // Succ // .. - // In this case, edge S->BB has already been selected, and we are evaluating - // candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB) - // is no less than prob(S->Pred). When real profile data is *available*, if - // the condition is true, it will be always better to continue the trace with - // edge BB->Succ instead of laying out with topological order (i.e. laying - // Pred first). The cost of S->BB->Succ is 2 * freq (S->Pred), while with - // the topo order, the cost is freq(S-> Pred) + Pred(S->BB) which is larger. + // + // The current block is BB and edge BB->Succ is now being evaluated. + // Note that edge S->BB was previously already selected because + // prob(S->BB) > prob(S->Pred). + // At this point, 2 blocks can be placed after BB: Pred or Succ. If we + // choose Pred, we will have a topological ordering as shown on the left + // in the picture below. If we choose Succ, we have the solution as shown + // on the right: + // + // topo-order: + // + // S----- ---S + // | | | | + // ---BB | | BB + // | | | | + // | pred-- | Succ-- + // | | | | + // ---succ ---pred-- + // + // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred) + // = freq(S->Pred) + freq(S->BB) + // + // If we have profile data (i.e, branch probabilities can be trusted), the + // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 * + // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB). + // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which + // means the cost of topological order is greater. // When profile data is not available, however, we need to be more // conservative. If the branch prediction is wrong, breaking the topo-order // will actually yield a layout with large cost. For this reason, we need - // strong biaaed branch at block S with Prob(S->BB) in order to select - // BB->Succ. This is equialant to looking the CFG backward with backward + // strong biased branch at block S with Prob(S->BB) in order to select + // BB->Succ. This is equivalent to looking the CFG backward with backward // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without // profile data). @@ -634,7 +655,7 @@ // BB Pred // \ / // Succ - // We select edgee BB->Succ if + // We select edge BB->Succ if // freq(BB->Succ) > freq(Succ) * HotProb // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) * // HotProb @@ -716,7 +737,7 @@ /// profitable only really makes sense in the context of a loop. This returns /// the most frequently visited block in the worklist, which in the case of /// a loop, is the one most desirable to be physically close to the rest of the -/// loop body in order to improve icache behavior. +/// loop body in order to improve i-cache behavior. /// /// \returns The best block found, or null if none are viable. MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( @@ -971,7 +992,7 @@ unsigned BestExitLoopDepth = 0; MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit - // fallthrough opportunites unless it selects such an exit. Keep a set of + // fallthrough opportunities unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet BlocksExitingToOuterLoop; @@ -1379,7 +1400,7 @@ EHPadWorkList.clear(); } -/// When OutlineOpitonalBranches is on, this method colects BBs that +/// When OutlineOpitonalBranches is on, this method collects BBs that /// dominates all terminator blocks of the function \p F. void MachineBlockPlacement::collectMustExecuteBBs() { if (OutlineOptionalBranches) { @@ -1502,7 +1523,7 @@ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. // The "PrevBB" is not yet updated to reflect current code layout, so, - // o. it may fall-through to a block without explict "goto" instruction + // o. it may fall-through to a block without explicit "goto" instruction // before layout, and no longer fall-through it after layout; or // o. just opposite. // @@ -1661,7 +1682,7 @@ // Changing the layout can create new tail merging opportunities. TargetPassConfig *PassConfig = &getAnalysis(); // TailMerge can create jump into if branches that make CFG irreducible for - // HW that requires structurized CFG. + // HW that requires structured CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge() && BranchFoldPlacement;