diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -155,7 +155,17 @@ iterator end() const { return getSubLoops().end(); } reverse_iterator rbegin() const { return getSubLoops().rbegin(); } reverse_iterator rend() const { return getSubLoops().rend(); } - bool empty() const { return getSubLoops().empty(); } + + // LoopInfo does not detect irreducible control flow, just natural + // loops. That is, it is possible that there is cyclic control + // flow within the "innermost loop" or around the "outermost + // loop". + + /// Return true if the loop does not contain any (natural) loops. + bool isInnermost() const { return getSubLoops().empty(); } + /// Return true if the loop does not have a parent (natural) loop + /// (i.e. it is outermost, which is the same as top-level). + bool isOutermost() const { return getParentLoop() == nullptr; } /// Get a list of the basic blocks which make up this loop. ArrayRef getBlocks() const { @@ -974,7 +984,7 @@ LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); LoopT *L = *I; - assert(!L->getParentLoop() && "Not a top-level loop!"); + assert(L->isOutermost() && "Not a top-level loop!"); TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin())); return L; } @@ -1002,7 +1012,7 @@ /// This adds the specified loop to the collection of top-level loops. 
void addTopLevelLoop(LoopT *New) { - assert(!New->getParentLoop() && "Loop already in subloop!"); + assert(New->isOutermost() && "Loop already in subloop!"); TopLevelLoops.push_back(New); } diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -502,7 +502,7 @@ if (Subloop && Block == Subloop->getHeader()) { // We reach this point once per subloop after processing all the blocks in // the subloop. - if (Subloop->getParentLoop()) + if (!Subloop->isOutermost()) Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop); else LI->addTopLevelLoop(Subloop); @@ -681,7 +681,7 @@ const DomTreeBase &DomTree) const { DenseSet Loops; for (iterator I = begin(), E = end(); I != E; ++I) { - assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + assert((*I)->isOutermost() && "Top-level loop has a parent!"); (*I)->verifyLoopNest(&Loops); } diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1769,7 +1769,7 @@ << TheLoop->getHeader()->getName() << '\n'); // We can only analyze innermost loops. 
- if (!TheLoop->empty()) { + if (!TheLoop->isInnermost()) { LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; return false; diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -499,7 +499,7 @@ std::unique_ptr CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional TRT) { - if (Root.getParentLoop()) { + if (!Root.isOutermost()) { LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); return nullptr; } diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -764,7 +764,7 @@ /// Update the parent loop for all subloops directly nested within unloop. void UnloopUpdater::updateSubloopParents() { - while (!Unloop.empty()) { + while (!Unloop.isInnermost()) { Loop *Subloop = *std::prev(Unloop.end()); Unloop.removeChildLoop(std::prev(Unloop.end())); @@ -862,7 +862,7 @@ auto InvalidateOnExit = make_scope_exit([&]() { destroy(Unloop); }); // First handle the special case of no parent loop to simplify the algorithm. - if (!Unloop->getParentLoop()) { + if (Unloop->isOutermost()) { // Since BBLoop had no parent, Unloop blocks are no longer in a loop. for (Loop::block_iterator I = Unloop->block_begin(), E = Unloop->block_end(); @@ -887,7 +887,7 @@ } // Move all of the subloops to the top-level. 
- while (!Unloop->empty()) + while (!Unloop->isInnermost()) addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); return; diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -53,8 +53,8 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { - assert(!OuterLoop.getSubLoops().empty() && "Outer loop should have subloops"); - assert(InnerLoop.getParentLoop() && "Inner loop should have a parent"); + assert(!OuterLoop.isInnermost() && "Outer loop should have subloops"); + assert(!InnerLoop.isOutermost() && "Inner loop should have a parent"); LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName() << "' and '" << InnerLoop.getName() << "' are perfectly nested.\n"); diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp --- a/llvm/lib/Analysis/LoopPass.cpp +++ b/llvm/lib/Analysis/LoopPass.cpp @@ -77,7 +77,7 @@ // Insert loop into loop nest (LoopInfo) and loop queue (LQ). void LPPassManager::addLoop(Loop &L) { - if (!L.getParentLoop()) { + if (L.isOutermost()) { // This is the top level loop. 
LQ.push_front(&L); return; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3022,7 +3022,7 @@ OS.indent(Loop->getLoopDepth()*2-2); OS << "This "; - if (Loop->empty()) + if (Loop->isInnermost()) OS << "Inner "; OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -234,7 +234,7 @@ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { Loop *L = *I; - if (!L->getParentLoop()) + if (L->isOutermost()) TryConvertLoop(L); } diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1043,7 +1043,7 @@ bool Changed = false; for (auto ML : *MLI) { - if (!ML->getParentLoop()) + if (ML->isOutermost()) Changed |= ProcessLoop(ML); } Changed |= RevertNonLoops(); diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -390,7 +390,7 @@ TRI = HST.getRegisterInfo(); for (auto &L : *MLI) - if (!L->getParentLoop()) { + if (L->isOutermost()) { bool L0Used = false; bool L1Used = false; Changed |= convertToHardwareLoop(L, L0Used, L1Used); diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -449,7 +449,7 @@ // specifically moving instructions across the loop boundary and so it is // especially in need of sanity checking here. 
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); - assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && + assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); if (MSSAU.get() && VerifyMemorySSA) diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -271,7 +271,7 @@ bool MadeChange = false; // Only prefetch in the inner-most loop - if (!L->empty()) + if (!L->isInnermost()) return MadeChange; SmallPtrSet EphValues; diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -664,7 +664,7 @@ /// Try to distribute an inner-most loop. bool processLoop(std::function &GetLAA) { - assert(L->empty() && "Only process inner loops."); + assert(L->isInnermost() && "Only process inner loops."); LLVM_DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName() @@ -982,7 +982,7 @@ for (Loop *TopLevelLoop : *LI) for (Loop *L : depth_first(TopLevelLoop)) // We only handle inner-most loops. - if (L->empty()) + if (L->isInnermost()) Worklist.push_back(L); // Now walk the identified inner loops. 
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -1483,7 +1483,7 @@ continue; LI.changeLoopFor(BB, FC0.L); } - while (!FC1.L->empty()) { + while (!FC1.L->isInnermost()) { const auto &ChildLoopIt = FC1.L->begin(); Loop *ChildLoop = *ChildLoopIt; FC1.L->removeChildLoop(ChildLoopIt); @@ -1777,7 +1777,7 @@ continue; LI.changeLoopFor(BB, FC0.L); } - while (!FC1.L->empty()) { + while (!FC1.L->isInnermost()) { const auto &ChildLoopIt = FC1.L->begin(); Loop *ChildLoop = *ChildLoopIt; FC1.L->removeChildLoop(ChildLoopIt); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1211,7 +1211,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, bool IsLoopMemset) { if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { - if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) { + if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) { LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() << " : LIR " << (IsMemset ? 
"Memset" : "Memcpy") << " avoided: multi-block top-level loop\n"); diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1197,7 +1197,7 @@ removeChildLoop(NewInner, NewOuter); LI->changeTopLevelLoop(NewInner, NewOuter); } - while (!NewOuter->empty()) + while (!NewOuter->isInnermost()) NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); NewOuter->addChildLoop(NewInner); diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -623,7 +623,7 @@ for (Loop *TopLevelLoop : LI) for (Loop *L : depth_first(TopLevelLoop)) // We only handle inner-most loops. - if (L->empty()) + if (L->isInnermost()) Worklist.push_back(L); // Now walk the identified inner loops. diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -452,7 +452,7 @@ if (LI.isLoopHeader(BB)) { assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!"); Loop *DL = LI.getLoopFor(BB); - if (DL->getParentLoop()) { + if (!DL->isOutermost()) { for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop()) for (auto *BB : DL->getBlocks()) PL->removeBlockFromLoop(BB); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5619,7 +5619,7 @@ if (IU.empty()) return; // Skip nested loops until we can model them better with formulae. 
- if (!L->empty()) { + if (!L->isInnermost()) { LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); return; } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -345,7 +345,7 @@ // Only analyze inner loops. We can't properly estimate cost of nested loops // and we won't visit inner loops again anyway. - if (!L->empty()) + if (!L->isInnermost()) return None; // Don't simulate loops with a big or unknown tripcount diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1214,7 +1214,7 @@ LI.addTopLevelLoop(ClonedRootL); AddClonedBlocksToLoop(OrigRootL, *ClonedRootL); - if (OrigRootL.empty()) + if (OrigRootL.isInnermost()) return ClonedRootL; // If we have a nest, we can quickly clone the entire loop nest using an @@ -2353,12 +2353,12 @@ for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) { UpdateLoop(*UpdatedL); - if (!UpdatedL->getParentLoop()) + if (UpdatedL->isOutermost()) OuterExitL = nullptr; } if (IsStillLoop) { UpdateLoop(L); - if (!L.getParentLoop()) + if (L.isOutermost()) OuterExitL = nullptr; } diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -304,7 +304,7 @@ // Only try to peel innermost loops by default. // The constraint can be relaxed by the target in TTI.getUnrollingPreferences // or by the flag -unroll-allow-loop-nests-peeling. - if (!PP.AllowLoopNestsPeeling && !L->empty()) + if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) return; // If the user provided a peel count, use that. 
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -268,7 +268,7 @@ for (Loop *TopLevelLoop : *LI) for (Loop *L : depth_first(TopLevelLoop)) // We only handle inner-most loops. - if (L->empty()) + if (L->isInnermost()) Worklist.push_back(L); // Now walk the identified inner loops. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -431,7 +431,7 @@ } bool LoopVectorizationLegality::canVectorizeOuterLoop() { - assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); + assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop."); // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; @@ -1055,7 +1055,7 @@ // Helper function to canVectorizeLoopNestCFG. bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath) { - assert((UseVPlanNativePath || Lp->empty()) && + assert((UseVPlanNativePath || Lp->isInnermost()) && "VPlan-native path is not enabled."); // TODO: ORE should be improved to show more accurate information when an @@ -1165,7 +1165,7 @@ // Specific checks for outer loops. We skip the remaining legal checks at this // point because they don't support outer loops. - if (!TheLoop->empty()) { + if (!TheLoop->isInnermost()) { assert(UseVPlanNativePath && "VPlan-native path is not enabled."); if (!canVectorizeOuterLoop()) { @@ -1182,7 +1182,7 @@ return Result; } - assert(TheLoop->empty() && "Inner loop expected."); + assert(TheLoop->isInnermost() && "Inner loop expected."); // Check if we can if-convert non-single-bb loops. 
unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1664,7 +1664,7 @@ // representation for pragma 'omp simd' is introduced. static bool isExplicitVecOuterLoop(Loop *OuterLp, OptimizationRemarkEmitter *ORE) { - assert(!OuterLp->empty() && "This is not an outer loop"); + assert(!OuterLp->isInnermost() && "This is not an outer loop"); LoopVectorizeHints Hints(OuterLp, true /*DisableInterleaving*/, *ORE); // Only outer loops with an explicit vectorization hint are supported. @@ -1697,7 +1697,7 @@ // now, only collect outer loops that have explicit vectorization hints. If we // are stress testing the VPlan H-CFG construction, we collect the outermost // loop of every loop nest. - if (L.empty() || VPlanBuildStressTest || + if (L.isInnermost() || VPlanBuildStressTest || (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) { LoopBlocksRPO RPOT(&L); RPOT.perform(LI); @@ -6931,7 +6931,7 @@ // transformations before even evaluating whether vectorization is profitable. // Since we cannot modify the incoming IR, we need to build VPlan upfront in // the vectorization pipeline. - if (!OrigLoop->empty()) { + if (!OrigLoop->isInnermost()) { // If the user doesn't provide a vectorization factor, determine a // reasonable one. if (UserVF.isZero()) { @@ -6969,7 +6969,7 @@ Optional LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(!UserVF.isScalable() && "scalable vectorization not yet handled"); - assert(OrigLoop->empty() && "Inner loop expected."); + assert(OrigLoop->isInnermost() && "Inner loop expected."); Optional MaybeMaxVF = CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. 
@@ -7587,7 +7587,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF) { - assert(OrigLoop->empty() && "Inner loop expected."); + assert(OrigLoop->isInnermost() && "Inner loop expected."); // Collect conditions feeding internal conditional branches; they need to be // represented in VPlan for it to model masking. @@ -7837,7 +7837,7 @@ // transformations before even evaluating whether vectorization is profitable. // Since we cannot modify the incoming IR, we need to build VPlan upfront in // the vectorization pipeline. - assert(!OrigLoop->empty()); + assert(!OrigLoop->isInnermost()); assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan @@ -8236,7 +8236,7 @@ !EnableLoopVectorization) {} bool LoopVectorizePass::processLoop(Loop *L) { - assert((EnableVPlanNativePath || L->empty()) && + assert((EnableVPlanNativePath || L->isInnermost()) && "VPlan-native path is not enabled. Only process inner loops."); #ifndef NDEBUG @@ -8298,11 +8298,11 @@ // even evaluating whether vectorization is profitable. Since we cannot modify // the incoming IR, we need to build VPlan upfront in the vectorization // pipeline. - if (!L->empty()) + if (!L->isInnermost()) return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC, ORE, BFI, PSI, Hints); - assert(L->empty() && "Inner loop expected."); + assert(L->isInnermost() && "Inner loop expected."); // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads.