diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -792,7 +792,7 @@ BasicBlockListType::iterator pbegin() { return BasicBlocks.begin(); } BasicBlockListType::iterator pend() { return BasicBlocks.end(); } - order_iterator layout_begin() { return BasicBlocksLayout.begin(); } + order_iterator layout_begin() { return BasicBlocksLayout.begin(); } const_order_iterator layout_begin() const { return BasicBlocksLayout.begin(); } order_iterator layout_end() { return BasicBlocksLayout.end(); } @@ -1161,7 +1161,7 @@ /// Return the number of emitted instructions for this function. uint32_t getNumNonPseudos() const { uint32_t N = 0; - for (BinaryBasicBlock *const &BB : layout()) + for (const BinaryBasicBlock *const BB : BasicBlocks) N += BB->getNumNonPseudos(); return N; } @@ -2334,11 +2334,11 @@ size_t estimateHotSize(const bool UseSplitSize = true) const { size_t Estimate = 0; if (UseSplitSize && isSplit()) { - for (const BinaryBasicBlock *BB : BasicBlocksLayout) + for (const BinaryBasicBlock *const BB : BasicBlocks) if (!BB->isCold()) Estimate += BC.computeCodeSize(BB->begin(), BB->end()); } else { - for (const BinaryBasicBlock *BB : BasicBlocksLayout) + for (const BinaryBasicBlock *const BB : BasicBlocks) if (BB->getKnownExecutionCount() != 0) Estimate += BC.computeCodeSize(BB->begin(), BB->end()); } @@ -2349,7 +2349,7 @@ if (!isSplit()) return estimateSize(); size_t Estimate = 0; - for (const BinaryBasicBlock *BB : BasicBlocksLayout) + for (const BinaryBasicBlock *const BB : BasicBlocks) if (BB->isCold()) Estimate += BC.computeCodeSize(BB->begin(), BB->end()); return Estimate; @@ -2357,7 +2357,7 @@ size_t estimateSize() const { size_t Estimate = 0; - for (const BinaryBasicBlock *BB : BasicBlocksLayout) + for (const BinaryBasicBlock *const BB : BasicBlocks) Estimate += BC.computeCodeSize(BB->begin(), BB->end()); return Estimate; } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -293,7 +293,7 @@ void BinaryFunction::markUnreachableBlocks() { std::stack Stack; - for (BinaryBasicBlock *BB : layout()) + for (BinaryBasicBlock *BB : BasicBlocks) BB->markValid(false); // Add all entries and landing pads as roots. @@ -471,7 +471,7 @@ OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); } - if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) { + if (opts::PrintDynoStats && !empty()) { OS << '\n'; DynoStats dynoStats = getDynoStats(*this); OS << dynoStats; @@ -1737,14 +1737,14 @@ BinaryBasicBlock *LastIndirectJumpBB = nullptr; uint64_t LastJT = 0; uint16_t LastJTIndexReg = BC.MIB->getNoRegister(); - for (BinaryBasicBlock *BB : layout()) { + for (BinaryBasicBlock *BB : BasicBlocks) { for (MCInst &Instr : *BB) { if (!BC.MIB->isIndirectBranch(Instr)) continue; // If there's an indirect branch in a single-block function - // it must be a tail call. - if (layout_size() == 1) { + if (BasicBlocks.size() == 1) { BC.MIB->convertJmpToTailCall(Instr); return true; } @@ -2167,7 +2167,7 @@ // later. This has no cost, since annotations are allocated by a bumpptr // allocator and won't be released anyway until late in the pipeline. if (!requiresAddressTranslation() && !opts::Instrument) { - for (BinaryBasicBlock *BB : layout()) + for (BinaryBasicBlock *BB : BasicBlocks) for (MCInst &Inst : *BB) BC.MIB->clearOffset(Inst); } @@ -2179,7 +2179,7 @@ void BinaryFunction::calculateMacroOpFusionStats() { if (!getBinaryContext().isX86()) return; - for (BinaryBasicBlock *BB : layout()) { + for (const BinaryBasicBlock *BB : BasicBlocks) { auto II = BB->getMacroOpFusionPair(); if (II == BB->end()) continue; @@ -2295,7 +2295,7 @@ } uint64_t TotalScore = 0ULL; - for (BinaryBasicBlock *BB : layout()) { + for (const BinaryBasicBlock *BB : BasicBlocks) { uint64_t BBExecCount = BB->getExecutionCount(); if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) continue; @@ -2805,7 +2805,7 @@ } LLVM_DEBUG(dbgs() << "\n"); - for (BinaryBasicBlock *BB : BasicBlocksLayout) { + for (BinaryBasicBlock *BB : BasicBlocks) { for (auto II = BB->begin(); II != BB->end();) { const MCCFIInstruction *CFI = getCFIFor(*II); if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState || @@ -2826,8 +2826,8 @@ uint64_t BinaryFunction::getInstructionCount() const { uint64_t Count = 0; - for (BinaryBasicBlock *const &Block : BasicBlocksLayout) - Count += Block->getNumNonPseudos(); + for (const BinaryBasicBlock *const BB : BasicBlocks) + Count += BB->getNumNonPseudos(); return Count; } @@ -2908,7 +2908,7 @@ void BinaryFunction::duplicateConstantIslands() { assert(Islands && "function expected to have constant islands"); - for (BinaryBasicBlock *BB : layout()) { + for (BinaryBasicBlock *BB : BasicBlocks) { if (!BB->isCold()) continue; @@ -3325,7 +3325,7 @@ void BinaryFunction::postProcessBranches() { if (!isSimple()) return; - for (BinaryBasicBlock *BB : BasicBlocksLayout) { + for (BinaryBasicBlock *BB : BasicBlocks) { auto LastInstrRI = BB->getLastNonPseudo(); if (BB->succ_size() == 1) { if (LastInstrRI != BB->rend() && @@ -3490,18 +3490,28 @@ return Status; } -BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const { - BasicBlockOrderType DFS; +BinaryFunction::BasicBlockListType BinaryFunction::dfs() const { + BasicBlockListType DFS; unsigned Index = 0; std::stack Stack; // Push entry points to the stack in reverse order. // // NB: we rely on the original order of entries to match. - for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) { - BinaryBasicBlock *BB = *BBI; - if (isEntryPoint(*BB)) - Stack.push(BB); + SmallVector EntryPoints; + copy_if(BasicBlocks, std::back_inserter(EntryPoints), + [&](const BinaryBasicBlock *const BB) { return isEntryPoint(*BB); }); + // Sort entry points by their offset to make sure we got them in the right + // order. + stable_sort(EntryPoints, [](const BinaryBasicBlock *const A, + const BinaryBasicBlock *const B) { + return A->getOffset() < B->getOffset(); + }); + for (BinaryBasicBlock *const BB : reverse(EntryPoints)) { + Stack.push(BB); + } + + for (BinaryBasicBlock *const BB : BasicBlocks) { BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex); } @@ -3939,7 +3949,7 @@ if (AdjustmentRatio < 0.0) AdjustmentRatio = 0.0; - for (BinaryBasicBlock *&BB : layout()) + for (BinaryBasicBlock *BB : BasicBlocks) BB->adjustExecutionCount(AdjustmentRatio); ExecutionCount -= Count; diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/ADRRelaxationPass.cpp --- a/bolt/lib/Passes/ADRRelaxationPass.cpp +++ b/bolt/lib/Passes/ADRRelaxationPass.cpp @@ -29,8 +29,8 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); - for (BinaryBasicBlock *BB : BF.layout()) { - for (auto It = BB->begin(); It != BB->end(); ++It) { + for (BinaryBasicBlock &BB : BF) { + for (auto It = BB.begin(); It != BB.end(); ++It) { MCInst &Inst = *It; if (!BC.MIB->isADR(Inst)) continue; @@ -54,7 +54,7 @@ int64_t Addend = BC.MIB->getTargetAddend(Inst); InstructionListType Addr = BC.MIB->materializeAddress(Symbol, BC.Ctx.get(), Reg, Addend); - It = BB->replaceInstruction(It, Addr); + It = BB.replaceInstruction(It, Addr); } } } diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -83,11 +83,11 @@ size_t HotSize = 0; size_t ColdSize = 0; - for (const BinaryBasicBlock *BB : Function.layout()) - if (BB->isCold()) - ColdSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); + for (const BinaryBasicBlock &BB : Function) + if (BB.isCold()) + ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter); else - HotSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); + HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter); Function.setAlignment(opts::AlignFunctions); if (HotSize > 0) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -339,9 +339,9 @@ uint64_t Bytes; Function.markUnreachableBlocks(); LLVM_DEBUG({ - for (BinaryBasicBlock *BB : Function.layout()) { - if (!BB->isValid()) { - dbgs() << "BOLT-INFO: UCE found unreachable block " << BB->getName() + for (BinaryBasicBlock &BB : Function) { + if (!BB.isValid()) { + dbgs() << "BOLT-INFO: UCE found unreachable block " << BB.getName() << " in function " << Function << "\n"; Function.dump(); } diff --git a/bolt/lib/Passes/CacheMetrics.cpp b/bolt/lib/Passes/CacheMetrics.cpp --- a/bolt/lib/Passes/CacheMetrics.cpp +++ b/bolt/lib/Passes/CacheMetrics.cpp @@ -43,15 +43,15 @@ for (BinaryFunction *BF : BinaryFunctions) { const BinaryContext &BC = BF->getBinaryContext(); - for (BinaryBasicBlock *BB : BF->layout()) { + for (BinaryBasicBlock &BB : *BF) { if (BF->isSimple() || BC.HasRelocations) { // Use addresses/sizes as in the output binary - BBAddr[BB] = BB->getOutputAddressRange().first; - BBSize[BB] = BB->getOutputSize(); + BBAddr[&BB] = BB.getOutputAddressRange().first; + BBSize[&BB] = BB.getOutputSize(); } else { // Output ranges should match the input if the body hasn't changed - BBAddr[BB] = BB->getInputAddressRange().first + BF->getAddress(); - BBSize[BB] = BB->getOriginalSize(); + BBAddr[&BB] = BB.getInputAddressRange().first + BF->getAddress(); + BBSize[&BB] = BB.getOriginalSize(); } } } @@ -68,11 +68,12 @@ for (BinaryFunction *BF : BinaryFunctions) { if (!BF->hasProfile()) continue; - for (BinaryBasicBlock *SrcBB : BF->layout()) { - auto BI = SrcBB->branch_info_begin(); - for (BinaryBasicBlock *DstBB : SrcBB->successors()) { - if (SrcBB != DstBB && BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && - BBAddr.at(SrcBB) + BBSize.at(SrcBB) == BBAddr.at(DstBB)) + for (BinaryBasicBlock &SrcBB : *BF) { + auto BI = SrcBB.branch_info_begin(); + for (BinaryBasicBlock *DstBB : SrcBB.successors()) { + if (&SrcBB != DstBB && + BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && + BBAddr.at(&SrcBB) + BBSize.at(&SrcBB) == BBAddr.at(DstBB)) Score += BI->Count; ++BI; } @@ -92,12 +93,13 @@ for (BinaryFunction *BF : BinaryFunctions) { if (!BF->hasProfile()) continue; - for (BinaryBasicBlock *SrcBB : BF->layout()) { - auto BI = SrcBB->branch_info_begin(); - for (BinaryBasicBlock *DstBB : SrcBB->successors()) { - if (DstBB != SrcBB) - Score += CacheMetrics::extTSPScore(BBAddr.at(SrcBB), BBSize.at(SrcBB), - BBAddr.at(DstBB), BI->Count); + for (BinaryBasicBlock &SrcBB : *BF) { + auto BI = SrcBB.branch_info_begin(); + for (BinaryBasicBlock *DstBB : SrcBB.successors()) { + if (DstBB != &SrcBB) + Score += + CacheMetrics::extTSPScore(BBAddr.at(&SrcBB), BBSize.at(&SrcBB), + BBAddr.at(DstBB), BI->Count); ++BI; } } @@ -262,13 +264,13 @@ NumProfiledFunctions++; if (BF->hasValidIndex()) NumHotFunctions++; - for (BinaryBasicBlock *BB : BF->layout()) { + for (const BinaryBasicBlock &BB : *BF) { NumBlocks++; - size_t BBAddrMin = BB->getOutputAddressRange().first; - size_t BBAddrMax = BB->getOutputAddressRange().second; + size_t BBAddrMin = BB.getOutputAddressRange().first; + size_t BBAddrMax = BB.getOutputAddressRange().second; TotalCodeMinAddr = std::min(TotalCodeMinAddr, BBAddrMin); TotalCodeMaxAddr = std::max(TotalCodeMaxAddr, BBAddrMax); - if (BF->hasValidIndex() && !BB->isCold()) { + if (BF->hasValidIndex() && !BB.isCold()) { NumHotBlocks++; HotCodeMinAddr = std::min(HotCodeMinAddr, BBAddrMin); HotCodeMaxAddr = std::max(HotCodeMaxAddr, BBAddrMax); diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -162,11 +162,11 @@ BinaryFunction &BF = BFI.second; if (!BF.isSimple()) continue; - for (BinaryBasicBlock *BB : BF.layout()) { - auto BI = BB->branch_info_begin(); - for (BinaryBasicBlock *SuccBB : BB->successors()) { + for (const BinaryBasicBlock &BB : BF) { + auto BI = BB.branch_info_begin(); + for (BinaryBasicBlock *SuccBB : BB.successors()) { if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && BI->Count > 0) { - if (BB->getKnownExecutionCount() == 0 || + if (BB.getKnownExecutionCount() == 0 || SuccBB->getKnownExecutionCount() == 0) { errs() << "BOLT-WARNING: profile verification failed after ICP for " "function " diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -165,8 +165,8 @@ return INL_NONE; const MCPhysReg SPReg = BC.MIB->getStackPointer(); - for (const BinaryBasicBlock *BB : BF.layout()) { - for (const MCInst &Inst : *BB) { + for (const BinaryBasicBlock &BB : BF) { + for (const MCInst &Inst : BB) { // Tail calls are marked as implicitly using the stack pointer and they // could be inlined. if (BC.MIB->isTailCall(Inst)) @@ -395,16 +395,16 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { BinaryContext &BC = Function.getBinaryContext(); - std::vector Blocks(Function.layout().begin(), - Function.layout().end()); + std::vector> Blocks(Function.begin(), + Function.end()); llvm::sort( - Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) { - return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount(); + Blocks, [](const BinaryBasicBlock &BB1, const BinaryBasicBlock &BB2) { + return BB1.getKnownExecutionCount() > BB2.getKnownExecutionCount(); }); bool DidInlining = false; - for (BinaryBasicBlock *BB : Blocks) { - for (auto InstIt = BB->begin(); InstIt != BB->end();) { + for (BinaryBasicBlock &BB : Blocks) { + for (auto InstIt = BB.begin(); InstIt != BB.end();) { MCInst &Inst = *InstIt; if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 || !Inst.getOperand(0).isExpr()) { @@ -459,22 +459,24 @@ } LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction - << " in " << Function << " : " << BB->getName() - << ". Count: " << BB->getKnownExecutionCount() + << " in " << Function << " : " << BB.getName() + << ". Count: " << BB.getKnownExecutionCount() << ". Size change: " << SizeAfterInlining << " bytes.\n"); - std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); + BinaryBasicBlock *FollowUpBB; + std::tie(FollowUpBB, InstIt) = inlineCall(BB, InstIt, *TargetFunction); DidInlining = true; TotalInlinedBytes += SizeAfterInlining; ++NumInlinedCallSites; - NumInlinedDynamicCalls += BB->getExecutionCount(); + NumInlinedDynamicCalls += FollowUpBB->getExecutionCount(); // Subtract basic block execution count from the callee execution count. if (opts::AdjustProfile) - TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount()); + TargetFunction->adjustExecutionCount( + FollowUpBB->getKnownExecutionCount()); // Check if the caller inlining status has to be adjusted. if (IInfo->second.Type == INL_TAILCALL) { diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp --- a/bolt/lib/Passes/PLTCall.cpp +++ b/bolt/lib/Passes/PLTCall.cpp @@ -57,11 +57,11 @@ Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE) continue; - for (BinaryBasicBlock *BB : Function.layout()) { - if (opts::PLT == OT_HOT && !BB->getKnownExecutionCount()) + for (BinaryBasicBlock &BB : Function) { + if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount()) continue; - for (MCInst &Instr : *BB) { + for (MCInst &Instr : BB) { if (!BC.MIB->isCall(Instr)) continue; const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -1973,11 +1973,11 @@ // Update pass statistics uint64_t TotalInstrs = 0ULL; uint64_t TotalStoreInstrs = 0ULL; - for (BinaryBasicBlock *BB : BF.layout()) { - uint64_t BBExecCount = BB->getExecutionCount(); + for (const BinaryBasicBlock &BB : BF) { + uint64_t BBExecCount = BB.getExecutionCount(); if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) continue; - for (const auto &Instr : *BB) { + for (const auto &Instr : BB) { if (BC.MIB->isPseudo(Instr)) continue; if (BC.MIB->isStore(Instr)) diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -90,8 +90,8 @@ return false; bool AllCold = true; - for (BinaryBasicBlock *BB : BF.layout()) { - const uint64_t ExecCount = BB->getExecutionCount(); + for (const BinaryBasicBlock &BB : BF) { + const uint64_t ExecCount = BB.getExecutionCount(); if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) return false; if (ExecCount != 0) diff --git a/bolt/lib/Passes/ThreeWayBranch.cpp b/bolt/lib/Passes/ThreeWayBranch.cpp --- a/bolt/lib/Passes/ThreeWayBranch.cpp +++ b/bolt/lib/Passes/ThreeWayBranch.cpp @@ -19,9 +19,8 @@ bool ThreeWayBranch::shouldRunOnFunction(BinaryFunction &Function) { BinaryContext &BC = Function.getBinaryContext(); - BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout(); - for (BinaryBasicBlock *BB : BlockLayout) - for (MCInst &Inst : *BB) + for (const BinaryBasicBlock &BB : Function) + for (const MCInst &Inst : BB) if (BC.MIB->isPacked(Inst)) return false; return true;