diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -324,6 +324,16 @@
   /// Parse a single LBR entry as output by perf script -Fbrstack
   ErrorOr<LBREntry> parseLBREntry();
 
+  /// Parse LBR sample, returns the number of traces.
+  ///
+  /// For each LBR entry, the fall-through trace to the next recorded PC is
+  /// added to FallthroughLBRs when both ends are in the same function, and
+  /// counted as invalid or long-range otherwise (the returned count includes
+  /// those). The branch itself is added to BranchLBRs unless neither of its
+  /// ends is in a known function. If \p NeedsSkylakeFix is set, the first two
+  /// LBR entries of the sample are skipped.
+  uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
+
   /// Parse and pre-aggregate branch events.
   std::error_code parseBranchEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1374,6 +1374,74 @@
   return std::error_code();
 }
 
+uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
+                                        bool NeedsSkylakeFix) {
+  uint64_t NumTraces{0};
+  // LBRs are stored in reverse execution order. NextPC refers to the next
+  // recorded executed PC.
+  uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
+  uint32_t NumEntry = 0;
+  for (const LBREntry &LBR : Sample.LBR) {
+    ++NumEntry;
+    // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
+    // sometimes record entry 32 as an exact copy of entry 31. This will cause
+    // us to likely record an invalid trace and generate a stale function for
+    // BAT mode (non BAT disassembles the function and is able to ignore this
+    // trace at aggregation time). Drop first 2 entries (last two, in
+    // chronological order)
+    if (NeedsSkylakeFix && NumEntry <= 2)
+      continue;
+    if (NextPC) {
+      // Record fall-through trace.
+      const uint64_t TraceFrom = LBR.To;
+      const uint64_t TraceTo = NextPC;
+      const BinaryFunction *TraceBF =
+          getBinaryFunctionContainingAddress(TraceFrom);
+      if (TraceBF && TraceBF->containsAddress(TraceTo)) {
+        FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
+        if (TraceBF->containsAddress(LBR.From))
+          ++Info.InternCount;
+        else
+          ++Info.ExternCount;
+      } else {
+        const BinaryFunction *ToFunc =
+            getBinaryFunctionContainingAddress(TraceTo);
+        if (TraceBF && ToFunc) {
+          LLVM_DEBUG({
+            dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
+                   << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
+                   << formatv(" and ending @ {0:x}\n", TraceTo);
+          });
+          ++NumInvalidTraces;
+        } else {
+          LLVM_DEBUG({
+            dbgs() << "Out of range trace starting in "
+                   << (TraceBF ? TraceBF->getPrintName() : "None")
+                   << formatv(" @ {0:x}",
+                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
+                   << " and ending in "
+                   << (ToFunc ? ToFunc->getPrintName() : "None")
+                   << formatv(" @ {0:x}\n",
+                              TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
+          });
+          ++NumLongRangeTraces;
+        }
+      }
+      ++NumTraces;
+    }
+    NextPC = LBR.From;
+
+    uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
+    uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
+    if (!From && !To)
+      continue;
+    BranchInfo &Info = BranchLBRs[Trace(From, To)];
+    ++Info.TakenCount;
+    Info.MispredCount += LBR.Mispred;
+  }
+  return NumTraces;
+}
+
 std::error_code DataAggregator::parseBranchEvents() {
   outs() << "PERF2BOLT: parse branch events...\n";
   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
@@ -1412,79 +1480,7 @@
       NeedsSkylakeFix = true;
     }
 
-    // LBRs are stored in reverse execution order. NextPC refers to the next
-    // recorded executed PC.
-    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
-    uint32_t NumEntry = 0;
-    for (const LBREntry &LBR : Sample.LBR) {
-      ++NumEntry;
-      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
-      // sometimes record entry 32 as an exact copy of entry 31. This will cause
-      // us to likely record an invalid trace and generate a stale function for
-      // BAT mode (non BAT disassembles the function and is able to ignore this
-      // trace at aggregation time). Drop first 2 entries (last two, in
-      // chronological order)
-      if (NeedsSkylakeFix && NumEntry <= 2)
-        continue;
-      if (NextPC) {
-        // Record fall-through trace.
-        const uint64_t TraceFrom = LBR.To;
-        const uint64_t TraceTo = NextPC;
-        const BinaryFunction *TraceBF =
-            getBinaryFunctionContainingAddress(TraceFrom);
-        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
-          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
-          if (TraceBF->containsAddress(LBR.From))
-            ++Info.InternCount;
-          else
-            ++Info.ExternCount;
-        } else {
-          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
-            LLVM_DEBUG(dbgs()
-                       << "Invalid trace starting in "
-                       << TraceBF->getPrintName() << " @ "
-                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
-                       << " and ending @ " << Twine::utohexstr(TraceTo)
-                       << '\n');
-            ++NumInvalidTraces;
-          } else {
-            LLVM_DEBUG(dbgs()
-                       << "Out of range trace starting in "
-                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
-                       << Twine::utohexstr(
-                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
-                       << " and ending in "
-                       << (getBinaryFunctionContainingAddress(TraceTo)
-                               ? getBinaryFunctionContainingAddress(TraceTo)
-                                     ->getPrintName()
-                               : "None")
-                       << " @ "
-                       << Twine::utohexstr(
-                              TraceTo -
-                              (getBinaryFunctionContainingAddress(TraceTo)
-                                   ? getBinaryFunctionContainingAddress(TraceTo)
-                                         ->getAddress()
-                                   : 0))
-                       << '\n');
-            ++NumLongRangeTraces;
-          }
-        }
-        ++NumTraces;
-      }
-      NextPC = LBR.From;
-
-      uint64_t From = LBR.From;
-      if (!getBinaryFunctionContainingAddress(From))
-        From = 0;
-      uint64_t To = LBR.To;
-      if (!getBinaryFunctionContainingAddress(To))
-        To = 0;
-      if (!From && !To)
-        continue;
-      BranchInfo &Info = BranchLBRs[Trace(From, To)];
-      ++Info.TakenCount;
-      Info.MispredCount += LBR.Mispred;
-    }
+    NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
   }
 
   for (const auto &LBR : BranchLBRs) {