diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -346,22 +346,23 @@ // initialize it with zero count, so it remains zero if doesn't hit any // samples. This is to be consistent with compiler that interpret zero count // as unexecuted(cold). + // Note that a function can be split into multiple ranges, so we leverage + // DWARFSymbol's RangesIdx to look up all ranges for one entry. for (auto I : RangeCounter) { uint64_t RangeBegin = I.first.first; uint64_t RangeEnd = I.first.second; - // Find the function offset range the current range begin belongs to. - auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); - if (FuncRange.second == 0) + + // Find the symbol the current range begin belongs to. + auto *Symbol = Binary->findSymbolForOffset(RangeBegin); + if (!Symbol) { WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " << format("%8" PRIx64, RangeEnd) << "]: Invalid range or disassembling error in profiled binary.\n"; - else if (RangeEnd > FuncRange.second) - WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " - << format("%8" PRIx64, RangeEnd) - << "]: Range is across different functions.\n"; - else - Ranges[FuncRange] += 0; + continue; + } + for (const auto &Range : Binary->getDWARFRangeVectorForSymbol(Symbol)) + Ranges[{Range.first, Range.second}] += 0; } RangeSample DisjointRanges; findDisjointRanges(DisjointRanges, Ranges); @@ -397,22 +398,15 @@ } } -static bool isOutlinedFunction(StringRef CalleeName) { - // Check whether it's from hot-cold func split or coro split. - return CalleeName.find(".resume") != StringRef::npos || - CalleeName.find(".cold") != StringRef::npos; -} - StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { - // Get the callee name by branch target if it's a call branch. 
- StringRef CalleeName = FunctionSamples::getCanonicalFnName( - Binary->getFuncFromStartOffset(TargetOffset)); + // Get the DWARF-based symbol info by branch target if it's a call branch. + auto *DWARFSymbol = Binary->getSymbolForStartOffset(TargetOffset); - // We won't accumulate sample count againt outlined function. - if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName)) + // We won't accumulate sample count against outlined function or inner labels. + if (!DWARFSymbol || !DWARFSymbol->isEntryFunction) return StringRef(); - return CalleeName; + return FunctionSamples::getCanonicalFnName(DWARFSymbol->SymbolName); } void ProfileGenerator::populateBoundarySamplesForAllFunctions( @@ -480,20 +474,23 @@ void CSProfileGenerator::computeSizeForProfiledFunctions() { // Hash map to deduplicate the function range and the item is a pair of // function start and end offset. - std::unordered_map FuncRanges; + std::unordered_map AggregatedRanges; // Go through all the ranges in the CS counters, use the start of the range to // look up the function it belongs and record the function range. for (const auto &CI : SampleCounters) { for (auto Item : CI.second.RangeCounter) { // FIXME: Filter the bogus crossing function range. 
uint64_t RangeStartOffset = Item.first.first; - auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset); - if (FuncRange.second != 0) - FuncRanges[FuncRange.first] = FuncRange.second; + auto *Symbol = Binary->findSymbolForOffset(RangeStartOffset); + if (!Symbol) + continue; + + for (const auto &Range : Binary->getDWARFRangeVectorForSymbol(Symbol)) + AggregatedRanges[Range.first] = Range.second; } } - for (auto I : FuncRanges) { + for (auto I : AggregatedRanges) { uint64_t StartOffset = I.first; uint64_t EndOffset = I.second; Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -12,6 +12,7 @@ #include "CallContext.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -68,6 +69,18 @@ void update(uint64_t Addr); }; +using DWARFRangesVectorTy = std::vector>; + +// Function symbol extracted from DWARF-based debug info. A function can be +// split into multiple ranges, each range corresponds to one DWARFSymbol. +struct DWARFSymbol { + bool isEntryFunction = false; + std::string SymbolName; + // Index to access AllDWARFAddressRanges, used to look up all ranges of the + // function. + size_t RangesIdx; +}; + // PrologEpilog offset tracker, used to filter out broken stack samples // Currently we use a heuristic size (two) to infer prolog and epilog // based on the start address and return address. 
In the future, @@ -79,8 +92,7 @@ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map> - &FuncStartOffsetMap) { + void inferPrologOffsets(std::map &FuncStartOffsetMap) { for (auto I : FuncStartOffsetMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); @@ -164,9 +176,14 @@ // A list of text sections sorted by start RVA and size. Used to check // if a given RVA is a valid code address. std::set> TextSections; - // An ordered map of mapping function's start offset to its name and - // end offset. - std::map> FuncStartOffsetMap; + + // A vector of DWARF ranges used for quick lookup of all the ranges from + // different parts of a function. + std::vector AllDWARFAddressRanges; + + // An ordered map from a function's start offset to its DWARF-based symbol. + std::map StartOffset2DWARFSymbolMap; + // Offset to context location map. Used to expand the context. std::unordered_map Offset2LocStackMap; @@ -221,6 +238,14 @@ void setUpDisassembler(const ELFObjectFileBase *Obj); void setupSymbolizer(); + // Load debug info of subprograms from DWARF section. + void loadSymbolsFromDWARF(ObjectFile &Obj); + + // A function may be split into multiple non-continuous address ranges. We use + // this to set whether start offset of a function is the real entry of the + // function and also set false to the non-function label. + void setIsEntryFunction(uint64_t StartOffset, StringRef ELFSymbolName); + /// Dissassemble the text section and build various address maps. 
void disassemble(const ELFObjectFileBase *O); @@ -313,19 +338,25 @@ return 0; } - StringRef getFuncFromStartOffset(uint64_t Offset) { - auto I = FuncStartOffsetMap.find(Offset); - if (I == FuncStartOffsetMap.end()) - return StringRef(); - return I->second.first; + DWARFSymbol *getSymbolForStartOffset(uint64_t Offset) { + auto I = StartOffset2DWARFSymbolMap.find(Offset); + if (I == StartOffset2DWARFSymbolMap.end()) + return nullptr; + return &I->second; } - OffsetRange findFuncOffsetRange(uint64_t Offset) { - auto I = FuncStartOffsetMap.upper_bound(Offset); - if (I == FuncStartOffsetMap.begin()) - return {0, 0}; + // Binary search for the symbol with the greatest start offset that is not + // above the input offset; the end of its range is not checked here. + DWARFSymbol *findSymbolForOffset(uint64_t Offset) { + auto I = StartOffset2DWARFSymbolMap.upper_bound(Offset); + if (I == StartOffset2DWARFSymbolMap.begin()) + return nullptr; I--; - return {I->first, I->second.second}; + return &I->second; + } + + DWARFRangesVectorTy &getDWARFRangeVectorForSymbol(DWARFSymbol *Symbol) { + return AllDWARFAddressRanges[Symbol->RangesIdx]; } uint32_t getFuncSizeForContext(SampleContext &Context) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -175,6 +175,9 @@ // Decode pseudo probe related section decodePseudoProbe(Obj); + // Load debug info of subprograms from DWARF section. + loadSymbolsFromDWARF(*dyn_cast(&Binary)); + // Disassemble the text sections. disassemble(Obj); @@ -183,7 +186,7 @@ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder); // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(FuncStartOffsetMap); + ProEpilogTracker.inferPrologOffsets(StartOffset2DWARFSymbolMap); ProEpilogTracker.inferEpilogOffsets(RetAddrs); // TODO: decode other sections. 
@@ -306,6 +309,20 @@ ProbeDecoder.printGUID2FuncDescMap(outs()); } +void ProfiledBinary::setIsEntryFunction(uint64_t Offset, + StringRef ELFSymbolName) { + // Note that the start offset of ELF can be a non-function symbol, so we need + // to binary search for the first matched function symbol. + auto *Symbol = findSymbolForOffset(Offset); + if (!Symbol) + return; + + // We set isEntryFunction to true if the matched ELF function name is equal to its + // DWARF subprogram name. + if (!Symbol->isEntryFunction && Symbol->SymbolName == ELFSymbolName) + Symbol->isEntryFunction = true; +} + bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section) { @@ -316,7 +333,7 @@ uint64_t NextStartOffset = (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress() : SectionOffset + SectSize; - if (StartOffset >= NextStartOffset) + if (StartOffset > NextStartOffset) return true; StringRef SymbolName = @@ -404,8 +421,8 @@ if (ShowDisassembly) outs() << "\n"; - FuncStartOffsetMap.emplace(StartOffset, - std::make_pair(Symbols[SI].Name.str(), EndOffset)); + setIsEntryFunction(StartOffset, Symbols[SI].Name); + return true; } @@ -517,6 +534,66 @@ } } +void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { + auto DebugContext = llvm::DWARFContext::create(Obj); + if (!DebugContext) + exitWithError("Misssing debug info.", Path); + + for (const auto &CompilationUnit : DebugContext->compile_units()) { + for (const auto &DieInfo : CompilationUnit->dies()) { + llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo); + + if (!Die.isSubprogramDIE()) + continue; + auto Name = Die.getName(llvm::DINameKind::LinkageName); + if (!Name) + Name = Die.getName(llvm::DINameKind::ShortName); + if (!Name) + continue; + + auto RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) + continue; + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + + if (Ranges.empty()) + continue; + + // A function may be split into multiple 
non-continuous address ranges. + // Map each range to a standalone symbol and group the ranges by + // function names. Persist the ranges here for future access. + AllDWARFAddressRanges.push_back( + std::vector>()); + + for (const auto &Range : Ranges) { + uint64_t FunctionStart = Range.LowPC; + uint64_t FunctionSize = Range.HighPC - FunctionStart; + + if (FunctionSize == 0 || FunctionStart < getPreferredBaseAddress()) + continue; + + uint64_t StartOffset = FunctionStart - getPreferredBaseAddress(); + uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); + + AllDWARFAddressRanges.back().emplace_back(StartOffset, EndOffset); + + auto R = StartOffset2DWARFSymbolMap.emplace(StartOffset, DWARFSymbol()); + if (R.second) { + DWARFSymbol &Symbol = R.first->second; + Symbol.SymbolName = Name; + Symbol.RangesIdx = AllDWARFAddressRanges.size() - 1; + } else { + WithColor::warning() + << "Duplicated symbol address at " + << format("%8" PRIx64, StartOffset) << " " + << R.first->second.SymbolName << " and " << Name << "\n"; + } + } + } + } + assert(!StartOffset2DWARFSymbolMap.empty() && "Misssing debug info."); +} + void ProfiledBinary::setupSymbolizer() { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions =