diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt --- a/llvm/tools/llvm-profgen/CMakeLists.txt +++ b/llvm/tools/llvm-profgen/CMakeLists.txt @@ -3,6 +3,7 @@ AllTargetsDescs AllTargetsDisassemblers AllTargetsInfos + DebugInfoDWARF Core MC IPO diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -348,26 +348,14 @@ RangeSample ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); - // For each range, we search for the range of the function it belongs to and + // For each range, we search for all ranges of the function it belongs to and // initialize it with zero count, so it remains zero if doesn't hit any // samples. This is to be consistent with compiler that interpret zero count // as unexecuted(cold). for (auto I : RangeCounter) { - uint64_t RangeBegin = I.first.first; - uint64_t RangeEnd = I.first.second; - // Find the function offset range the current range begin belongs to. 
- auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); - if (FuncRange.second == 0) - WithColor::warning() - << "[" << format("%8" PRIx64, RangeBegin) << " - " - << format("%8" PRIx64, RangeEnd) - << "]: Invalid range or disassembling error in profiled binary.\n"; - else if (RangeEnd > FuncRange.second) - WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " - << format("%8" PRIx64, RangeEnd) - << "]: Range is across different functions.\n"; - else - Ranges[FuncRange] += 0; + uint64_t StartOffset = I.first.first; + for (const auto &Range : Binary->getRangesForOffset(StartOffset)) + Ranges[{Range.first, Range.second - 1}] += 0; } RangeSample DisjointRanges; findDisjointRanges(DisjointRanges, Ranges); @@ -401,21 +389,16 @@ } } -static bool isOutlinedFunction(StringRef CalleeName) { - // Check whether it's from hot-cold func split or coro split. - return CalleeName.contains(".resume") || CalleeName.contains(".cold"); -} - StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { - // Get the callee name by branch target if it's a call branch. - StringRef CalleeName = FunctionSamples::getCanonicalFnName( - Binary->getFuncFromStartOffset(TargetOffset)); + // Get the function range by branch target if it's a call branch. + auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); - // We won't accumulate sample count againt outlined function. - if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName)) + // We won't accumulate sample count for a range whose start is not the real + // function entry such as outlined function or inner labels. 
+ if (!FRange || !FRange->IsFuncEntry) return StringRef(); - return CalleeName; + return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); } void ProfileGenerator::populateBoundarySamplesForAllFunctions( @@ -482,20 +465,21 @@ void CSProfileGenerator::computeSizeForProfiledFunctions() { // Hash map to deduplicate the function range and the item is a pair of // function start and end offset. - std::unordered_map FuncRanges; + std::unordered_map AggregatedRanges; // Go through all the ranges in the CS counters, use the start of the range to // look up the function it belongs and record the function range. for (const auto &CI : SampleCounters) { for (auto Item : CI.second.RangeCounter) { // FIXME: Filter the bogus crossing function range. - uint64_t RangeStartOffset = Item.first.first; - auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset); - if (FuncRange.second != 0) - FuncRanges[FuncRange.first] = FuncRange.second; + uint64_t StartOffset = Item.first.first; + // Note that a function can be split into multiple ranges, so get all + // ranges of the function. 
+ for (const auto &Range : Binary->getRangesForOffset(StartOffset)) + AggregatedRanges[Range.first] = Range.second; } } - for (auto I : FuncRanges) { + for (auto I : AggregatedRanges) { uint64_t StartOffset = I.first; uint64_t EndOffset = I.second; Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -12,6 +12,7 @@ #include "CallContext.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -68,6 +69,27 @@ void update(uint64_t Addr); }; +using RangesTy = std::vector>; + +struct BinaryFunction { + StringRef FuncName; + RangesTy Ranges; +}; + +// Info about function range. A function can be split into multiple +// non-continuous ranges, each range corresponds to one FuncRange. +struct FuncRange { + uint64_t StartOffset; + // EndOffset is an exclusive bound. + uint64_t EndOffset; + // Function the range belongs to. + BinaryFunction *Func; + // Whether the start offset is the real entry of the function. + bool IsFuncEntry = false; + + StringRef getFuncName() { return Func->FuncName; } +}; + // PrologEpilog offset tracker, used to filter out broken stack samples // Currently we use a heuristic size (two) to infer prolog and epilog // based on the start address and return address. 
In the future, @@ -79,8 +101,7 @@ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map> - &FuncStartOffsetMap) { + void inferPrologOffsets(std::map &FuncStartOffsetMap) { for (auto I : FuncStartOffsetMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); @@ -164,9 +185,15 @@ // A list of text sections sorted by start RVA and size. Used to check // if a given RVA is a valid code address. std::set> TextSections; - // An ordered map of mapping function's start offset to its name and - // end offset. - std::map> FuncStartOffsetMap; + + // A map from function name to BinaryFunction info. + std::unordered_map BinaryFunctions; + + // An ordered map from function's start offset to function range + // relevant info. Currently to determine if the offset of ELF is the start of + // a real function, we leverage the function range info from DWARF. + std::map StartOffset2FuncRangeMap; + // Offset to context location map. Used to expand the context. std::unordered_map Offset2LocStackMap; @@ -221,6 +248,14 @@ void setUpDisassembler(const ELFObjectFileBase *Obj); void setupSymbolizer(); + // Load debug info of subprograms from DWARF section. + void loadSymbolsFromDWARF(ObjectFile &Obj); + + // A function may be split into multiple non-continuous address ranges. We use + // this to set whether start offset of a function is the real entry of the + // function and also set false to the non-function label. + void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName); + /// Dissassemble the text section and build various address maps. 
void disassemble(const ELFObjectFileBase *O); @@ -313,19 +348,34 @@ return 0; } - StringRef getFuncFromStartOffset(uint64_t Offset) { - auto I = FuncStartOffsetMap.find(Offset); - if (I == FuncStartOffsetMap.end()) - return StringRef(); - return I->second.first; + FuncRange *findFuncRangeForStartOffset(uint64_t Offset) { + auto I = StartOffset2FuncRangeMap.find(Offset); + if (I == StartOffset2FuncRangeMap.end()) + return nullptr; + return &I->second; } - OffsetRange findFuncOffsetRange(uint64_t Offset) { - auto I = FuncStartOffsetMap.upper_bound(Offset); - if (I == FuncStartOffsetMap.begin()) - return {0, 0}; + // Binary search the function range which includes the input offset. + FuncRange *findFuncRangeForOffset(uint64_t Offset) { + auto I = StartOffset2FuncRangeMap.upper_bound(Offset); + if (I == StartOffset2FuncRangeMap.begin()) + return nullptr; I--; - return {I->first, I->second.second}; + + if (Offset >= I->second.EndOffset) + return nullptr; + + return &I->second; + } + + // Get all ranges of one function. + RangesTy getRangesForOffset(uint64_t Offset) { + auto *FRange = findFuncRangeForOffset(Offset); + // Ignore the range which falls into plt section or system lib. + if (!FRange) + return RangesTy(); + + return FRange->Func->Ranges; } uint32_t getFuncSizeForContext(SampleContext &Context) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -175,6 +175,9 @@ // Decode pseudo probe related section decodePseudoProbe(Obj); + // Load debug info of subprograms from DWARF section. + loadSymbolsFromDWARF(*dyn_cast(&Binary)); + // Disassemble the text sections. 
disassemble(Obj); @@ -183,7 +186,7 @@ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder); // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(FuncStartOffsetMap); + ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap); ProEpilogTracker.inferEpilogOffsets(RetAddrs); // TODO: decode other sections. @@ -306,6 +309,20 @@ ProbeDecoder.printGUID2FuncDescMap(outs()); } +void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) { + // Note that the start offset of each ELF section can be a non-function + // symbol, we need to binary search for the start of a real function range. + auto *FuncRange = findFuncRangeForOffset(Offset); + // Skip external function symbol. + if (!FuncRange) + return; + + // Set IsFuncEntry to true if the RangeSymName from ELF is equal to its + // DWARF-based function name. + if (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName) + FuncRange->IsFuncEntry = true; +} + bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section) { @@ -316,7 +333,7 @@ uint64_t NextStartOffset = (SI + 1 < SE) ? 
Symbols[SI + 1].Addr - getPreferredBaseAddress() : SectionOffset + SectSize; - if (StartOffset >= NextStartOffset) + if (StartOffset > NextStartOffset) return true; StringRef SymbolName = @@ -404,8 +421,8 @@ if (ShowDisassembly) outs() << "\n"; - FuncStartOffsetMap.emplace(StartOffset, - std::make_pair(Symbols[SI].Name.str(), EndOffset)); + setIsFuncEntry(StartOffset, Symbols[SI].Name); + return true; } @@ -517,6 +534,81 @@ } } +// Walk every subprogram DIE in the DWARF debug info and record its address +// ranges: grouped per function name in BinaryFunctions, and keyed by range +// start offset in StartOffset2FuncRangeMap. +void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { + auto DebugContext = llvm::DWARFContext::create(Obj); + if (!DebugContext) + exitWithError("Missing debug info.", Path); + + for (const auto &CompilationUnit : DebugContext->compile_units()) { + for (const auto &DieInfo : CompilationUnit->dies()) { + llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo); + + if (!Die.isSubprogramDIE()) + continue; + auto Name = Die.getName(llvm::DINameKind::LinkageName); + if (!Name) + Name = Die.getName(llvm::DINameKind::ShortName); + if (!Name) + continue; + + auto RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + // A failed Expected still owns its Error; consume it explicitly so the + // failure is marked as handled before this DIE is skipped. + consumeError(RangesOrError.takeError()); + continue; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + + if (Ranges.empty()) + continue; + + // Different DWARF symbols can have same function name, search or create + // BinaryFunction indexed by the name. + auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); + auto &Func = Ret.first->second; + if (Ret.second) + Func.FuncName = Ret.first->first; + + for (const auto &Range : Ranges) { + uint64_t FuncStart = Range.LowPC; + uint64_t FuncSize = Range.HighPC - FuncStart; + + if (FuncSize == 0 || FuncStart < getPreferredBaseAddress()) + continue; + + uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); + uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); + + // We may want to know all ranges for one function. Here group the + // ranges and store them into BinaryFunction. 
+ Func.Ranges.emplace_back(StartOffset, EndOffset); + + auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); + if (R.second) { + FuncRange &FRange = R.first->second; + FRange.Func = &Func; + FRange.StartOffset = StartOffset; + FRange.EndOffset = EndOffset; + } else { + WithColor::warning() + << "Duplicated symbol start address at " + << format("%8" PRIx64, StartOffset + getPreferredBaseAddress()) + << " " << R.first->second.getFuncName() << " and " << Name + << "\n"; + } + } + } + } + // Missing debug info is a problem with the input binary rather than a + // broken invariant, so diagnose it in release builds too (no assert). + if (StartOffset2FuncRangeMap.empty()) + exitWithError("Missing debug info.", Path); +} + void ProfiledBinary::setupSymbolizer() { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = @@ -576,7 +668,7 @@ << format("%8" PRIx64, StartOffset) << "\n"; uint64_t Offset = CodeAddrOffsets[Index]; - while (Offset <= EndOffset) { + while (Offset < EndOffset) { const SampleContextFrameVector &SymbolizedCallStack = getFrameLocationStack(Offset, UsePseudoProbes); uint64_t Size = Offset2InstSizeMap[Offset];