diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt --- a/llvm/tools/llvm-profgen/CMakeLists.txt +++ b/llvm/tools/llvm-profgen/CMakeLists.txt @@ -3,6 +3,7 @@ AllTargetsDescs AllTargetsDisassemblers AllTargetsInfos + DebugInfoDWARF Core MC IPO diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -352,22 +352,19 @@ // initialize it with zero count, so it remains zero if doesn't hit any // samples. This is to be consistent with compiler that interpret zero count // as unexecuted(cold). + for (auto I : RangeCounter) { - uint64_t RangeBegin = I.first.first; - uint64_t RangeEnd = I.first.second; - // Find the function offset range the current range begin belongs to. - auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); - if (FuncRange.second == 0) - WithColor::warning() - << "[" << format("%8" PRIx64, RangeBegin) << " - " - << format("%8" PRIx64, RangeEnd) - << "]: Invalid range or disassembling error in profiled binary.\n"; - else if (RangeEnd > FuncRange.second) - WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " - << format("%8" PRIx64, RangeEnd) - << "]: Range is across different functions.\n"; - else - Ranges[FuncRange] += 0; + uint64_t RangeStartOffset = I.first.first; + + // Note that a function can be split into multiple ranges, so we leverage + // FuncRange's RangesIdx to look up all ranges for one entry. + auto *FRange = Binary->findFuncRangeForOffset(RangeStartOffset); + // Ignore the range which falls into plt section or system lib. 
+ if (!FRange) + continue; + + for (const auto &Range : Binary->getAllRangesOfOneFunc(FRange)) + Ranges[{Range.first, Range.second - 1}] += 0; } RangeSample DisjointRanges; findDisjointRanges(DisjointRanges, Ranges); @@ -401,21 +398,16 @@ } } -static bool isOutlinedFunction(StringRef CalleeName) { - // Check whether it's from hot-cold func split or coro split. - return CalleeName.contains(".resume") || CalleeName.contains(".cold"); -} - StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { - // Get the callee name by branch target if it's a call branch. - StringRef CalleeName = FunctionSamples::getCanonicalFnName( - Binary->getFuncFromStartOffset(TargetOffset)); + // Get the function range by branch target if it's a call branch. + auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); - // We won't accumulate sample count againt outlined function. - if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName)) + // We won't accumulate sample count for a range whose start is not the real + // function entry such as outlined function or inner labels. + if (!FRange || !FRange->IsFunctionEntry) return StringRef(); - return CalleeName; + return FunctionSamples::getCanonicalFnName(FRange->FuncSymName); } void ProfileGenerator::populateBoundarySamplesForAllFunctions( @@ -482,20 +474,24 @@ void CSProfileGenerator::computeSizeForProfiledFunctions() { // Hash map to deduplicate the function range and the item is a pair of // function start and end offset. - std::unordered_map FuncRanges; + std::unordered_map<uint64_t, uint64_t> AggregatedRanges; // Go through all the ranges in the CS counters, use the start of the range to // look up the function it belongs and record the function range. for (const auto &CI : SampleCounters) { for (auto Item : CI.second.RangeCounter) { // FIXME: Filter the bogus crossing function range. 
uint64_t RangeStartOffset = Item.first.first; - auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset); - if (FuncRange.second != 0) - FuncRanges[FuncRange.first] = FuncRange.second; + auto *FRange = Binary->findFuncRangeForOffset(RangeStartOffset); + // Ignore the range which falls into plt section or system lib. + if (!FRange) + continue; + + for (const auto &Range : Binary->getAllRangesOfOneFunc(FRange)) + AggregatedRanges[Range.first] = Range.second; } } - for (auto I : FuncRanges) { + for (const auto &I : AggregatedRanges) { uint64_t StartOffset = I.first; uint64_t EndOffset = I.second; Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -12,6 +12,7 @@ #include "CallContext.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -68,6 +69,24 @@ void update(uint64_t Addr); }; +using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>; + +// Info about function range. A function can be split into multiple +// non-continuous ranges, each range corresponds to one FuncRange. +struct FuncRange { + // Skip storing start offset here because FuncRange's always accessed by + // StartOffset2FuncRangeMap whose key is the start offset. + // EndOffset is an exclusive bound. + uint64_t EndOffset = 0; + // Original function name, currently parsed from DWARF. + StringRef FuncSymName; + // Index to access FuncRangesVec, used to look up all ranges of the + // function. + size_t RangesIdx = 0; + // Whether the start offset is the real entry of the function. 
+ bool IsFunctionEntry = false; +}; + // PrologEpilog offset tracker, used to filter out broken stack samples // Currently we use a heuristic size (two) to infer prolog and epilog // based on the start address and return address. In the future, @@ -79,8 +98,7 @@ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map> - &FuncStartOffsetMap) { + void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) { for (auto I : FuncStartOffsetMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); @@ -164,9 +182,18 @@ // A list of text sections sorted by start RVA and size. Used to check // if a given RVA is a valid code address. std::set> TextSections; - // An ordered map of mapping function's start offset to its name and - // end offset. - std::map> FuncStartOffsetMap; + + // Used to quick look-up for all ranges of one function by the given + // FuncRange's RangesIdx. + std::vector<RangesTy> FuncRangesVec; + + std::set<std::string> FuncSymNames; + + // An ordered map of mapping function's start offset to function range + // relevant info. Currently to determine if the offset of ELF is the start of + // a real function, we leverage the function range info from DWARF. + std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap; + // Offset to context location map. Used to expand the context. std::unordered_map Offset2LocStackMap; @@ -221,6 +248,14 @@ void setUpDisassembler(const ELFObjectFileBase *Obj); void setupSymbolizer(); + // Load debug info of subprograms from DWARF section. + void loadSymbolsFromDWARF(ObjectFile &Obj); + + // A function may be split into multiple non-continuous address ranges. We use + // this to set whether start offset of a function is the real entry of the + // function and also set false to the non-function label. 
+ void setIsFunctionEntry(uint64_t Offset, StringRef RangeSymName); + /// Dissassemble the text section and build various address maps. 
void disassemble(const ELFObjectFileBase *O); @@ -313,19 +348,28 @@ return 0; } - StringRef getFuncFromStartOffset(uint64_t Offset) { - auto I = FuncStartOffsetMap.find(Offset); - if (I == FuncStartOffsetMap.end()) - return StringRef(); - return I->second.first; + FuncRange *findFuncRangeForStartOffset(uint64_t Offset) { + auto I = StartOffset2FuncRangeMap.find(Offset); + if (I == StartOffset2FuncRangeMap.end()) + return nullptr; + return &I->second; } - OffsetRange findFuncOffsetRange(uint64_t Offset) { - auto I = FuncStartOffsetMap.upper_bound(Offset); - if (I == FuncStartOffsetMap.begin()) - return {0, 0}; + // Binary search the function range which includes the input offset. + FuncRange *findFuncRangeForOffset(uint64_t Offset) { + auto I = StartOffset2FuncRangeMap.upper_bound(Offset); + if (I == StartOffset2FuncRangeMap.begin()) + return nullptr; I--; - return {I->first, I->second.second}; + + if (Offset >= I->second.EndOffset) + return nullptr; + + return &I->second; + } + + RangesTy &getAllRangesOfOneFunc(FuncRange *FRange) { + return FuncRangesVec[FRange->RangesIdx]; } uint32_t getFuncSizeForContext(SampleContext &Context) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -175,6 +175,9 @@ // Decode pseudo probe related section decodePseudoProbe(Obj); + // Load debug info of subprograms from DWARF section. + loadSymbolsFromDWARF(*cast<ObjectFile>(&Binary)); + // Disassemble the text sections. disassemble(Obj); @@ -183,7 +186,7 @@ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder); // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(FuncStartOffsetMap); + ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap); ProEpilogTracker.inferEpilogOffsets(RetAddrs); // TODO: decode other sections. 
@@ -306,6 +309,21 @@ ProbeDecoder.printGUID2FuncDescMap(outs()); } +void ProfiledBinary::setIsFunctionEntry(uint64_t Offset, + StringRef RangeSymName) { + // Note that the start offset of ELF section can be a non-function symbol, we + // need to binary search for the start of a real function range. + auto *FRange = findFuncRangeForOffset(Offset); + // Skip external function symbol. + if (!FRange) + return; + + // Set IsFunctionEntry to true if the RangeSymName from ELF is equal to its + // DWARF function/subprogram name. + if (!FRange->IsFunctionEntry && FRange->FuncSymName == RangeSymName) + FRange->IsFunctionEntry = true; +} + bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section) { @@ -316,7 +334,7 @@ uint64_t NextStartOffset = (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress() : SectionOffset + SectSize; - if (StartOffset >= NextStartOffset) + if (StartOffset > NextStartOffset) return true; StringRef SymbolName = @@ -404,8 +422,8 @@ if (ShowDisassembly) outs() << "\n"; - FuncStartOffsetMap.emplace(StartOffset, - std::make_pair(Symbols[SI].Name.str(), EndOffset)); + setIsFunctionEntry(StartOffset, Symbols[SI].Name); + return true; } @@ -517,6 +535,72 @@ } } +void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { + auto DebugContext = llvm::DWARFContext::create(Obj); + if (!DebugContext) + exitWithError("Missing debug info.", Path); + + for (const auto &CompilationUnit : DebugContext->compile_units()) { + for (const auto &DieInfo : CompilationUnit->dies()) { + llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo); + + if (!Die.isSubprogramDIE()) + continue; + auto Name = Die.getName(llvm::DINameKind::LinkageName); + if (!Name) + Name = Die.getName(llvm::DINameKind::ShortName); + if (!Name) + continue; + + auto RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + // The error must be consumed before the Expected is destroyed. + consumeError(RangesOrError.takeError()); + continue; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + + if 
(Ranges.empty()) + continue; + + // A function may be split into multiple non-continuous address ranges. + // Sometimes we want to know all ranges for one function. Here group the + // ranges and store them into FuncRangesVec. Later it can be accessed by + // FuncRange's RangesIdx. + FuncRangesVec.push_back(RangesTy()); + auto It = FuncSymNames.insert(Name); + + for (const auto &Range : Ranges) { + uint64_t FuncStart = Range.LowPC; + uint64_t FuncEnd = Range.HighPC; + + // Skip empty or inverted ranges and ranges below the preferred base. + if (FuncEnd <= FuncStart || FuncStart < getPreferredBaseAddress()) + continue; + + uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); + uint64_t EndOffset = FuncEnd - getPreferredBaseAddress(); + + FuncRangesVec.back().emplace_back(StartOffset, EndOffset); + + auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); + if (R.second) { + FuncRange &FRange = R.first->second; + FRange.FuncSymName = *It.first; + FRange.RangesIdx = FuncRangesVec.size() - 1; + FRange.EndOffset = EndOffset; + } else { + WithColor::warning() + << "Duplicated symbol offset at " + << format("%8" PRIx64, StartOffset) << " " + << R.first->second.FuncSymName << " and " << Name << "\n"; + } + } + } + } + assert(!StartOffset2FuncRangeMap.empty() && "Missing debug info."); +} + void ProfiledBinary::setupSymbolizer() { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = @@ -576,7 +660,7 @@ << format("%8" PRIx64, StartOffset) << "\n"; uint64_t Offset = CodeAddrOffsets[Index]; - while (Offset < = EndOffset) { + while (Offset < EndOffset) { const SampleContextFrameVector &SymbolizedCallStack = getFrameLocationStack(Offset, UsePseudoProbes); uint64_t Size = Offset2InstSizeMap[Offset];