diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -228,6 +228,9 @@
   FunctionSamples &
   getFunctionProfileForContext(const SampleContextFrameVector &Context,
                                bool WasLeafInlined = false);
+  // For profiled functions only, compute on demand the per-inline-context
+  // function byte size that is used by the pre-inliner.
+  void computeSizeForProfiledFunctions();
   // Post processing for profiles before writing out, such as mermining
   // and trimming cold profiles, running preinliner on profiles.
   void postProcessProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -425,6 +425,10 @@
 
 void CSProfileGenerator::generateProfile() {
   FunctionSamples::ProfileIsCS = true;
+
+  if (Binary->getTrackFuncContextSize())
+    computeSizeForProfiledFunctions();
+
   if (Binary->usePseudoProbes()) {
     // Enable pseudo probe functionalities in SampleProf
     FunctionSamples::ProfileIsProbeBased = true;
@@ -435,6 +439,29 @@
   postProcessProfiles();
 }
 
+void CSProfileGenerator::computeSizeForProfiledFunctions() {
+  // Map from function start offset to end offset; keying on the start offset
+  // deduplicates functions that are hit by more than one sampled range.
+  std::unordered_map<uint64_t, uint64_t> FuncRanges;
+  // Go through all the ranges in the CS counters, use the start of each range
+  // to look up the function it belongs to and record that function's range.
+  for (const auto &CI : SampleCounters) {
+    for (const auto &Item : CI.second.RangeCounter) {
+      // FIXME: Filter the bogus crossing function range
+      uint64_t RangeStartOffset = Item.first.first;
+      auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
+      if (FuncRange.second != 0)
+        FuncRanges[FuncRange.first] = FuncRange.second;
+    }
+  }
+
+  for (const auto &I : FuncRanges) {
+    uint64_t StartOffset = I.first;
+    uint64_t EndOffset = I.second;
+    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
+  }
+}
+
 void CSProfileGenerator::generateLineNumBasedProfile() {
   for (const auto &CI : SampleCounters) {
     const StringBasedCtxKey *CtxKey =
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -231,7 +231,6 @@
   SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                      bool UseCanonicalFnName = false,
                                      bool UseProbeDiscriminator = false);
-
   /// Decode the interesting parts of the binary and build internal data
   /// structures. On high level, the parts of interest are:
   ///   1. Text sections, including the main code section and the PLT
@@ -297,11 +296,14 @@
   // As we might get an address which is not the code
   // here it would round to the next valid code address by
   // using lower bound operation
-  uint32_t getIndexForAddr(uint64_t Address) const {
-    uint64_t Offset = virtualAddrToOffset(Address);
+  uint32_t getIndexForOffset(uint64_t Offset) const {
     auto Low = llvm::lower_bound(CodeAddrs, Offset);
     return Low - CodeAddrs.begin();
   }
+  uint32_t getIndexForAddr(uint64_t Address) const {
+    uint64_t Offset = virtualAddrToOffset(Address);
+    return getIndexForOffset(Offset);
+  }
 
   uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
     auto I = getIndexForAddr(FrameAddr);
@@ -356,6 +358,10 @@
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                      bool &WasLeafInlined);
+  // Walk the instructions in [StartOffset, EndOffset) and add each one's size
+  // to the function-size tracker under its inlined context.
+  void computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                         uint64_t EndOffset);
 
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
@@ -384,6 +390,8 @@
     return ProbeDecoder.getInlinerDescForProbe(Probe);
   }
 
+  bool getTrackFuncContextSize() const { return TrackFuncContextSize; }
+
   bool getIsLoadedByMMap() { return IsLoadedByMMap; }
 
   void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -353,22 +353,7 @@
 
     if (Disassembled) {
       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
-      // Populate a vector of the symbolized callsite at this location
-      // We don't need symbolized info for probe-based profile, just use an
-      // empty stack as an entry to indicate a valid binary offset
-      SampleContextFrameVector SymbolizedCallStack;
-      if (TrackFuncContextSize) {
-        InstructionPointer IP(this, Offset);
-        // TODO: reallocation of Offset2LocStackMap will lead to dangling
-        // strings We need ProfiledBinary to owned these string.
-        Offset2LocStackMap[Offset] = symbolize(IP, true, UsePseudoProbes);
-        SampleContextFrameVector &SymbolizedCallStack =
-            Offset2LocStackMap[Offset];
-        // Record instruction size for the corresponding context
-        if (TrackFuncContextSize && !SymbolizedCallStack.empty())
-          FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
-                                                   Size);
-      }
+
       // Record instruction size.
       Offset2InstSizeMap[Offset] = Size;
 
@@ -560,6 +545,28 @@
   return CallStack;
 }
 
+void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                                       uint64_t EndOffset) {
+  uint32_t Index = getIndexForOffset(StartOffset);
+  // getIndexForOffset rounds up to the next recorded instruction, so Index
+  // equals CodeAddrs.size() when nothing lies at or after StartOffset.
+  if (Index >= CodeAddrs.size() || CodeAddrs[Index] != StartOffset)
+    WithColor::warning() << "Invalid function start instruction at "
+                         << format("%8" PRIx64, StartOffset) << "\n";
+
+  // Bound the walk by CodeAddrs as well as by EndOffset: a range that reaches
+  // the last recorded instruction must not index one past the end.
+  for (; Index < CodeAddrs.size() && CodeAddrs[Index] < EndOffset; ++Index) {
+    uint64_t Offset = CodeAddrs[Index];
+    const SampleContextFrameVector &SymbolizedCallStack =
+        getFrameLocationStack(Offset, UsePseudoProbes);
+    uint64_t Size = Offset2InstSizeMap[Offset];
+
+    // Record instruction size for the corresponding context
+    FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
+  }
+}
+
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                        uint64_t Address, bool RoundToNext)
     : Binary(Binary), Address(Address) {