diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -228,6 +228,9 @@
   FunctionSamples &
   getFunctionProfileForContext(const SampleContextFrameVector &Context,
                                bool WasLeafInlined = false);
+  // For the profiled functions, compute their inline context function byte
+  // size on demand; it is used by the pre-inliner.
+  void computeSizeForProfiledFunctions();
   // Post processing for profiles before writing out, such as merging
   // and trimming cold profiles, running preinliner on profiles.
   void postProcessProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -425,6 +425,10 @@
 void CSProfileGenerator::generateProfile() {
   FunctionSamples::ProfileIsCS = true;
+
+  if (Binary->getTrackFuncContextSize())
+    computeSizeForProfiledFunctions();
+
   if (Binary->usePseudoProbes()) {
     // Enable pseudo probe functionalities in SampleProf
     FunctionSamples::ProfileIsProbeBased = true;
@@ -435,6 +439,29 @@
   postProcessProfiles();
 }
 
+void CSProfileGenerator::computeSizeForProfiledFunctions() {
+  // Hash map to deduplicate the function ranges; each entry maps a function's
+  // start offset to its end offset.
+  std::unordered_map<uint64_t, uint64_t> FuncRanges;
+  // Go through all the ranges in the CS counters, use the start of each range
+  // to look up the function it belongs to and record that function's range.
+  for (const auto &CI : SampleCounters) {
+    for (auto Item : CI.second.RangeCounter) {
+      // FIXME: Filter out the bogus ranges that cross function boundaries.
+      uint64_t RangeStartOffset = Item.first.first;
+      auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
+      if (FuncRange.second != 0)
+        FuncRanges[FuncRange.first] = FuncRange.second;
+    }
+  }
+
+  for (auto I : FuncRanges) {
+    uint64_t StartOffset = I.first;
+    uint64_t EndOffset = I.second;
+    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
+  }
+}
+
 void CSProfileGenerator::generateLineNumBasedProfile() {
   for (const auto &CI : SampleCounters) {
     const StringBasedCtxKey *CtxKey =
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -175,7 +175,7 @@
   // An array of offsets of all instructions sorted in increasing order. The
   // sorting is needed to fast advance to the next forward/backward instruction.
-  std::vector<uint64_t> CodeAddrs;
+  std::vector<uint64_t> CodeAddrOffsets;
   // A set of call instruction offsets. Used by virtual unwinding.
   std::unordered_set<uint64_t> CallAddrs;
   // A set of return instruction offsets. Used by virtual unwinding.
@@ -231,7 +231,6 @@
   SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                      bool UseCanonicalFnName = false,
                                      bool UseProbeDiscriminator = false);
-
   /// Decode the interesting parts of the binary and build internal data
   /// structures. On high level, the parts of interest are:
   /// 1. Text sections, including the main code section and the PLT
@@ -289,18 +288,21 @@
   }
 
   uint64_t getAddressforIndex(uint64_t Index) const {
-    return offsetToVirtualAddr(CodeAddrs[Index]);
+    return offsetToVirtualAddr(CodeAddrOffsets[Index]);
   }
 
   bool usePseudoProbes() const { return UsePseudoProbes; }
-  // Get the index in CodeAddrs for the address
+  // Get the index in CodeAddrOffsets for the address
   // As we might get an address which is not the code
   // here it would round to the next valid code address by
   // using lower bound operation
+  uint32_t getIndexForOffset(uint64_t Offset) const {
+    auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
+    return Low - CodeAddrOffsets.begin();
+  }
   uint32_t getIndexForAddr(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    auto Low = llvm::lower_bound(CodeAddrs, Offset);
-    return Low - CodeAddrs.begin();
+    return getIndexForOffset(Offset);
   }
 
   uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
@@ -356,6 +358,10 @@
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                      bool &WasLeafInlined);
+  // Go through the instructions in the given range and record each one's size
+  // for its inline context.
+  void computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                         uint64_t EndOffset);
 
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
@@ -384,6 +390,8 @@
     return ProbeDecoder.getInlinerDescForProbe(Probe);
   }
 
+  bool getTrackFuncContextSize() { return TrackFuncContextSize; }
+
   bool getIsLoadedByMMap() { return IsLoadedByMMap; }
 
   void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -293,10 +293,10 @@
   uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
   uint64_t SectSize = Section.getSize();
   uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
-  uint64_t EndOffset = (SI + 1 < SE)
-                           ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
-                           : SectionOffset + SectSize;
-  if (StartOffset >= EndOffset)
+  uint64_t NextStartOffset =
+      (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
+                    : SectionOffset + SectSize;
+  if (StartOffset >= NextStartOffset)
     return true;
 
   StringRef SymbolName =
@@ -316,10 +316,11 @@
   };
 
   uint64_t Offset = StartOffset;
+  uint64_t EndOffset = 0;
   // Size of a consecutive invalid instruction range starting from Offset -1
   // backwards.
   uint64_t InvalidInstLength = 0;
-  while (Offset < EndOffset) {
+  while (Offset < NextStartOffset) {
     MCInst Inst;
     uint64_t Size;
     // Disassemble an instruction.
@@ -353,32 +354,19 @@
 
     if (Disassembled) {
       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
-      // Populate a vector of the symbolized callsite at this location
-      // We don't need symbolized info for probe-based profile, just use an
-      // empty stack as an entry to indicate a valid binary offset
-      SampleContextFrameVector SymbolizedCallStack;
-      if (TrackFuncContextSize) {
-        InstructionPointer IP(this, Offset);
-        // TODO: reallocation of Offset2LocStackMap will lead to dangling
-        // strings We need ProfiledBinary to owned these string.
-        Offset2LocStackMap[Offset] = symbolize(IP, true, UsePseudoProbes);
-        SampleContextFrameVector &SymbolizedCallStack =
-            Offset2LocStackMap[Offset];
-        // Record instruction size for the corresponding context
-        if (TrackFuncContextSize && !SymbolizedCallStack.empty())
-          FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
-                                                   Size);
-      }
+      // Record instruction size.
       Offset2InstSizeMap[Offset] = Size;
 
       // Populate address maps.
-      CodeAddrs.push_back(Offset);
+      CodeAddrOffsets.push_back(Offset);
       if (MCDesc.isCall())
         CallAddrs.insert(Offset);
       else if (MCDesc.isReturn())
         RetAddrs.insert(Offset);
 
+      EndOffset = Offset;
+
       if (InvalidInstLength) {
         WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
         InvalidInstLength = 0;
@@ -560,6 +548,26 @@
   return CallStack;
 }
 
+void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                                       uint64_t EndOffset) {
+  uint32_t Index = getIndexForOffset(StartOffset);
+  if (CodeAddrOffsets[Index] != StartOffset)
+    WithColor::warning() << "Invalid start instruction at "
+                         << format("%8" PRIx64, StartOffset) << "\n";
+
+  uint64_t Offset = CodeAddrOffsets[Index];
+  while (Offset <= EndOffset) {
+    const SampleContextFrameVector &SymbolizedCallStack =
+        getFrameLocationStack(Offset, UsePseudoProbes);
+    uint64_t Size = Offset2InstSizeMap[Offset];
+
+    // Record instruction size for the corresponding context
+    FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
+
+    Offset = CodeAddrOffsets[++Index];
+  }
+}
+
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                        uint64_t Address, bool RoundToNext)
     : Binary(Binary), Address(Address) {