diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test --- a/llvm/test/tools/llvm-profgen/inline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -4,10 +4,13 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK CHECK: main:836:0 +CHECK: 0: 0 +CHECK: 2: 0 CHECK: 1: foo:836 CHECK: 2.1: 42 CHECK: 3: 62 CHECK: 3.2: 21 +CHECK: 4: 0 CHECK: 3.1: bar:252 CHECK: 1: 42 CHECK: 3.2: bar:63 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -47,12 +47,20 @@ ;CHECK-NEXT: 2: 5 ;CHECK-NEXT: 3: 5 ;CHECK-NEXT: main:213:0 +;CHECK-NEXT: 0: 0 +;CHECK-NEXT: 3: 0 +;CHECK-NEXT: 4.1: 0 +;CHECK-NEXT: 4.3: 0 ;CHECK-NEXT: 5.1: 10 ;CHECK-NEXT: 5.3: 10 ;CHECK-NEXT: 6: 10 ;CHECK-NEXT: 6.1: 12 ;CHECK-NEXT: 6.3: 10 +;CHECK-NEXT: 7: 0 ;CHECK-NEXT: 8: 0 quick_sort:1 +;CHECK-NEXT: 9: 0 +;CHECK-NEXT: 11: 0 +;CHECK-NEXT: 14: 0 ; original code: ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -83,6 +83,7 @@ private: void generateLineNumBasedProfile(); + RangeSample preprocessRangeCounter(const RangeSample &RangeCounter); FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); // Helper function to get the leaf frame's FunctionProfile by traversing the // inline stack and meanwhile it adds the total samples for each frame's diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -137,15 +137,25 @@ */ struct BoundaryPoint { // Sum of sample counts beginning at this point - uint64_t BeginCount; + uint64_t BeginCount = UINT64_MAX; // Sum of sample counts ending at this point - uint64_t EndCount; - - BoundaryPoint() : BeginCount(0), EndCount(0){}; - - void addBeginCount(uint64_t Count) { BeginCount += Count; } + uint64_t EndCount = UINT64_MAX; + // Is the begin point of a zero range. + bool IsZeroRangeBegin = false; + // Is the end point of a zero range. + bool IsZeroRangeEnd = false; + + void addBeginCount(uint64_t Count) { + if (BeginCount == UINT64_MAX) + BeginCount = 0; + BeginCount += Count; + } - void addEndCount(uint64_t Count) { EndCount += Count; } + void addEndCount(uint64_t Count) { + if (EndCount == UINT64_MAX) + EndCount = 0; + EndCount += Count; + } }; /* @@ -174,41 +184,70 @@ [A, B-1]: 100 [B, B]: 300 [B+1, C]: 200. + + Example for zero value range: + + |<--- 100 --->| + |<--- 200 --->| + |<--------------- 0 ----------------->| + A B C D E F + + [A, B-1] : 0 + [B, C] : 100 + [C+1, D-1]: 0 + [D, E] : 200 + [E+1, F] : 0 */ std::map Boundaries; for (auto Item : Ranges) { - uint64_t Begin = Item.first.first; - uint64_t End = Item.first.second; - assert(Begin <= End && "Invalid instruction range"); + assert(Item.first.first <= Item.first.second && + "Invalid instruction range"); + auto &BeginPoint = Boundaries[Item.first.first]; + auto &EndPoint = Boundaries[Item.first.second]; uint64_t Count = Item.second; - if (Boundaries.find(Begin) == Boundaries.end()) - Boundaries[Begin] = BoundaryPoint(); - Boundaries[Begin].addBeginCount(Count); - if (Boundaries.find(End) == Boundaries.end()) - Boundaries[End] = BoundaryPoint(); - Boundaries[End].addEndCount(Count); + BeginPoint.addBeginCount(Count); + EndPoint.addEndCount(Count); + if (Count == 0) { + BeginPoint.IsZeroRangeBegin = true; + EndPoint.IsZeroRangeEnd = true; + } } + // Use UINT64_MAX to indicate there is no existing range between BeginAddress + // and the next valid address uint64_t BeginAddress = UINT64_MAX; + int ZeroRangeDepth = 0; uint64_t Count = 0; for (auto Item : Boundaries) { uint64_t Address = Item.first; BoundaryPoint &Point = Item.second; - if (Point.BeginCount) { + if (Point.BeginCount != UINT64_MAX) { if (BeginAddress != UINT64_MAX) DisjointRanges[{BeginAddress, Address - 1}] = Count; Count += Point.BeginCount; BeginAddress = Address; + ZeroRangeDepth += Point.IsZeroRangeBegin; } - if (Point.EndCount) { + if (Point.EndCount != UINT64_MAX) { assert((BeginAddress != UINT64_MAX) && "First boundary point cannot be 'end' point"); DisjointRanges[{BeginAddress, Address}] = Count; assert(Count >= Point.EndCount && "Mismatched live ranges"); Count -= Point.EndCount; BeginAddress = Address + 1; + ZeroRangeDepth -= Point.IsZeroRangeEnd; + // If the remaining count is zero and it's no longer in a zero range, this + // means we consume all the ranges before, thus mark BeginAddress as + // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: + // [<---- 10 ---->] + // [<---- 20 ---->] + // A B C D + // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't + // have the [B+1, C-1] zero range. + if (Count == 0 && ZeroRangeDepth == 0) + BeginAddress = UINT64_MAX; } } } @@ -223,7 +262,7 @@ ErrorOr R = FunctionProfile.findSamplesAt( LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount < Count) { + if (PreviousCount <= Count) { FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator, Count - PreviousCount); @@ -282,18 +321,41 @@ return *FunctionProfile; } +RangeSample +ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { + RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); + // For each range, we search for the range of the function it belongs to and + // initialize it with zero count, so it remains zero if doesn't hit any + // samples. This is to be consistent with compiler that interpret zero count + // as unexecuted(cold). + for (auto I : RangeCounter) { + uint64_t RangeBegin = I.first.first; + uint64_t RangeEnd = I.first.second; + // Find the function offset range the current range begin belongs to. + auto FuncRange = Binary->findFuncOffsetRange(RangeBegin); + if (FuncRange.second == 0) + WithColor::warning() + << "[" << format("%8" PRIx64, RangeBegin) << " - " + << format("%8" PRIx64, RangeEnd) + << "]: Invalid range or disassembling error in profiled binary.\n"; + else if (RangeEnd > FuncRange.second) + WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - " + << format("%8" PRIx64, RangeEnd) + << "]: Range is across different functions.\n"; + else + Ranges[FuncRange] += 0; + } + RangeSample DisjointRanges; + findDisjointRanges(DisjointRanges, Ranges); + return DisjointRanges; +} + void ProfileGenerator::populateBodySamplesForAllFunctions( const RangeSample &RangeCounter) { - RangeSample Ranges; - findDisjointRanges(Ranges, RangeCounter); - for (auto Range : Ranges) { + for (auto Range : preprocessRangeCounter(RangeCounter)) { uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); uint64_t Count = Range.second; - // Disjoint ranges have introduce zero-filled gap that - // doesn't belong to current context, filter them out. - if (Count == 0) - continue; InstructionPointer IP(Binary, RangeBegin, true); // Disjoint ranges may have range in the middle of two instr, diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -31,6 +31,7 @@ #include "llvm/Support/Path.h" #include "llvm/Transforms/IPO/SampleContextTracker.h" #include +#include #include #include #include @@ -78,9 +79,9 @@ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets( - std::unordered_map &FuncStartAddrMap) { - for (auto I : FuncStartAddrMap) { + void inferPrologOffsets(std::map> + &FuncStartOffsetMap) { + for (auto I : FuncStartOffsetMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); IP.advance(); @@ -138,6 +139,8 @@ ContextTrieNode RootContext; }; +using OffsetRange = std::pair; + class ProfiledBinary { // Absolute path of the binary. std::string Path; @@ -161,8 +164,9 @@ // A list of text sections sorted by start RVA and size. Used to check // if a given RVA is a valid code address. std::set> TextSections; - // Function offset to name mapping. - std::unordered_map FuncStartAddrMap; + // An ordered map of mapping function's start offset to its name and + // end offset. + std::map> FuncStartOffsetMap; // Offset to context location map. Used to expand the context. std::unordered_map Offset2LocStackMap; // An array of offsets of all instructions sorted in increasing order. The @@ -304,10 +308,18 @@ } StringRef getFuncFromStartOffset(uint64_t Offset) { - auto I = FuncStartAddrMap.find(Offset); - if (I == FuncStartAddrMap.end()) + auto I = FuncStartOffsetMap.find(Offset); + if (I == FuncStartOffsetMap.end()) return StringRef(); - return I->second; + return I->second.first; + } + + OffsetRange findFuncOffsetRange(uint64_t Offset) { + auto I = FuncStartOffsetMap.upper_bound(Offset); + if (I == FuncStartOffsetMap.begin()) + return {0, 0}; + I--; + return {I->first, I->second.second}; } uint32_t getFuncSizeForContext(SampleContext &Context) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -178,7 +178,7 @@ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder); // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap); + ProEpilogTracker.inferPrologOffsets(FuncStartOffsetMap); ProEpilogTracker.inferEpilogOffsets(RetAddrs); // TODO: decode other sections. @@ -397,7 +397,8 @@ if (ShowDisassembly) outs() << "\n"; - FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str(); + FuncStartOffsetMap.emplace(StartOffset, + std::make_pair(Symbols[SI].Name.str(), EndOffset)); return true; }