diff --git a/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof @@ -0,0 +1,5 @@ +3 +0-0:1 +f-fff0:1 +ffff-ffff:1 +0 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test --- a/llvm/test/tools/llvm-profgen/inline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -9,6 +9,8 @@ ; RUN: echo -e "0\n0" > %t ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --fill-zero-for-all-funcs ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-ALL-ZERO +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB CHECK: main:188:0 CHECK: 0: 0 @@ -58,6 +60,33 @@ CHECK-RAW-PROFILE-NEXT: 677->650:21 CHECK-RAW-PROFILE-NEXT: 691->669:43 +;CHECK-OB: foo:8:0 +;CHECK-OB: 0: 1 +;CHECK-OB: 2.1: 1 +;CHECK-OB: 3: 1 +;CHECK-OB: 3.2: 1 +;CHECK-OB: 4: 1 +;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: 3.2: bar:2 +;CHECK-OB: 1: 1 +;CHECK-OB: 7: 1 +;CHECK-OB: main:8:0 +;CHECK-OB: 0: 1 +;CHECK-OB: 2: 1 +;CHECK-OB: 1: foo:6 +;CHECK-OB: 2.1: 1 +;CHECK-OB: 3: 1 +;CHECK-OB: 3.2: 1 +;CHECK-OB: 4: 1 +;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: 3.2: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: bar:2:0 +;CHECK-OB: 1: 1 +;CHECK-OB: 5: 1 + ; original code: ; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out #include diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -355,7 +355,7 @@ if (FillZeroForAllFuncs) { for (auto &FuncI : Binary->getAllBinaryFunctions()) { for (auto &R : 
FuncI.second.Ranges) { - Ranges[{R.first, R.second}] += 0; + Ranges[{R.first, R.second - 1}] += 0; } } } else { @@ -377,16 +377,17 @@ void ProfileGenerator::populateBodySamplesForAllFunctions( const RangeSample &RangeCounter) { for (auto Range : preprocessRangeCounter(RangeCounter)) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; - InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - while (IP.Address <= RangeEnd) { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); + // Disjoint ranges may have range in the middle of two instr, e.g. If Instr1 + // at Addr1, and Instr2 at Addr2, disjoint range can be [Addr1+1 Addr2-1]. + // We should ignore such range. Similarly a range begin may not be on the + // instruction boundary, we should round to next instruction. + uint32_t Index = Binary->getIndexForOffset(RangeBegin); + uint64_t Offset = Binary->getOffsetForIndex(Index); + while (Offset <= RangeEnd) { const SampleContextFrameVector &FrameVec = Binary->getFrameLocationStack(Offset); if (!FrameVec.empty()) { @@ -394,8 +395,8 @@ updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), Count); } - // Move to next IP within the range. - IP.advance(); + // Get next offset. Return UINT64_MAX if index is out of bounds. 
+ Offset = Binary->getOffsetForIndex(++Index); } } } @@ -526,28 +527,28 @@ RangeSample Ranges; findDisjointRanges(Ranges, RangeCounter); for (auto Range : Ranges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; // Disjoint ranges have introduce zero-filled gap that // doesn't belong to current context, filter them out. if (Count == 0) continue; - InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - while (IP.Address <= RangeEnd) { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); + // Disjoint ranges may have range in the middle of two instr, e.g. If Instr1 + // at Addr1, and Instr2 at Addr2, disjoint range can be [Addr1+1 Addr2-1]. + // We should ignore such range. Similarly a range begin may not be on the + // instruction boundary, we should round to next instruction. + uint32_t Index = Binary->getIndexForOffset(RangeBegin); + uint64_t Offset = Binary->getOffsetForIndex(Index); + while (Offset <= RangeEnd) { auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); if (LeafLoc.hasValue()) { // Recording body sample for this specific context updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); } - - // Move to next IP within the range - IP.advance(); + // Get next offset. Return UINT64_MAX if index is out of bounds. 
+ Offset = Binary->getOffsetForIndex(++Index); } } } @@ -705,26 +706,24 @@ RangeSample Ranges; findDisjointRanges(Ranges, RangeCounter); for (const auto &Range : Ranges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; // Disjoint ranges have introduce zero-filled gap that // doesn't belong to current context, filter them out. if (Count == 0) continue; - InstructionPointer IP(Binary, RangeBegin, true); - - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - - while (IP.Address <= RangeEnd) { + // Disjoint ranges may have range in the middle of two instr, e.g. If Instr1 + // at Addr1, and Instr2 at Addr2, disjoint range can be [Addr1+1 Addr2-1]. + // We should ignore such range. Similarly a range begin may not be on the + // instruction boundary, we should round to next instruction. + uint32_t Index = Binary->getIndexForOffset(RangeBegin); + uint64_t Offset = Binary->getOffsetForIndex(Index); + while (Offset <= RangeEnd) { const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap(); - auto It = Address2ProbesMap.find(IP.Address); + auto It = Address2ProbesMap.find(Binary->offsetToVirtualAddr(Offset)); if (It != Address2ProbesMap.end()) { for (const auto &Probe : It->second) { if (!Probe.isBlock()) @@ -732,8 +731,8 @@ ProbeCounter[&Probe] += Count; } } - - IP.advance(); + // Get next offset. Return UINT64_MAX if index is out of bounds. 
+ Offset = Binary->getOffsetForIndex(++Index); } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -31,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" #include "llvm/Transforms/IPO/SampleContextTracker.h" +#include #include #include #include @@ -62,9 +63,7 @@ }; // Index to the sorted code address array of the binary. uint64_t Index = 0; - InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, - bool RoundToNext = false); - void advance(); + InstructionPointer(const ProfiledBinary *Binary, uint64_t Address); void backward(); void update(uint64_t Addr); }; @@ -73,6 +72,7 @@ struct BinaryFunction { StringRef FuncName; + // End of range is an exclusive bound. RangesTy Ranges; }; @@ -101,24 +101,10 @@ PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map &FuncStartOffsetMap) { - for (auto I : FuncStartOffsetMap) { - PrologEpilogSet.insert(I.first); - InstructionPointer IP(Binary, I.first); - IP.advance(); - PrologEpilogSet.insert(IP.Offset); - } - } + void inferPrologOffsets(std::map &FuncStartOffsetMap); // Take the last two addresses before the return address as epilog - void inferEpilogOffsets(std::unordered_set &RetAddrs) { - for (auto Addr : RetAddrs) { - PrologEpilogSet.insert(Addr); - InstructionPointer IP(Binary, Addr); - IP.backward(); - PrologEpilogSet.insert(IP.Offset); - } - } + void inferEpilogOffsets(std::unordered_set &RetAddrs); }; // Track function byte size under different context (outlined version as well as @@ -336,6 +322,14 @@ return offsetToVirtualAddr(CodeAddrOffsets[Index]); } + uint64_t getOffsetForIndex(uint32_t Index) const { + if (Index >= getCodeOffsetsSize()) + return UINT64_MAX; + return CodeAddrOffsets[Index]; + }; + + size_t getCodeOffsetsSize() 
const { return CodeAddrOffsets.size(); } + bool usePseudoProbes() const { return UsePseudoProbes; } // Get the index in CodeAddrOffsets for the address // As we might get an address which is not the code diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -16,6 +16,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/TargetSelect.h" +#include +#include #define DEBUG_TYPE "load-binary" @@ -658,11 +660,11 @@ void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, uint64_t EndOffset) { uint32_t Index = getIndexForOffset(StartOffset); - if (CodeAddrOffsets[Index] != StartOffset) + uint64_t Offset = getOffsetForIndex(Index); + if (Offset != StartOffset) WithColor::warning() << "Invalid start instruction at " << format("%8" PRIx64, StartOffset) << "\n"; - uint64_t Offset = CodeAddrOffsets[Index]; while (Offset < EndOffset) { const SampleContextFrameVector &SymbolizedCallStack = getFrameLocationStack(Offset, UsePseudoProbes); @@ -671,24 +673,15 @@ // Record instruction size for the corresponding context FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); - Offset = CodeAddrOffsets[++Index]; + // Get next offset. Return UINT64_MAX if index is out of bounds. 
+ Offset = getOffsetForIndex(++Index); } } InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, - uint64_t Address, bool RoundToNext) + uint64_t Address) : Binary(Binary), Address(Address) { Index = Binary->getIndexForAddr(Address); - if (RoundToNext) { - // we might get address which is not the code - // it should round to the next valid address - this->Address = Binary->getAddressforIndex(Index); - } -} - -void InstructionPointer::advance() { - Index++; - Address = Binary->getAddressforIndex(Index); } void InstructionPointer::backward() { @@ -701,5 +694,24 @@ Index = Binary->getIndexForAddr(Address); } +void PrologEpilogTracker::inferPrologOffsets( + std::map &FuncStartOffsetMap) { + for (auto I : FuncStartOffsetMap) { + PrologEpilogSet.insert(I.first); + uint32_t Idx = Binary->getIndexForOffset(I.first); + PrologEpilogSet.insert(Binary->getOffsetForIndex(++Idx)); + } +} + +void PrologEpilogTracker::inferEpilogOffsets( + std::unordered_set &RetOffsets) { + for (auto Offset : RetOffsets) { + PrologEpilogSet.insert(Offset); + uint32_t Idx = Binary->getIndexForOffset(Offset); + if (Idx == 0) + continue; + PrologEpilogSet.insert(Binary->getOffsetForIndex(--Idx)); + } +} } // end namespace sampleprof } // end namespace llvm