diff --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript --- a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript +++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript @@ -8,3 +8,12 @@ // The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges. + + 20179e + 2017f9 + 7f83e84e7793 + 5541f689495641d7 + 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x20188c/0x201760/P/-/-/0 0x2017d8/0x2017e3/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 + + +// The consecutive pairs 20188c/0x201760 and 0x2017d8/0x2017e3 form an oversized execution range [0x2017e3, 0x20188c] that is greater than the largest function range, should be ignored to avoid bogus instruction ranges. diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test --- a/llvm/test/tools/llvm-profgen/invalid-range.test +++ b/llvm/test/tools/llvm-profgen/invalid-range.test @@ -9,38 +9,40 @@ ; NOCS: 4 -; NOCS-NEXT: 201760-20177f:2 -; NOCS-NEXT: 20179e-2017bf:1 -; NOCS-NEXT: 2017c4-2017cf:1 -; NOCS-NEXT: 2017c4-2017d8:1 -; NOCS-NEXT: 4 -; NOCS-NEXT: 20177f->2017c4:2 -; NOCS-NEXT: 2017bf->201760:2 -; NOCS-NEXT: 2017cf->20179e:2 -; NOCS-NEXT: 2017d8->2017e3:1 +; NOCS-NEXT: 201760-20177f:4 +; NOCS-NEXT: 20179e-2017bf:2 +; NOCS-NEXT: 2017c4-2017cf:2 +; NOCS-NEXT: 2017c4-2017d8:2 +; NOCS-NEXT: 5 +; NOCS-NEXT: 20177f->2017c4:4 +; NOCS-NEXT: 2017bf->201760:3 +; NOCS-NEXT: 2017cf->20179e:4 +; NOCS-NEXT: 2017d8->2017e3:2 +; NOCS-NEXT: 20188c->201760:1 ; CS: [] ; CS-NEXT: 3 -; CS-NEXT: 201760-20177f:1 -; CS-NEXT: 20179e-2017bf:1 -; CS-NEXT: 2017c4-2017d8:1 +; CS-NEXT: 201760-20177f:2 +; CS-NEXT: 20179e-2017bf:2 +; CS-NEXT: 2017c4-2017d8:2 ; CS-NEXT: 4 -; CS-NEXT: 20177f->2017c4:1 -; CS-NEXT: 2017bf->201760:1 -; CS-NEXT: 2017cf->20179e:1 -; CS-NEXT: 2017d8->2017e3:1 +; CS-NEXT: 20177f->2017c4:2 +; CS-NEXT: 2017bf->201760:2 +; CS-NEXT: 2017cf->20179e:2 +; CS-NEXT: 2017d8->2017e3:2 ; CS-NEXT: [0x7f4] ; CS-NEXT: 1 -; CS-NEXT: 2017c4-2017cf:1 +; CS-NEXT: 2017c4-2017cf:2 ; CS-NEXT: 2 ; CS-NEXT: 2017bf->201760:1 -; CS-NEXT: 2017cf->20179e:1 +; CS-NEXT: 2017cf->20179e:2 ; CS-NEXT: [0x7f4 @ 0x7bf] ; CS-NEXT: 1 -; CS-NEXT: 201760-20177f:1 -; CS-NEXT: 1 -; CS-NEXT: 20177f->2017c4:1 +; CS-NEXT: 201760-20177f:2 +; CS-NEXT: 2 +; CS-NEXT: 20177f->2017c4:2 +; CS-NEXT: 20188c->201760:1 ; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -210,6 +210,23 @@ using SampleVector = SmallVector, 16>; +inline bool isValidLBRRange(uint64_t Start, uint64_t End, + ProfiledBinary *Binary) { + // 1) Start bigger than End is considered invalid. + // 2) LBR ranges longer than the largest function range are assumed invalid. + // 3) LBR ranges cross the unconditional jmp are assumed invalid. + // It's found that perf data may contain duplicate LBR entries that could form + // a range that does not reflect real execution flow on some Intel targets, + // e.g. Skylake. Such ranges are ususally very long. Since there + // cannot be a linear execution range that spans over multiple funtion ranges + // and unconditional jmp. Exclude them when they are larger than the max + // function size or cross the unconditional jmp. + return Start <= End && End - Start <= Binary->getMaxFuncRangeSize() && + !Binary->rangeCrossCall(Start, End) && + !Binary->rangeCrossReturn(Start, End) && + !Binary->rangeCrossUncondBranch(Start, End); +} + // The state for the unwinder, it doesn't hold the data but only keep the // pointer/index of the data, While unwinding, the CallStack is changed // dynamicially and will be recorded as the context of the sample diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -99,7 +99,7 @@ return; } - if (Target > End) { + if (!isValidLBRRange(Target, End, Binary)) { // Skip unwinding the rest of LBR trace when a bogus range is seen. State.setInvalid(); return; @@ -581,7 +581,6 @@ if (!SrcIsInternal && !DstIsInternal) continue; - // TODO: filter out buggy duplicate branches on Skylake LBRStack.emplace_back(LBREntry(Src, Dst)); } TraceIt.advance(); @@ -878,7 +877,7 @@ // LBR and FROM of next LBR. uint64_t StartOffset = TargetOffset; if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) && - StartOffset <= EndOffeset) + isValidLBRRange(StartOffset, EndOffeset, Binary)) Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); EndOffeset = SourceOffset; } @@ -1124,7 +1123,7 @@ const char *RangeCrossFuncMsg = "Fall through range should not cross function boundaries, likely due to " "profile and binary mismatch."; - const char *BogusRangeMsg = "Range start is after range end."; + const char *BogusRangeMsg = "Range start is after or too far from range end."; uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; @@ -1155,7 +1154,7 @@ WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); } - if (StartOffset > EndOffset) { + if (!isValidLBRRange(StartOffset, EndOffset, Binary)) { BogusRange += I.second; WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg); } @@ -1170,9 +1169,9 @@ emitWarningSummary( RangeCrossFunc, TotalRangeNum, "of samples are from ranges that do cross function boundaries."); - emitWarningSummary( - BogusRange, TotalRangeNum, - "of samples are from ranges that have range start after range end."); + emitWarningSummary(BogusRange, TotalRangeNum, + "of samples are from ranges that have range start after " + "or too far from range end."); } void PerfScriptReader::parsePerfTraces() { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" #include "llvm/Transforms/IPO/SampleContextTracker.h" +#include #include #include #include @@ -95,6 +96,14 @@ } return Sum; } + + uint64_t getMaxFuncRangeSize() { + uint64_t Size = 0; + for (auto &R : Ranges) { + Size = std::max(Size, R.second - R.first); + } + return Size; + } }; // Info about function range. A function can be split into multiple @@ -133,7 +142,7 @@ } // Take the last two addresses before the return address as epilog - void inferEpilogOffsets(std::unordered_set &RetAddrs) { + void inferEpilogOffsets(std::set &RetAddrs) { for (auto Addr : RetAddrs) { PrologEpilogSet.insert(Addr); InstructionPointer IP(Binary, Addr); @@ -236,9 +245,11 @@ // sorting is needed to fast advance to the next forward/backward instruction. std::vector CodeAddrOffsets; // A set of call instruction offsets. Used by virtual unwinding. - std::unordered_set CallOffsets; + std::set CallOffsets; // A set of return instruction offsets. Used by virtual unwinding. - std::unordered_set RetOffsets; + std::set RetOffsets; + // A set of unconditional branch instruction offsets. + std::set UncondBranchOffsets; // A set of branch instruction offsets. std::unordered_set BranchOffsets; @@ -263,6 +274,9 @@ // Function name to probe frame map for top-level outlined functions. StringMap TopLevelProbeFrameMap; + // The size of largest binary function range. + uint64_t MaxFuncRangeSize = 0; + bool UsePseudoProbes = false; bool UseFSDiscriminator = false; @@ -395,11 +409,33 @@ CallOffsets.count(Offset); } + bool rangeCrossCall(uint64_t Start, uint64_t End) { + if (Start >= End) + return false; + auto R = CallOffsets.upper_bound(Start); + return R != CallOffsets.end() && *R < End; + } + + bool rangeCrossReturn(uint64_t Start, uint64_t End) { + if (Start >= End) + return false; + auto R = RetOffsets.upper_bound(Start); + return R != RetOffsets.end() && *R < End; + } + + bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) { + if (Start >= End) + return false; + auto R = UncondBranchOffsets.upper_bound(Start); + return R != UncondBranchOffsets.end() && *R < End; + } + uint64_t getAddressforIndex(uint64_t Index) const { return offsetToVirtualAddr(CodeAddrOffsets[Index]); } size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); } + uint64_t getMaxFuncRangeSize() const { return MaxFuncRangeSize; } bool usePseudoProbes() const { return UsePseudoProbes; } bool useFSDiscriminator() const { return UseFSDiscriminator; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/TargetSelect.h" +#include #define DEBUG_TYPE "load-binary" @@ -225,6 +226,12 @@ loadSymbolsFromDWARF(*cast(&ExeBinary)); } + // Compute the size of largest binary function range, which will be used + // to filter out invalid lbr ranges. + for (auto &F : BinaryFunctions) + MaxFuncRangeSize = + std::max(MaxFuncRangeSize, F.second.getMaxFuncRangeSize()); + // Disassemble the text sections. disassemble(Obj); @@ -499,8 +506,11 @@ CallOffsets.insert(Offset); else if (MCDesc.isReturn()) RetOffsets.insert(Offset); - else if (MCDesc.isBranch()) + else if (MCDesc.isBranch()) { + if (MCDesc.isUnconditionalBranch()) + UncondBranchOffsets.insert(Offset); BranchOffsets.insert(Offset); + } if (InvalidInstLength) { WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);