diff --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript --- a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript +++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript @@ -8,3 +8,12 @@ // The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges. + + + 20179e + 2017f9 + 7f83e84e7793 + 5541f689495641d7 + 0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 + +// Duplicated LBR entries "0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0", the range [0x20179e, 0x2017cf] is cross unconditional jmp, should be ignored to avoid bogus instruction ranges. diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test --- a/llvm/test/tools/llvm-profgen/invalid-range.test +++ b/llvm/test/tools/llvm-profgen/invalid-range.test @@ -8,39 +8,40 @@ ; RUN: FileCheck %s --input-file %t2 --check-prefix=CS -; NOCS: 4 -; NOCS-NEXT: 201760-20177f:2 -; NOCS-NEXT: 20179e-2017bf:1 -; NOCS-NEXT: 2017c4-2017cf:1 +; NOCS: 4 +; NOCS-NEXT: 201760-20177f:7 +; NOCS-NEXT: 20179e-2017bf:5 +; NOCS-NEXT: 2017c4-2017cf:6 ; NOCS-NEXT: 2017c4-2017d8:1 ; NOCS-NEXT: 4 -; NOCS-NEXT: 20177f->2017c4:2 -; NOCS-NEXT: 2017bf->201760:2 -; NOCS-NEXT: 2017cf->20179e:2 +; NOCS-NEXT: 20177f->2017c4:7 +; NOCS-NEXT: 2017bf->201760:7 +; NOCS-NEXT: 2017cf->20179e:8 ; NOCS-NEXT: 2017d8->2017e3:1 ; CS: [] -; CS-NEXT: 3 -; CS-NEXT: 201760-20177f:1 -; CS-NEXT: 20179e-2017bf:1 -; CS-NEXT: 2017c4-2017d8:1 -; CS-NEXT: 4 -; CS-NEXT: 20177f->2017c4:1 -; CS-NEXT: 2017bf->201760:1 -; CS-NEXT: 2017cf->20179e:1 -; CS-NEXT: 2017d8->2017e3:1 +; CS-NEXT: 4 +; CS-NEXT: 201760-20177f:6 +; CS-NEXT: 20179e-2017bf:5 +; CS-NEXT: 2017c4-2017cf:5 +; CS-NEXT: 2017c4-2017d8:1 +; CS-NEXT: 4 +; CS-NEXT: 20177f->2017c4:6 +; CS-NEXT: 2017bf->201760:6 +; CS-NEXT: 2017cf->20179e:6 +; CS-NEXT: 2017d8->2017e3:1 ; CS-NEXT: [0x7f4] -; CS-NEXT: 1 -; CS-NEXT: 2017c4-2017cf:1 -; CS-NEXT: 2 -; CS-NEXT: 2017bf->201760:1 -; CS-NEXT: 2017cf->20179e:1 +; CS-NEXT: 1 +; CS-NEXT: 2017c4-2017cf:1 +; CS-NEXT: 2 +; CS-NEXT: 2017bf->201760:1 +; CS-NEXT: 2017cf->20179e:2 ; CS-NEXT: [0x7f4 @ 0x7bf] -; CS-NEXT: 1 -; CS-NEXT: 201760-20177f:1 -; CS-NEXT: 1 -; CS-NEXT: 20177f->2017c4:1 +; CS-NEXT: 1 +; CS-NEXT: 201760-20177f:1 +; CS-NEXT: 1 +; CS-NEXT: 20177f->2017c4:1 ; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -210,6 +210,17 @@ using SampleVector = SmallVector, 16>; +inline bool isValidFallThroughRange(uint64_t Start, uint64_t End, + ProfiledBinary *Binary) { + // Start bigger than End is considered invalid. + // LBR ranges cross the unconditional jmp are also assumed invalid. + // It's found that perf data may contain duplicate LBR entries that could form + // a range that does not reflect real execution flow on some Intel targets, + // e.g. Skylake. Such ranges are ususally very long. Exclude them since there + // cannot be a linear execution range that spans over unconditional jmp. + return Start <= End && !Binary->rangeCrossUncondBranch(Start, End); +} + // The state for the unwinder, it doesn't hold the data but only keep the // pointer/index of the data, While unwinding, the CallStack is changed // dynamicially and will be recorded as the context of the sample diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -99,7 +99,8 @@ return; } - if (Target > End) { + if (!isValidFallThroughRange(Binary->virtualAddrToOffset(Target), + Binary->virtualAddrToOffset(End), Binary)) { // Skip unwinding the rest of LBR trace when a bogus range is seen. State.setInvalid(); return; @@ -581,7 +582,6 @@ if (!SrcIsInternal && !DstIsInternal) continue; - // TODO: filter out buggy duplicate branches on Skylake LBRStack.emplace_back(LBREntry(Src, Dst)); } TraceIt.advance(); @@ -878,7 +878,7 @@ // LBR and FROM of next LBR. uint64_t StartOffset = TargetOffset; if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) && - StartOffset <= EndOffeset) + isValidFallThroughRange(StartOffset, EndOffeset, Binary)) Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); EndOffeset = SourceOffset; } @@ -1124,7 +1124,7 @@ const char *RangeCrossFuncMsg = "Fall through range should not cross function boundaries, likely due to " "profile and binary mismatch."; - const char *BogusRangeMsg = "Range start is after range end."; + const char *BogusRangeMsg = "Range start is after or too far from range end."; uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; @@ -1155,7 +1155,7 @@ WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); } - if (StartOffset > EndOffset) { + if (!isValidFallThroughRange(StartOffset, EndOffset, Binary)) { BogusRange += I.second; WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg); } @@ -1172,7 +1172,8 @@ "of samples are from ranges that do cross function boundaries."); emitWarningSummary( BogusRange, TotalRangeNum, - "of samples are from ranges that have range start after range end."); + "of samples are from ranges that have range start after or too far from " + "range end acrossing the unconditinal jmp."); } void PerfScriptReader::parsePerfTraces() { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -239,6 +239,8 @@ std::unordered_set CallOffsets; // A set of return instruction offsets. Used by virtual unwinding. std::unordered_set RetOffsets; + // An ordered set of unconditional branch instruction offsets. + std::set UncondBranchOffsets; // A set of branch instruction offsets. std::unordered_set BranchOffsets; @@ -395,6 +397,13 @@ CallOffsets.count(Offset); } + bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) { + if (Start >= End) + return false; + auto R = UncondBranchOffsets.lower_bound(Start); + return R != UncondBranchOffsets.end() && *R < End; + } + uint64_t getAddressforIndex(uint64_t Index) const { return offsetToVirtualAddr(CodeAddrOffsets[Index]); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -495,12 +495,17 @@ // Populate address maps. CodeAddrOffsets.push_back(Offset); - if (MCDesc.isCall()) + if (MCDesc.isCall()) { CallOffsets.insert(Offset); - else if (MCDesc.isReturn()) + UncondBranchOffsets.insert(Offset); + } else if (MCDesc.isReturn()) { RetOffsets.insert(Offset); - else if (MCDesc.isBranch()) + UncondBranchOffsets.insert(Offset); + } else if (MCDesc.isBranch()) { + if (MCDesc.isUnconditionalBranch()) + UncondBranchOffsets.insert(Offset); BranchOffsets.insert(Offset); + } if (InvalidInstLength) { WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);