diff --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript @@ -0,0 +1,10 @@ + PERF_RECORD_MMAP2 1243676/1243676: [0x201000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin + + 20179e + 2017f9 + 7f83e84e7793 + 5541f689495641d7 + 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017d8/0x2017e3/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 + + +// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges. diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/invalid-range.test @@ -0,0 +1,69 @@ +; In the perfscript input, the consecutive branch pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf]. +; We are testing that only the invalid range is dropped to avoid bogus instruction ranges. All other ranges and all branch samples should be kept. 
+; +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --skip-symbolization --ignore-stack-samples --use-offset=0 +; RUN: FileCheck %s --input-file %t1 --check-prefix=NOCS + +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --skip-symbolization --use-offset=0 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CS + + + +; NOCS: 4 +; NOCS-NEXT: 201760-20177f:2 +; NOCS-NEXT: 20179e-2017bf:1 +; NOCS-NEXT: 2017c4-2017cf:1 +; NOCS-NEXT: 2017c4-2017d8:1 +; NOCS-NEXT: 4 +; NOCS-NEXT: 20177f->2017c4:2 +; NOCS-NEXT: 2017bf->201760:2 +; NOCS-NEXT: 2017cf->20179e:2 +; NOCS-NEXT: 2017d8->2017e3:1 + + +; CS: [] +; CS-NEXT: 3 +; CS-NEXT: 201760-20177f:1 +; CS-NEXT: 20179e-2017bf:1 +; CS-NEXT: 2017c4-2017d8:1 +; CS-NEXT: 4 +; CS-NEXT: 20177f->2017c4:1 +; CS-NEXT: 2017bf->201760:1 +; CS-NEXT: 2017cf->20179e:1 +; CS-NEXT: 2017d8->2017e3:1 +; CS-NEXT: [0x7f4] +; CS-NEXT: 1 +; CS-NEXT: 2017c4-2017cf:1 +; CS-NEXT: 2 +; CS-NEXT: 2017bf->201760:1 +; CS-NEXT: 2017cf->20179e:1 +; CS-NEXT: [0x7f4 @ 0x7bf] +; CS-NEXT: 1 +; CS-NEXT: 201760-20177f:1 +; CS-NEXT: 1 +; CS-NEXT: 20177f->2017c4:1 + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +#include <stdio.h> + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -197,7 +197,10 @@ } #ifndef NDEBUG + uint64_t Linenum = 0; + void print() const { + 
dbgs() << "Line " << Linenum << "\n"; dbgs() << "LBR stack\n"; printLBRStack(LBRStack); dbgs() << "Call stack\n"; @@ -291,7 +294,10 @@ bool IsLastLBR() const { return LBRIndex == 0; } bool getLBRStackSize() const { return LBRStack.size(); } void advanceLBR() { LBRIndex++; } - ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; } + ProfiledFrame *getParentFrame() { + return CurrentLeafFrame == &DummyTrieRoot ? CurrentLeafFrame + : CurrentLeafFrame->Parent; + } void pushFrame(uint64_t Address) { CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address); @@ -498,7 +504,7 @@ } void unwindCall(UnwindState &State); - void unwindLinear(UnwindState &State, uint64_t Repeat); + bool unwindLinear(UnwindState &State, uint64_t Repeat); void unwindReturn(UnwindState &State); void unwindBranch(UnwindState &State); diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -96,10 +96,12 @@ State.InstPtr.update(Source); } -void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { +bool VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { InstructionPointer &IP = State.InstPtr; uint64_t Target = State.getCurrentLBRTarget(); uint64_t End = IP.Address; + if (Target > End) + return false; if (Binary->usePseudoProbes()) { // We don't need to top frame probe since it should be extracted // from the range. @@ -138,6 +140,7 @@ State.switchToFrame(IP.Address); State.CurrentLeafFrame->recordRangeCount(IP.Address, End, Repeat); } + return true; } void VirtualUnwinder::unwindReturn(UnwindState &State) { @@ -280,42 +283,58 @@ if (!State.validateInitialState()) return false; + // Skip unwinding the rest of LBR trace when a bogus range is seen. 
+ bool SkipUnwinding = false; + // Now process the LBR samples in parrallel with stack sample // Note that we do not reverse the LBR entry order so we can // unwind the sample stack as we walk through LBR entries. while (State.hasNextLBR()) { - State.checkStateConsistency(); + if (!SkipUnwinding) + State.checkStateConsistency(); // Do not attempt linear unwind for the leaf range as it's incomplete. if (!State.IsLastLBR()) { // Unwind implicit calls/returns from inlining, along the linear path, // break into smaller sub section each with its own calling context. - unwindLinear(State, Repeat); + if (!unwindLinear(State, Repeat)) + SkipUnwinding = true; } // Save the LBR branch before it gets unwound. const LBREntry &Branch = State.getCurrentLBR(); - if (isCallState(State)) { - // Unwind calls - we know we encountered call if LBR overlaps with - // transition between leaf the 2nd frame. Note that for calls that - // were not in the original stack sample, we should have added the - // extra frame when processing the return paired with this call. - unwindCall(State); - } else if (isReturnState(State)) { - // Unwind returns - check whether the IP is indeed at a return instruction - unwindReturn(State); + if (SkipUnwinding) { + // Skip unwinding the rest of LBR trace. Reset the stack and update the + // state so that the rest of the trace can still be processed in a + // context-insensitive way, i.e., all ranges will be counted towards + // the root context. + State.clearCallStack(); + State.InstPtr.update(State.getCurrentLBRSource()); } else { - // Unwind branches - // For regular intra function branches, we only need to record branch with - // context. For an artificial branch cross function boundaries, we got an - // issue with returning to external code. Take the two LBR enties for - // example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader, - // we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo - // return to bar. 
Here we detect and treat this case as BRANCH instead of - // RETURN which only update the source address. - unwindBranch(State); + if (isCallState(State)) { + // Unwind calls - we know we encountered call if LBR overlaps with + // transition between leaf the 2nd frame. Note that for calls that + // were not in the original stack sample, we should have added the + // extra frame when processing the return paired with this call. + unwindCall(State); + } else if (isReturnState(State)) { + // Unwind returns - check whether the IP is indeed at a return + // instruction + unwindReturn(State); + } else { + // Unwind branches + // For regular intra function branches, we only need to record branch + // with context. For an artificial branch cross function boundaries, we + // got an issue with returning to external code. Take the two LBR enties + // for example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf + // reader, we only get[foo:8(RETURN), bar:1], unwinder will be confused + // like foo return to bar. Here we detect and treat this case as BRANCH + // instead of RETURN which only update the source address. + unwindBranch(State); + } } + State.advanceLBR(); // Record `branch` with calling context after unwinding. recordBranchCount(Branch, State, Repeat); @@ -720,7 +739,9 @@ // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries // std::shared_ptr Sample = std::make_shared(); - +#ifndef NDEBUG + Sample->Linenum = TraceIt.getLineNumber(); +#endif // Parsing call stack and populate into PerfSample.CallStack if (!extractCallstack(TraceIt, Sample->CallStack)) { // Skip the next LBR line matched current call stack @@ -915,8 +936,10 @@ // If this not the first LBR, update the range count between TO of current // LBR and FROM of next LBR. 
uint64_t StartOffset = TargetOffset; - if (EndOffeset != 0) - Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); + if (EndOffeset != 0) { + if (StartOffset <= EndOffeset) + Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); + } EndOffeset = SourceOffset; } } @@ -1161,41 +1184,50 @@ const char *RangeCrossFuncMsg = "Fall through range should not cross function boundaries, likely due to " "profile and binary mismatch."; + const char *BogusRangeMsg = "Range start is above range end."; + uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; uint64_t UnmatchedRange = 0; uint64_t RangeCrossFunc = 0; + uint64_t BogusRange = 0; for (auto &I : Ranges) { uint64_t StartOffset = I.first.first; uint64_t EndOffset = I.first.second; + TotalRangeNum += I.second; if (!Binary->offsetIsCode(StartOffset) || !Binary->offsetIsTransfer(EndOffset)) { - InstNotBoundary++; + InstNotBoundary += I.second; WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg); } auto *FRange = Binary->findFuncRangeForOffset(StartOffset); if (!FRange) { - UnmatchedRange++; + UnmatchedRange += I.second; WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg); continue; } if (EndOffset >= FRange->EndOffset) { - RangeCrossFunc++; + RangeCrossFunc += I.second; WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); } + + if (StartOffset > EndOffset) { + BogusRange += I.second; + WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg); + } } - uint64_t TotalRangeNum = Ranges.size(); emitWarningSummary(InstNotBoundary, TotalRangeNum, "of profiled ranges are not on instruction boundary."); emitWarningSummary(UnmatchedRange, TotalRangeNum, "of profiled ranges do not belong to any functions."); emitWarningSummary(RangeCrossFunc, TotalRangeNum, "of profiled ranges do cross function boundaries."); + emitWarningSummary(BogusRange, TotalRangeNum, "of profiled ranges is bogus."); } void PerfScriptReader::parsePerfTraces() {