diff --git a/llvm/test/tools/llvm-profgen/Inputs/external-address.perfscript b/llvm/test/tools/llvm-profgen/Inputs/external-address.perfscript new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/external-address.perfscript @@ -0,0 +1,33 @@ +PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]: r-xp /home/inline-cs-noprobe.perfbin + +; Test for an external top address, should only ignore the call stack and keep unwinding the LBR + +; Valid LBR + ffffffff + 40067e + ffffffff + 0x4006c8/0x40067e/P/-/-/0 0x40069b/0x400670/M/-/-/0 + +; Valid LBR + ffffffff + 0x4006c8/0xffffffff/P/-/-/0 0x40069b/0x400670/M/-/-/0 + +; Valid LBR + 40067e + 0x4006c8/0xffffffff/P/-/-/0 0x40069b/0x400670/M/-/-/0 + +; Valid LBR + ffffffff + 40067e + 5541f689495641d7 + 0xffffffff/0xffffffff/P/-/-/0 0x4006c8/0xffffffff/P/-/-/0 0x40069b/0x400670/M/-/-/0 + +; Empty sample + ffffffff + 40067e + 5541f689495641d7 + 0xffffffff/0xffffffff/P/-/-/0 0xffffffff/0xffffffff/P/-/-/0 + +; Invalid LBR + ffffffff + 0xffffffff/0xffffffff/P/-/-/0 0x40069b/0x400670/M/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript --- a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript +++ b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript @@ -1,12 +1,5 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]: r-xp /home/inline-cs-noprobe.perfbin -; test for an external or invalid top address, should skip the whole sample - - ffffffff - 40067e - 5541f689495641d7 - 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x40069b/0x400670/M/-/-/0 0x4006c8/0x40067e/P/-/-/0 
0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 - 40067e 5541f689495641d7 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x40069b/0x400670/M/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 0x4006c8/0x40067e/P/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/cs-external-address.test b/llvm/test/tools/llvm-profgen/cs-external-address.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/cs-external-address.test @@ -0,0 +1,15 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/external-address.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-hot-count=0 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER + + +; CHECK-UNWINDER: [main:1 @ foo] +; CHECK-UNWINDER: 2 +; CHECK-UNWINDER: 670-6ad:4 +; CHECK-UNWINDER: 6bd-6c8:4 +; CHECK-UNWINDER: 2 +; CHECK-UNWINDER: 69b->670:5 +; CHECK-UNWINDER: 6c8->67e:1 +; CHECK-UNWINDER: [main:1 @ foo:3.1 @ bar] +; CHECK-UNWINDER: 1 +; CHECK-UNWINDER: 6af-6bb:4 +; CHECK-UNWINDER: 0 diff --git a/llvm/test/tools/llvm-profgen/cs-interrupt.test b/llvm/test/tools/llvm-profgen/cs-interrupt.test --- a/llvm/test/tools/llvm-profgen/cs-interrupt.test +++ b/llvm/test/tools/llvm-profgen/cs-interrupt.test @@ -3,7 +3,6 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-interrupt.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-interrupt.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 ->>>>>>> 02ea7084c370 
([llvm-profgen] Support LBR only perf script) ; RUN: FileCheck %s --input-file %t ; CHECK:[main:1 @ foo]:88:0 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -78,22 +78,30 @@ ;CHECK: 1: 5 ;CHECK: 2: 5 ;CHECK: 3: 5 -;CHECK: main:820:0 +;CHECK: main:906:0 ;CHECK: 0: 0 ;CHECK: 3: 0 ;CHECK: 4.1: 0 ;CHECK: 4.3: 0 -;CHECK: 5.1: 10 -;CHECK: 5.3: 10 -;CHECK: 6: 10 -;CHECK: 6.1: 12 -;CHECK: 6.3: 10 +;CHECK: 5.1: 11 +;CHECK: 5.3: 11 +;CHECK: 6: 11 +;CHECK: 6.1: 14 +;CHECK: 6.3: 11 ;CHECK: 7: 0 ;CHECK: 8: 0 quick_sort:1 ;CHECK: 9: 0 ;CHECK: 11: 0 ;CHECK: 14: 0 ;CHECK: 65499: 0 +;CHECK: quick_sort:903:25 +;CHECK: 1: 24 +;CHECK: 2: 12 partition_pivot_last:7 partition_pivot_first:5 +;CHECK: 3: 11 quick_sort:12 +;CHECK: 4: 12 quick_sort:12 +;CHECK: 6: 24 +;CHECK: 65507: 12 + ; original code: diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -17,6 +17,7 @@ #include #include #include +#include #include using namespace llvm; @@ -221,7 +222,11 @@ const ProfiledBinary *Binary; // Call stack trie node struct ProfiledFrame { - const uint64_t Address = 0; + static const uint64_t DummyRootAddr = 0; + // Represent all the addresses outside of current binary. 
+ static const uint64_t ExternalAddr = 1; + + const uint64_t Address = DummyRootAddr; ProfiledFrame *Parent; SampleVector RangeSamples; SampleVector BranchSamples; @@ -241,7 +246,8 @@ void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) { BranchSamples.emplace_back(std::make_tuple(Source, Target, Count)); } - bool isDummyRoot() { return Address == 0; } + bool isDummyRoot() { return Address == DummyRootAddr; } + bool isExternalFrame() { return Address == ExternalAddr; } bool isLeafFrame() { return Children.empty(); } }; @@ -546,6 +552,10 @@ ContextSampleCounterMap SampleCounters; bool ProfileIsCS = false; + + uint64_t NumTotalSample = 0; + uint64_t NumTopExternalFrame = 0; + uint64_t NumLeadingExternalLBR = 0; }; // Read perf script to parse the events and samples. diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -251,6 +251,19 @@ while (State.hasNextLBR()) { State.checkStateConsistency(); + // When the top frame is an external frame, update the top frame to the + // source of current LBR. The source and the next LBR's target will be a + // linear range. This is the case that a sample is collected when running + // external address (kernel, dynamic library); the call stack is truncated + // since we cannot optimize code outside of the binary. The remaining LBRs + // continue to be unwound using a context-less frame stack. + if (IsLeaf && State.CurrentLeafFrame->isExternalFrame()) { + State.InstPtr.update(State.getCurrentLBRSource()); + State.switchToFrame(State.getCurrentLBRSource()); + State.advanceLBR(); + IsLeaf = false; + continue; + } // Unwind implicit calls/returns from inlining, along the linear path, // break into smaller sub section each with its own calling context. 
if (!IsLeaf) { @@ -508,26 +521,33 @@ bool IsOutgoing = SrcIsInternal && !DstIsInternal; bool IsArtificial = false; - // Ignore branches outside the current binary. Ignore all remaining branches - // if there's no incoming branch before the external branch in reverse - // order. + // Ignore branches outside the current binary. if (IsExternal) { - if (PrevTrDst) - continue; - if (!LBRStack.empty()) { + if (!PrevTrDst && !LBRStack.empty()) { WithColor::warning() << "Invalid transfer to external code in LBR record at line " << TraceIt.getLineNumber() << ": " << TraceIt.getCurrentLine() << "\n"; } - break; + // Do not ignore the entire sample; the remaining LBRs can still be + // unwound using a context-less stack. + continue; } if (IsOutgoing) { if (!PrevTrDst) { - // This is unpaired outgoing jump which is likely due to interrupt or - // incomplete LBR trace. Ignore current and subsequent entries since - // they are likely in different contexts. + // This is a leading outgoing LBR, we should keep processing the LBRs. + if (LBRStack.empty()) { + NumLeadingExternalLBR++; + // Record this LBR since current source and next LBR's target is still + // a valid range. + LBRStack.emplace_back( + LBREntry(Src, UnwindState::ProfiledFrame::ExternalAddr, true)); + continue; + } + // This is a middle unpaired outgoing jump which is likely due to + // interrupt or incomplete LBR trace. Ignore current and subsequent + // entries since they are likely in different contexts. break; } @@ -594,8 +614,17 @@ TraceIt.advance(); // Currently intermixed frame from different binaries is not supported. // Ignore caller frames not from binary of interest. - if (!Binary->addressIsCode(FrameAddr)) + if (!Binary->addressIsCode(FrameAddr)) { + // An empty call stack will be treated as an invalid sample, it will + // ignore the LBR samples. Here we push the external frame so the reader + // can still process LBR samples. Those LBRs will be unwound based on + // a context-less stack. 
+ if (CallStack.empty()) { + NumTopExternalFrame++; + CallStack.emplace_back(UnwindState::ProfiledFrame::ExternalAddr); + } break; + } // We need to translate return address to call address for non-leaf frames. if (!CallStack.empty()) { @@ -946,8 +975,10 @@ void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { if (isMMap2Event(TraceIt.getCurrentLine())) parseMMap2Event(TraceIt); - else + else { + NumTotalSample++; parseSample(TraceIt); + } } void PerfScriptReader::parseAndAggregateTrace() { @@ -1131,6 +1162,11 @@ // Parse perf traces and do aggregation. parseAndAggregateTrace(); + emitWarningSummary(NumTopExternalFrame, NumTotalSample, + "of samples have top external frame in call stack."); + emitWarningSummary(NumLeadingExternalLBR, NumTotalSample, + "of samples have leading external LBR."); + // Generate unsymbolized profile. warnTruncatedStack(); warnInvalidRange();