diff --git a/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml b/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml @@ -0,0 +1,46 @@ +# RUN: llvm-xray account -d %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=ALL %s +# RUN: llvm-xray account -d -recursive-calls-only %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=RECURSIVE %s + +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 0 +records: +# Here we reconstruct the following call trace: +# +# f1() +# f2() +# f3() +# f2() +# +# But we find that we're missing an exit record for f2() because it's +# tail-called f3(). We make sure that if we see a trace like this that we can +# deduce tail calls, and account the time (potentially wrongly) to f2() when +# f1() exits. That is because we don't go back to f3()'s entry record to +# properly do the math on the timing of f2(). +# +# As a result, we can deduce that f2() is not recursive here. +# +# Note that by default, tail/sibling call deduction is disabled, and is enabled +# with a flag "-d" or "-deduce-sibling-calls". +# + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 10000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10001 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 10002 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 10003 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 10004 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10005 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 10006 } +... + +# ALL: Functions with latencies: 3 +# ALL-NEXT: funcid count [ min, med, 90p, 99p, max] sum function +# ALL-NEXT: 1 1 [ 4.000000, 4.000000, 4.000000, 4.000000, 4.000000] 4.000000 :0:0: @(1) +# ALL-NEXT: 2 2 [ 1.000000, 3.000000, 3.000000, 3.000000, 3.000000] 4.000000 :0:0: @(2) +# ALL-NEXT: 3 1 [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000] 1.000000 :0:0: @(3) + +# RECURSIVE: Functions with latencies: 0 +# RECURSIVE-NEXT: funcid count [ min, med, 90p, 99p, max] sum function diff --git a/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml b/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml @@ -0,0 +1,32 @@ +# RUN: llvm-xray account %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=ALL %s +# RUN: llvm-xray account -recursive-calls-only %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=RECURSIVE %s + +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 2601000000 +records: + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 0 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 100000000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 200000000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 300000000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 400000000 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 500000000 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 600000000 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 700000000 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 800000000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 900000000 } +... + +# ALL: Functions with latencies: 3 +# ALL-NEXT: funcid count [ min, med, 90p, 99p, max] sum function +# ALL-NEXT: 1 1 [ 0.038447, 0.038447, 0.038447, 0.038447, 0.038447] 0.038447 :0:0: @(1) +# ALL-NEXT: 2 2 [ 0.038447, 0.192234, 0.192234, 0.192234, 0.192234] 0.230681 :0:0: @(2) +# ALL-NEXT: 3 2 [ 0.038447, 0.115340, 0.115340, 0.115340, 0.115340] 0.153787 :0:0: @(3) + +# RECURSIVE: Functions with latencies: 1 +# RECURSIVE-NEXT: funcid count [ min, med, 90p, 99p, max] sum function +# RECURSIVE-NEXT: 3 1 [ 0.038447, 0.038447, 0.038447, 0.038447, 0.038447] 0.038447 :0:0: @(3) diff --git a/llvm/tools/llvm-xray/xray-account.h b/llvm/tools/llvm-xray/xray-account.h --- a/llvm/tools/llvm-xray/xray-account.h +++ b/llvm/tools/llvm-xray/xray-account.h @@ -31,7 +31,10 @@ typedef std::map> PerThreadMinMaxTSCMap; typedef std::map> PerCPUMinMaxTSCMap; - typedef std::vector> FunctionStack; + struct FunctionStack { + std::vector> Stack; + Optional> RecursionDepth; + }; typedef std::map PerThreadFunctionStackMap; private: @@ -41,6 +44,7 @@ PerCPUMinMaxTSCMap PerCPUMinMaxTSC; FuncIdConversionHelper &FuncIdHelper; + bool RecursiveCallsOnly = false; bool DeduceSiblingCalls = false; uint64_t CurrentMaxTSC = 0; @@ -50,8 +54,9 @@ public: explicit LatencyAccountant(FuncIdConversionHelper &FuncIdHelper, - bool DeduceSiblingCalls) - : FuncIdHelper(FuncIdHelper), DeduceSiblingCalls(DeduceSiblingCalls) {} + bool RecursiveCallsOnly, bool DeduceSiblingCalls) + : FuncIdHelper(FuncIdHelper), RecursiveCallsOnly(RecursiveCallsOnly), + DeduceSiblingCalls(DeduceSiblingCalls) {} const FunctionLatencyMap &getFunctionLatencies() const { return FunctionLatencies; diff --git a/llvm/tools/llvm-xray/xray-account.cpp b/llvm/tools/llvm-xray/xray-account.cpp --- a/llvm/tools/llvm-xray/xray-account.cpp +++ b/llvm/tools/llvm-xray/xray-account.cpp @@ -35,6 +35,9 @@ cl::sub(Account), cl::init(false)); static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing), cl::desc("Alias for -keep_going")); +static cl::opt AccountRecursiveCallsOnly( + "recursive-calls-only", cl::desc("Only count the calls that are recursive"), + cl::sub(Account), cl::init(false)); static cl::opt AccountDeduceSiblingCalls( "deduce-sibling-calls", cl::desc("Deduce sibling calls when unrolling function call stacks"), @@ -137,6 +140,8 @@ return false; auto &ThreadStack = PerThreadFunctionStack[Record.TId]; + if (RecursiveCallsOnly && !ThreadStack.RecursionDepth) + ThreadStack.RecursionDepth.emplace(); switch (Record.Type) { case RecordTypes::CUSTOM_EVENT: case RecordTypes::TYPED_EVENT: @@ -144,18 +149,24 @@ return true; case RecordTypes::ENTER: case RecordTypes::ENTER_ARG: { - ThreadStack.emplace_back(Record.FuncId, Record.TSC); + ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC); + if (ThreadStack.RecursionDepth) + ++(*ThreadStack.RecursionDepth)[Record.FuncId]; break; } case RecordTypes::EXIT: case RecordTypes::TAIL_EXIT: { - if (ThreadStack.empty()) + if (ThreadStack.Stack.empty()) return false; - if (ThreadStack.back().first == Record.FuncId) { - const auto &Top = ThreadStack.back(); - recordLatency(Top.first, diff(Top.second, Record.TSC)); - ThreadStack.pop_back(); + if (ThreadStack.Stack.back().first == Record.FuncId) { + const auto &Top = ThreadStack.Stack.back(); + if (!ThreadStack.RecursionDepth || + (*ThreadStack.RecursionDepth)[Top.first] > 1) + recordLatency(Top.first, diff(Top.second, Record.TSC)); + if (ThreadStack.RecursionDepth) + --(*ThreadStack.RecursionDepth)[Top.first]; + ThreadStack.Stack.pop_back(); break; } @@ -164,11 +175,11 @@ // Look for the parent up the stack. auto Parent = - std::find_if(ThreadStack.rbegin(), ThreadStack.rend(), + std::find_if(ThreadStack.Stack.rbegin(), ThreadStack.Stack.rend(), [&](const std::pair &E) { return E.first == Record.FuncId; }); - if (Parent == ThreadStack.rend()) + if (Parent == ThreadStack.Stack.rend()) return false; // Account time for this apparently sibling call exit up the stack. @@ -199,11 +210,17 @@ // complexity to do correctly (need to backtrack, etc.). // // FIXME: Potentially implement the more complex deduction algorithm? - auto I = std::next(Parent).base(); - for (auto &E : make_range(I, ThreadStack.end())) { - recordLatency(E.first, diff(E.second, Record.TSC)); + auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end()); + for (auto &E : R) { + if (!ThreadStack.RecursionDepth || + (*ThreadStack.RecursionDepth)[E.first] > 1) + recordLatency(E.first, diff(E.second, Record.TSC)); + } + for (auto &Top : reverse(R)) { + if (ThreadStack.RecursionDepth) + --(*ThreadStack.RecursionDepth)[Top.first]; + ThreadStack.Stack.pop_back(); } - ThreadStack.erase(I, ThreadStack.end()); break; } } @@ -423,7 +440,8 @@ symbolize::LLVMSymbolizer Symbolizer; llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer, FunctionAddresses); - xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls); + xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly, + AccountDeduceSiblingCalls); auto TraceOrErr = loadTraceFile(AccountInput); if (!TraceOrErr) return joinErrors( @@ -445,12 +463,12 @@ << '\n'; for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) { errs() << "Thread ID: " << ThreadStack.first << "\n"; - if (ThreadStack.second.empty()) { + if (ThreadStack.second.Stack.empty()) { errs() << " (empty stack)\n"; continue; } - auto Level = ThreadStack.second.size(); - for (const auto &Entry : llvm::reverse(ThreadStack.second)) + auto Level = ThreadStack.second.Stack.size(); + for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack)) errs() << " #" << Level-- << "\t" << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n'; }