Diff 280801

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml

This file was added.

				# RUN: llvm-xray account -d %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=ALL %s
				# RUN: llvm-xray account -d -recursive-calls-only %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=RECURSIVE %s

				---
				header:
				version: 1
				type: 0
				constant-tsc: true
				nonstop-tsc: true
				cycle-frequency: 0
				records:
				# Here we reconstruct the following call trace:
				#
				#   f1()
				#     f2()
				#       f3()
				#   f2()
				#
				# But we find that we're missing an exit record for f2() because it
				# tail-called f3(). We make sure that if we see a trace like this that we can
				# deduce tail calls, and account the time (potentially wrongly) to f2() when
				# f1() exits. That is because we don't go back to f3()'s entry record to
				# properly do the math on the timing of f2().
				#
				# As a result, we can deduce that f2() is not recursive here.
				#
				# Note that by default, tail/sibling call deduction is disabled, and is enabled
				# with a flag "-d" or "-deduce-sibling-calls".
				#
				- { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 10000 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10001 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 10002 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 10003 }
				- { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 10004 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10005 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 10006 }
				...

				# ALL: Functions with latencies: 3
				# ALL-NEXT: funcid count [ min, med, 90p, 99p, max] sum function
				# ALL-NEXT: 1 1 [ 4.000000, 4.000000, 4.000000, 4.000000, 4.000000] 4.000000 <invalid>:0:0: @(1)
				# ALL-NEXT: 2 2 [ 1.000000, 3.000000, 3.000000, 3.000000, 3.000000] 4.000000 <invalid>:0:0: @(2)
				# ALL-NEXT: 3 1 [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000] 1.000000 <invalid>:0:0: @(3)

				# RECURSIVE: Functions with latencies: 0
				# RECURSIVE-NEXT: funcid count [ min, med, 90p, 99p, max] sum function

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml

This file was added.

				# RUN: llvm-xray account %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=ALL %s
				# RUN: llvm-xray account -recursive-calls-only %s -o - -m %S/Inputs/simple-instrmap.yaml | FileCheck --check-prefixes=RECURSIVE %s

				---
				header:
				version: 1
				type: 0
				constant-tsc: true
				nonstop-tsc: true
				cycle-frequency: 2601000000
				records:
				- { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 0 }
				- { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 100000000 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 200000000 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 300000000 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 400000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 500000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 600000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 700000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 800000000 }
				- { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 900000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 1000000000 }
				- { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 1100000000 }
				...

				# ALL: Functions with latencies: 3
				# ALL-NEXT: funcid count [ min, med, 90p, 99p, max] sum function
				# ALL-NEXT: 1 1 [ 0.038447, 0.038447, 0.038447, 0.038447, 0.038447] 0.038447 <invalid>:0:0: @(1)
				# ALL-NEXT: 2 2 [ 0.038447, 0.192234, 0.192234, 0.192234, 0.192234] 0.230681 <invalid>:0:0: @(2)
				# ALL-NEXT: 3 3 [ 0.038447, 0.038447, 0.115340, 0.115340, 0.115340] 0.192234 <invalid>:0:0: @(3)

				# RECURSIVE: Functions with latencies: 1
				# RECURSIVE-NEXT: funcid count [ min, med, 90p, 99p, max] sum function
				# RECURSIVE-NEXT: 3 2 [ 0.038447, 0.115340, 0.115340, 0.115340, 0.115340] 0.153787 <invalid>:0:0: @(3)

llvm/tools/llvm-xray/xray-account.h

	Show All 12 Lines
	#ifndef LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H			#ifndef LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H
	#define LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H			#define LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H

	#include <map>			#include <map>
	#include <utility>			#include <utility>
	#include <vector>			#include <vector>

	#include "func-id-helper.h"			#include "func-id-helper.h"
				#include "llvm/ADT/Bitfields.h"
	#include "llvm/Support/Program.h"			#include "llvm/Support/Program.h"
	#include "llvm/Support/raw_ostream.h"			#include "llvm/Support/raw_ostream.h"
	#include "llvm/XRay/XRayRecord.h"			#include "llvm/XRay/XRayRecord.h"

	namespace llvm {			namespace llvm {
	namespace xray {			namespace xray {

	class LatencyAccountant {			class LatencyAccountant {
	public:			public:
	typedef llvm::DenseMap<int32_t, llvm::SmallVector<uint64_t, 0>>			typedef llvm::DenseMap<int32_t, llvm::SmallVector<uint64_t, 0>>
	FunctionLatencyMap;			FunctionLatencyMap;
	typedef llvm::DenseMap<uint32_t, std::pair<uint64_t, uint64_t>>			typedef llvm::DenseMap<uint32_t, std::pair<uint64_t, uint64_t>>
	PerThreadMinMaxTSCMap;			PerThreadMinMaxTSCMap;
	typedef llvm::DenseMap<uint8_t, std::pair<uint64_t, uint64_t>>			typedef llvm::DenseMap<uint8_t, std::pair<uint64_t, uint64_t>>
	PerCPUMinMaxTSCMap;			PerCPUMinMaxTSCMap;
	typedef llvm::SmallVector<std::pair<int32_t, uint64_t>, 32> FunctionStack;			struct FunctionStack {
				llvm::SmallVector<std::pair<int32_t, uint64_t>, 32> Stack;
				class RecursionStatus {
				uint32_t Storage = 0;
				using Depth = Bitfield::Element<int32_t, 0, 31>; // Low 31 bits.
				using IsRecursive = Bitfield::Element<bool, 31, 1>; // Sign bit.
				public:
				RecursionStatus &operator++();
				RecursionStatus &operator--();
				bool isRecursive() const;
				};
				Optional<llvm::DenseMap<int32_t, RecursionStatus>> RecursionDepth;
				};
	typedef llvm::DenseMap<uint32_t, FunctionStack> PerThreadFunctionStackMap;			typedef llvm::DenseMap<uint32_t, FunctionStack> PerThreadFunctionStackMap;

	private:			private:
	PerThreadFunctionStackMap PerThreadFunctionStack;			PerThreadFunctionStackMap PerThreadFunctionStack;
	FunctionLatencyMap FunctionLatencies;			FunctionLatencyMap FunctionLatencies;
	PerThreadMinMaxTSCMap PerThreadMinMaxTSC;			PerThreadMinMaxTSCMap PerThreadMinMaxTSC;
	PerCPUMinMaxTSCMap PerCPUMinMaxTSC;			PerCPUMinMaxTSCMap PerCPUMinMaxTSC;
	FuncIdConversionHelper &FuncIdHelper;			FuncIdConversionHelper &FuncIdHelper;

				bool RecursiveCallsOnly = false;
	bool DeduceSiblingCalls = false;			bool DeduceSiblingCalls = false;
	uint64_t CurrentMaxTSC = 0;			uint64_t CurrentMaxTSC = 0;

	void recordLatency(int32_t FuncId, uint64_t Latency) {			void recordLatency(int32_t FuncId, uint64_t Latency) {
	FunctionLatencies[FuncId].push_back(Latency);			FunctionLatencies[FuncId].push_back(Latency);
	}			}

	public:			public:
	explicit LatencyAccountant(FuncIdConversionHelper &FuncIdHelper,			explicit LatencyAccountant(FuncIdConversionHelper &FuncIdHelper,
	bool DeduceSiblingCalls)			bool RecursiveCallsOnly, bool DeduceSiblingCalls)
	: FuncIdHelper(FuncIdHelper), DeduceSiblingCalls(DeduceSiblingCalls) {}			: FuncIdHelper(FuncIdHelper), RecursiveCallsOnly(RecursiveCallsOnly),
				DeduceSiblingCalls(DeduceSiblingCalls) {}

	const FunctionLatencyMap &getFunctionLatencies() const {			const FunctionLatencyMap &getFunctionLatencies() const {
	return FunctionLatencies;			return FunctionLatencies;
	}			}

	const PerThreadMinMaxTSCMap &getPerThreadMinMaxTSC() const {			const PerThreadMinMaxTSCMap &getPerThreadMinMaxTSC() const {
	return PerThreadMinMaxTSC;			return PerThreadMinMaxTSC;
	}			}
	Show All 36 Lines

llvm/tools/llvm-xray/xray-account.cpp

Show All 29 Lines
static cl::opt<std::string> AccountInput(cl::Positional,		static cl::opt<std::string> AccountInput(cl::Positional,
cl::desc("<xray log file>"),		cl::desc("<xray log file>"),
cl::Required, cl::sub(Account));		cl::Required, cl::sub(Account));
static cl::opt<bool>		static cl::opt<bool>
AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),		AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
cl::sub(Account), cl::init(false));		cl::sub(Account), cl::init(false));
static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),		static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
cl::desc("Alias for -keep_going"));		cl::desc("Alias for -keep_going"));
		static cl::opt<bool> AccountRecursiveCallsOnly(
		"recursive-calls-only", cl::desc("Only count the calls that are recursive"),
		cl::sub(Account), cl::init(false));
static cl::opt<bool> AccountDeduceSiblingCalls(		static cl::opt<bool> AccountDeduceSiblingCalls(
"deduce-sibling-calls",		"deduce-sibling-calls",
cl::desc("Deduce sibling calls when unrolling function call stacks"),		cl::desc("Deduce sibling calls when unrolling function call stacks"),
cl::sub(Account), cl::init(false));		cl::sub(Account), cl::init(false));
static cl::alias		static cl::alias
AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),		AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
cl::desc("Alias for -deduce_sibling_calls"));		cl::desc("Alias for -deduce_sibling_calls"));
static cl::opt<std::string>		static cl::opt<std::string>
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {
else		else
MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));		MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));
}		}

template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }		template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }

} // namespace		} // namespace

		using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;
		// Registers one more live activation of the function this status belongs to.
		// Increments the Depth bitfield of Storage and, when the depth reaches 2
		// while the IsRecursive bit is still clear, sets IsRecursive. The trailing
		// comments show the equivalent operation on the raw Storage word.
		// Returns *this.
		RecursionStatus &RecursionStatus::operator++() {
		auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
		// The depth must be non-negative and must have room for one more level.
		assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());
		++Depth;
		Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage
		// Did this function just (maybe indirectly) call itself the first time?
		if (!isRecursive() && Depth == 2) // Storage == 2 / Storage s> 1
		Bitfield::set<RecursionStatus::IsRecursive>(Storage,
		true); // Storage |= INT_MIN
		return *this;
		}
		// Registers that one activation of the function has ended. Decrements the
		// Depth bitfield of Storage and, once the depth is back to 0 with the
		// IsRecursive bit set, clears IsRecursive so Storage returns to 0. The
		// trailing comments show the equivalent operation on the raw Storage word.
		// Returns *this.
		RecursionStatus &RecursionStatus::operator--() {
		auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
		// There must be at least one live activation to leave.
		assert(Depth > 0);
		--Depth;
		Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage
		// Did we leave a function that previously (maybe indirectly) called itself?
		if (isRecursive() && Depth == 0) // Storage == INT_MIN
		Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0
		return *this;
		}
		// Returns the current value of the IsRecursive bit of Storage. The trailing
		// comment shows the equivalent test on the raw Storage word.
		bool RecursionStatus::isRecursive() const {
		return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0
		}

bool LatencyAccountant::accountRecord(const XRayRecord &Record) {		bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);		setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);		setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);

if (CurrentMaxTSC == 0)		if (CurrentMaxTSC == 0)
CurrentMaxTSC = Record.TSC;		CurrentMaxTSC = Record.TSC;

if (Record.TSC < CurrentMaxTSC)		if (Record.TSC < CurrentMaxTSC)
return false;		return false;

auto &ThreadStack = PerThreadFunctionStack[Record.TId];		auto &ThreadStack = PerThreadFunctionStack[Record.TId];
		if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)
		ThreadStack.RecursionDepth.emplace();
switch (Record.Type) {		switch (Record.Type) {
case RecordTypes::CUSTOM_EVENT:		case RecordTypes::CUSTOM_EVENT:
case RecordTypes::TYPED_EVENT:		case RecordTypes::TYPED_EVENT:
// TODO: Support custom and typed event accounting in the future.		// TODO: Support custom and typed event accounting in the future.
return true;		return true;
case RecordTypes::ENTER:		case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {		case RecordTypes::ENTER_ARG: {
ThreadStack.emplace_back(Record.FuncId, Record.TSC);		ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);
		if (ThreadStack.RecursionDepth)
		++(*ThreadStack.RecursionDepth)[Record.FuncId];
break;		break;
}		}
case RecordTypes::EXIT:		case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT: {		case RecordTypes::TAIL_EXIT: {
if (ThreadStack.empty())		if (ThreadStack.Stack.empty())
return false;		return false;

if (ThreadStack.back().first == Record.FuncId) {		if (ThreadStack.Stack.back().first == Record.FuncId) {
const auto &Top = ThreadStack.back();		const auto &Top = ThreadStack.Stack.back();
		if (!ThreadStack.RecursionDepth \|\|
		(*ThreadStack.RecursionDepth)[Top.first].isRecursive())
recordLatency(Top.first, diff(Top.second, Record.TSC));		recordLatency(Top.first, diff(Top.second, Record.TSC));
ThreadStack.pop_back();		if (ThreadStack.RecursionDepth)
		--(*ThreadStack.RecursionDepth)[Top.first];
		ThreadStack.Stack.pop_back();
break;		break;
}		}

if (!DeduceSiblingCalls)		if (!DeduceSiblingCalls)
return false;		return false;

// Look for the parent up the stack.		// Look for the parent up the stack.
auto Parent =		auto Parent =
std::find_if(ThreadStack.rbegin(), ThreadStack.rend(),		std::find_if(ThreadStack.Stack.rbegin(), ThreadStack.Stack.rend(),
[&](const std::pair<const int32_t, uint64_t> &E) {		[&](const std::pair<const int32_t, uint64_t> &E) {
return E.first == Record.FuncId;		return E.first == Record.FuncId;
});		});
if (Parent == ThreadStack.rend())		if (Parent == ThreadStack.Stack.rend())
return false;		return false;

// Account time for this apparently sibling call exit up the stack.		// Account time for this apparently sibling call exit up the stack.
// Considering the following case:		// Considering the following case:
//		//
// f()		// f()
// g()		// g()
// h()		// h()
Show All 14 Lines	case RecordTypes::TAIL_EXIT: {
// ideal and brittle -- so instead we provide a potentially inaccurate		// ideal and brittle -- so instead we provide a potentially inaccurate
// accounting of g() instead, computing it from the exit of f().		// accounting of g() instead, computing it from the exit of f().
//		//
// While it might be better that we account the time between `-> g()` and		// While it might be better that we account the time between `-> g()` and
// `-> h()` as the proper accounting of time for g() here, this introduces		// `-> h()` as the proper accounting of time for g() here, this introduces
// complexity to do correctly (need to backtrack, etc.).		// complexity to do correctly (need to backtrack, etc.).
//		//
// FIXME: Potentially implement the more complex deduction algorithm?		// FIXME: Potentially implement the more complex deduction algorithm?
auto I = std::next(Parent).base();		auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());
for (auto &E : make_range(I, ThreadStack.end())) {		for (auto &E : R) {
		if (!ThreadStack.RecursionDepth \|\|
		(*ThreadStack.RecursionDepth)[E.first].isRecursive())
recordLatency(E.first, diff(E.second, Record.TSC));		recordLatency(E.first, diff(E.second, Record.TSC));
}		}
ThreadStack.erase(I, ThreadStack.end());		for (auto &Top : reverse(R)) {
		if (ThreadStack.RecursionDepth)
		--(*ThreadStack.RecursionDepth)[Top.first];
		ThreadStack.Stack.pop_back();
		}
break;		break;
}		}
}		}

return true;		return true;
}		}

namespace {		namespace {
▲ Show 20 Lines • Show All 205 Lines • ▼ Show 20 Lines	static CommandRegistration Unused(&Account, []() -> Error {
if (EC)		if (EC)
return make_error<StringError>(		return make_error<StringError>(
Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);		Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);

const auto &FunctionAddresses = Map.getFunctionAddresses();		const auto &FunctionAddresses = Map.getFunctionAddresses();
symbolize::LLVMSymbolizer Symbolizer;		symbolize::LLVMSymbolizer Symbolizer;
llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,		llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
FunctionAddresses);		FunctionAddresses);
xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls);		xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,
		AccountDeduceSiblingCalls);
auto TraceOrErr = loadTraceFile(AccountInput);		auto TraceOrErr = loadTraceFile(AccountInput);
if (!TraceOrErr)		if (!TraceOrErr)
return joinErrors(		return joinErrors(
make_error<StringError>(		make_error<StringError>(
Twine("Failed loading input file '") + AccountInput + "'",		Twine("Failed loading input file '") + AccountInput + "'",
std::make_error_code(std::errc::executable_format_error)),		std::make_error_code(std::errc::executable_format_error)),
TraceOrErr.takeError());		TraceOrErr.takeError());

auto &T = *TraceOrErr;		auto &T = *TraceOrErr;
for (const auto &Record : T) {		for (const auto &Record : T) {
if (FCA.accountRecord(Record))		if (FCA.accountRecord(Record))
continue;		continue;
errs()		errs()
<< "Error processing record: "		<< "Error processing record: "
<< llvm::formatv(		<< llvm::formatv(
R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",		R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",
Record.RecordType, Record.CPU, Record.Type, Record.FuncId,		Record.RecordType, Record.CPU, Record.Type, Record.FuncId,
Record.TSC, Record.TId, Record.PId)		Record.TSC, Record.TId, Record.PId)
<< '\n';		<< '\n';
for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {		for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
errs() << "Thread ID: " << ThreadStack.first << "\n";		errs() << "Thread ID: " << ThreadStack.first << "\n";
if (ThreadStack.second.empty()) {		if (ThreadStack.second.Stack.empty()) {
errs() << " (empty stack)\n";		errs() << " (empty stack)\n";
continue;		continue;
}		}
auto Level = ThreadStack.second.size();		auto Level = ThreadStack.second.Stack.size();
for (const auto &Entry : llvm::reverse(ThreadStack.second))		for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))
errs() << " #" << Level-- << "\t"		errs() << " #" << Level-- << "\t"
<< FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';		<< FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
}		}
if (!AccountKeepGoing)		if (!AccountKeepGoing)
return make_error<StringError>(		return make_error<StringError>(
Twine("Failed accounting function calls in file '") + AccountInput +		Twine("Failed accounting function calls in file '") + AccountInput +
"'.",		"'.",
std::make_error_code(std::errc::executable_format_error));		std::make_error_code(std::errc::executable_format_error));
Show All 12 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[XRay] Account: recursion detection
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 280801

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml

llvm/tools/llvm-xray/xray-account.h

llvm/tools/llvm-xray/xray-account.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[XRay] Account: recursion detection
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 280801

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only-tail-call-deduction.yaml

llvm/test/tools/llvm-xray/X86/account-recursive-calls-only.yaml

llvm/tools/llvm-xray/xray-account.h

llvm/tools/llvm-xray/xray-account.cpp

[XRay] Account: recursion detection
ClosedPublic