Diff 363926

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript

This file was added.

				2
				4005dc
				400634
				400684
				7f68c5788793
				0x4005c8/0x4005dc 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005c8/0x4005dc
				2
				hoy (inline comment; unsubmitted, not done): Can you remove the `/P/-/-/0` part and see if the existing parser can handle both formats? That part is unnecessary.
				4005b0
				400684
				7f68c5788793
				0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005c8/0x4005dc 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript

This file was added.

				1
				20179e
				2017f9
				7f83e84e7793
				5541f689495641d7
				0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0
				1
				2017c4
				2017f9
				7f83e84e7793
				5541f689495641d7
				0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0
				1
				2017c4
				2017f9
				7f83e84e7793
				5541f689495641d7
				0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-AGG-UNWINDER
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG

				; CHECK-AGG:[main:1 @ foo]:108:0
				; CHECK-AGG: 2: 6
				; CHECK-AGG: 3: 6 bar:6
				; CHECK-AGG:[main:1 @ foo:3 @ bar]:100:6
				; CHECK-AGG: 0: 6
				; CHECK-AGG: 1: 6
				; CHECK-AGG: 2: 4
				; CHECK-AGG: 4: 2
				; CHECK-AGG: 5: 6

				; CHECK-AGG-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter:
				; CHECK-AGG-UNWINDER: main:1 @ foo
				; CHECK-AGG-UNWINDER: (5ff, 62f): 6
				; CHECK-AGG-UNWINDER: (634, 637): 6
				; CHECK-AGG-UNWINDER: (645, 645): 6
				; CHECK-AGG-UNWINDER: main:1 @ foo:3 @ bar
				; CHECK-AGG-UNWINDER: (5b0, 5c8): 2
				; CHECK-AGG-UNWINDER: (5b0, 5d7): 4
				; CHECK-AGG-UNWINDER: (5dc, 5e9): 2
				; CHECK-AGG-UNWINDER: (5e5, 5e9): 4

				; CHECK-AGG-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Branch Counter:
				; CHECK-AGG-UNWINDER: main:1 @ foo
				; CHECK-AGG-UNWINDER: (62f, 5b0): 6
				; CHECK-AGG-UNWINDER: (637, 645): 6
				; CHECK-AGG-UNWINDER: (645, 5ff): 6
				; CHECK-AGG-UNWINDER: main:1 @ foo:3 @ bar
				; CHECK-AGG-UNWINDER: (5c8, 5dc): 4
				; CHECK-AGG-UNWINDER: (5d7, 5e5): 4
				; CHECK-AGG-UNWINDER: (5e9, 634): 6



	; CHECK:[main:1 @ foo]:54:0			; CHECK:[main:1 @ foo]:54:0
	; CHECK: 2: 3			; CHECK: 2: 3
	; CHECK: 3: 3 bar:3			; CHECK: 3: 3 bar:3
	; CHECK:[main:1 @ foo:3 @ bar]:50:3			; CHECK:[main:1 @ foo:3 @ bar]:50:3
	; CHECK: 0: 3			; CHECK: 0: 3
	; CHECK: 1: 3			; CHECK: 1: 3
	; CHECK: 2: 2			; CHECK: 2: 2
	Show All 21 Lines
	; CHECK-UNWINDER: (5d7, 5e5): 2			; CHECK-UNWINDER: (5d7, 5e5): 2
	; CHECK-UNWINDER: (5e9, 634): 3			; CHECK-UNWINDER: (5e9, 634): 3







	; original code:			; original code:
	; clang -O0 -g test.c -o a.out			; clang -O0 -g test.c -o a.out
	#include <stdio.h>			#include <stdio.h>

	int bar(int x, int y) {			int bar(int x, int y) {
	if (x % 3) {			if (x % 3) {
	return x - y;			return x - y;
	}			}
	Show All 14 Lines

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER
				; RUN: FileCheck %s --input-file %t


	; CHECK: [main:2 @ foo]:75:0			; CHECK: [main:2 @ foo]:75:0
	; CHECK-NEXT: 1: 0			; CHECK-NEXT: 1: 0
	; CHECK-NEXT: 2: 15			; CHECK-NEXT: 2: 15
	; CHECK-NEXT: 3: 15			; CHECK-NEXT: 3: 15
	; CHECK-NEXT: 4: 15			; CHECK-NEXT: 4: 15
	; CHECK-NEXT: 5: 0			; CHECK-NEXT: 5: 0
	; CHECK-NEXT: 6: 15			; CHECK-NEXT: 6: 15
	▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines

llvm/tools/llvm-profgen/PerfReader.h

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines

// The type of perfscript		// The type of perfscript
enum PerfScriptType {		enum PerfScriptType {
PERF_UNKNOWN = 0,		PERF_UNKNOWN = 0,
PERF_INVALID = 1,		PERF_INVALID = 1,
PERF_LBR = 2, // Only LBR sample		PERF_LBR = 2, // Only LBR sample
PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack.		PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack.
};		};

		wenlei (inline comment; unsubmitted, not done): How about we make the aggregation form an "extension" applicable to any perf script type instead of a new type? This is because we could have a count for LBR perf script too (when we use llvm-profgen for line-based AutoFDO in the future). If we make aggregation a separate type, we would potentially need to double the number of types.
		hoy (inline comment; unsubmitted, not done): +1, this is a good idea.
// The parsed LBR sample entry.		// The parsed LBR sample entry.
struct LBREntry {		struct LBREntry {
uint64_t Source = 0;		uint64_t Source = 0;
uint64_t Target = 0;		uint64_t Target = 0;
// An artificial branch stands for a series of consecutive branches starting		// An artificial branch stands for a series of consecutive branches starting
// from the current binary with a transition through external code and		// from the current binary with a transition through external code and
// eventually landing back in the current binary.		// eventually landing back in the current binary.
bool IsArtificial = false;		bool IsArtificial = false;
▲ Show 20 Lines • Show All 556 Lines • ▼ Show 20 Lines	protected:
void parseEventOrSample(TraceStream &TraceIt);		void parseEventOrSample(TraceStream &TraceIt);
// Extract call stack from the perf trace lines		// Extract call stack from the perf trace lines
bool extractCallstack(TraceStream &TraceIt,		bool extractCallstack(TraceStream &TraceIt,
SmallVectorImpl<uint64_t> &CallStack);		SmallVectorImpl<uint64_t> &CallStack);
// Extract LBR stack from one perf trace line		// Extract LBR stack from one perf trace line
bool extractLBRStack(TraceStream &TraceIt,		bool extractLBRStack(TraceStream &TraceIt,
SmallVectorImpl<LBREntry> &LBRStack,		SmallVectorImpl<LBREntry> &LBRStack,
ProfiledBinary *Binary);		ProfiledBinary *Binary);
		uint64_t parseAggregatedCount(TraceStream &TraceIt);
// Parse one sample from multiple perf lines, override this for different		// Parse one sample from multiple perf lines, override this for different
// sample type		// sample type
virtual void parseSample(TraceStream &TraceIt) = 0;		void parseSample(TraceStream &TraceIt);
		// An aggregated count is given to indicate how many times the sample is
		// repeated.
		virtual void parseSample(TraceStream &TraceIt, uint64_t Count) = 0;
// Post process the profile after trace aggregation, we will do simple range		// Post process the profile after trace aggregation, we will do simple range
// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).		// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
virtual void generateRawProfile() = 0;		virtual void generateRawProfile() = 0;
// Helper function for looking up binary in AddressBinaryMap		// Helper function for looking up binary in AddressBinaryMap
ProfiledBinary *getBinary(uint64_t Address);		ProfiledBinary *getBinary(uint64_t Address);

BinaryMap BinaryTable;		BinaryMap BinaryTable;
AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.		AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
Show All 15 Lines
*/		*/
class HybridPerfReader : public PerfReaderBase {		class HybridPerfReader : public PerfReaderBase {
public:		public:
HybridPerfReader(cl::list<std::string> &BinaryFilenames)		HybridPerfReader(cl::list<std::string> &BinaryFilenames)
: PerfReaderBase(BinaryFilenames) {		: PerfReaderBase(BinaryFilenames) {
PerfType = PERF_LBR_STACK;		PerfType = PERF_LBR_STACK;
};		};
// Parse the hybrid sample including the call and LBR line		// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt) override;		void parseSample(TraceStream &TraceIt, uint64_t Count) override;
void generateRawProfile() override;		void generateRawProfile() override;

private:		private:
// Unwind the hybrid samples after aggregation		// Unwind the hybrid samples after aggregation
void unwindSamples();		void unwindSamples();
void printUnwinderOutput();		void printUnwinderOutput();
};		};

} // end namespace sampleprof		} // end namespace sampleprof
} // end namespace llvm		} // end namespace llvm

#endif		#endif

llvm/tools/llvm-profgen/PerfReader.cpp

Show First 20 Lines • Show All 616 Lines • ▼ Show 20 Lines	bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
// out to reduce the number of different calling contexts. One instance		// out to reduce the number of different calling contexts. One instance
// of such case - when sample landed in prolog/epilog, somehow stack		// of such case - when sample landed in prolog/epilog, somehow stack
// walking will be broken in an unexpected way that higher frames will be		// walking will be broken in an unexpected way that higher frames will be
// missing.		// missing.
return !CallStack.empty() &&		return !CallStack.empty() &&
!Binary->addressInPrologEpilog(CallStack.front());		!Binary->addressInPrologEpilog(CallStack.front());
}		}

void HybridPerfReader::parseSample(TraceStream &TraceIt) {		void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
// The raw hybrid sample started with call stack in FILO order and followed		// The raw hybrid sample started with call stack in FILO order and followed
// immediately by LBR sample		// immediately by LBR sample
// e.g.		// e.g.
// 4005dc # call stack leaf		// 4005dc # call stack leaf
// 400634		// 400634
// 400684 # call stack root		// 400684 # call stack root
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...		// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries		// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
Show All 20 Lines	void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {		if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
// Parsing LBR stack and populate into HybridSample.LBRStack		// Parsing LBR stack and populate into HybridSample.LBRStack
if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) {		if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) {
// Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR		// Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
// ranges		// ranges
Sample->CallStack.front() = Sample->LBRStack[0].Target;		Sample->CallStack.front() = Sample->LBRStack[0].Target;
// Record samples by aggregation		// Record samples by aggregation
Sample->genHashCode();		Sample->genHashCode();
AggregatedSamples[Hashable<PerfSample>(Sample)]++;		AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
}		}
} else {		} else {
// LBR sample is encoded in single line after stack sample		// LBR sample is encoded in single line after stack sample
exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");		exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");
}		}
}		}

		// Read the optional aggregated-count line that may precede a sample.
		// Returns the count parsed from the current line, or the default of 1 when
		// the line does not parse as a base-10 integer. The stream is advanced past
		// the line only when a count was actually parsed, so a sample without a
		// count line is left untouched for the sample parser.
		// NOTE(review): the caller must not be at EOF when calling this; there is no
		// isAtEoF() check here — confirm against the call site.
		uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {
		// The aggregated count is optional, so do not skip the line and return 1 if
		// it's unmatched
		uint64_t Count = 1;
		// Assuming getCurrentLine() yields an llvm::StringRef, getAsInteger returns
		// true on failure, hence the negation: consume the line only on success.
		// NOTE(review): relies on Count staying at 1 when parsing fails — confirm.
		if (!TraceIt.getCurrentLine().getAsInteger(10, Count))
		TraceIt.advance();
		return Count;
		}

		void PerfReaderBase::parseSample(TraceStream &TraceIt) {
		uint64_t Count = parseAggregatedCount(TraceIt);
		assert(Count >= 1 && "Aggregated count should be >= 1!");
		wenlei (inline comment; unsubmitted, not done): assert that `Count >= 1` here?
		parseSample(TraceIt, Count);
		}

void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {		void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
// Parse a line like:		// Parse a line like:
// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0		// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so		// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
constexpr static const char *const Pattern =		constexpr static const char *const Pattern =
"PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "		"PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "		"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
"(0x[a-f0-9]+\|0) .\\]: [-a-z]+ (.)";		"(0x[a-f0-9]+\|0) .\\]: [-a-z]+ (.)";
▲ Show 20 Lines • Show All 79 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[llvm-profgen][CSSPGO] Support count based aggregated type of hybrid perf script
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363926

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

llvm/tools/llvm-profgen/PerfReader.h

llvm/tools/llvm-profgen/PerfReader.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[llvm-profgen][CSSPGO] Support count based aggregated type of hybrid perf script
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363926

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript

llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

llvm/tools/llvm-profgen/PerfReader.h

llvm/tools/llvm-profgen/PerfReader.cpp

[llvm-profgen][CSSPGO] Support count based aggregated type of hybrid perf script
ClosedPublic