Diff 369780

llvm/test/tools/llvm-profgen/Inputs/noprobe.aggperfscript

This file was added.

				1
				40062f 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/16 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/10 0x40062f/0x4005b0/P/-/-/14 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/7 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/15 0x400645/0x4005ff/P/-/-/1
				1
				4005d7 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/13 0x40062f/0x4005b0/P/-/-/9
				3
				4005c8 0x4005c8/0x4005dc/P/-/-/11 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/8

llvm/test/tools/llvm-profgen/Inputs/noprobe.mmap.perfscript

This file was added.

				PERF_RECORD_MMAP2 121161/121161: [0x400000(0x1000) @ 0 00:23 10094534 144120]: r-xp /home/noprobe.perfbin
				40062f 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/16 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/10 0x40062f/0x4005b0/P/-/-/14 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/7 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/15 0x400645/0x4005ff/P/-/-/1
				4005d7 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/13 0x40062f/0x4005b0/P/-/-/9
				4005c8 0x4005c8/0x4005dc/P/-/-/11 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/8

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfbin

This binary file was added.

Property	Old Value	New Value
File Mode	null	100755

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfscript

This file was added.

				40062f 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/16 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/6 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/10 0x40062f/0x4005b0/P/-/-/14 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/7 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/15 0x400645/0x4005ff/P/-/-/1
				4005d7 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/11 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/13 0x40062f/0x4005b0/P/-/-/9
				4005c8 0x4005c8/0x4005dc/P/-/-/11 0x40062f/0x4005b0/P/-/-/8 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/5 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/7 0x40062f/0x4005b0/P/-/-/10 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/8 0x40062f/0x4005b0/P/-/-/9 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/1 0x4005d7/0x4005e5/P/-/-/12 0x40062f/0x4005b0/P/-/-/6 0x400645/0x4005ff/P/-/-/1 0x400637/0x400645/P/-/-/1 0x4005e9/0x400634/P/-/-/2 0x4005c8/0x4005dc/P/-/-/8 0x40062f/0x4005b0/P/-/-/8

llvm/test/tools/llvm-profgen/cs-interrupt.test

	;; The test fails on Windows. Fix it before removing the following requirement.			;; The test fails on Windows. Fix it before removing the following requirement.
	; REQUIRES: x86_64-linux			; REQUIRES: x86_64-linux
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-interrupt.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-interrupt.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-interrupt.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t

	; CHECK:[main:1 @ foo]:88:0			; CHECK:[main:1 @ foo]:88:0
	; CHECK: 2: 5			; CHECK: 2: 5
	; CHECK: 3: 5 bar:5			; CHECK: 3: 5 bar:5
	; CHECK:[main:1 @ foo:3 @ bar]:74:5			; CHECK:[main:1 @ foo:3 @ bar]:74:5
	; CHECK: 0: 5			; CHECK: 0: 5
	; CHECK: 1: 5			; CHECK: 1: 5
	▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

llvm/test/tools/llvm-profgen/inline-cs-noprobe.test

	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t

	; CHECK:[main:1 @ foo]:309:0			; CHECK:[main:1 @ foo]:309:0
	; CHECK: 2.1: 14			; CHECK: 2.1: 14
	; CHECK: 3: 15			; CHECK: 3: 15
	; CHECK: 3.1: 14 bar:14			; CHECK: 3.1: 14 bar:14
	; CHECK: 3.2: 1			; CHECK: 3.2: 1
	; CHECK:[main:1 @ foo:3.1 @ bar]:84:0			; CHECK:[main:1 @ foo:3.1 @ bar]:84:0
	Show All 38 Lines

llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test

	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t

	; CHECK: [main:2 @ foo]:74:0			; CHECK: [main:2 @ foo]:74:0
	; CHECK-NEXT: 1: 0			; CHECK-NEXT: 1: 0
	; CHECK-NEXT: 2: 15			; CHECK-NEXT: 2: 15
	; CHECK-NEXT: 3: 15			; CHECK-NEXT: 3: 15
	; CHECK-NEXT: 4: 14			; CHECK-NEXT: 4: 14
	; CHECK-NEXT: 5: 1			; CHECK-NEXT: 5: 1
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

	;; The test fails on Windows. Fix it before removing the following requirement.			;; The test fails on Windows. Fix it before removing the following requirement.
	; REQUIRES: x86_64-linux			; REQUIRES: x86_64-linux
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-AGG-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG			; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG


	; CHECK-AGG:[main:1 @ foo]:108:0			; CHECK-AGG:[main:1 @ foo]:108:0
	; CHECK-AGG: 2: 6			; CHECK-AGG: 2: 6
	; CHECK-AGG: 3: 6 bar:6			; CHECK-AGG: 3: 6 bar:6
	; CHECK-AGG:[main:1 @ foo:3 @ bar]:100:6			; CHECK-AGG:[main:1 @ foo:3 @ bar]:100:6
	; CHECK-AGG: 0: 6			; CHECK-AGG: 0: 6
	; CHECK-AGG: 1: 6			; CHECK-AGG: 1: 6
	; CHECK-AGG: 2: 4			; CHECK-AGG: 2: 4
	; CHECK-AGG: 4: 2			; CHECK-AGG: 4: 2
	▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t


	; CHECK: [main:2 @ foo]:75:0			; CHECK: [main:2 @ foo]:75:0
	; CHECK-NEXT: 1: 0			; CHECK-NEXT: 1: 0
	; CHECK-NEXT: 2: 15			; CHECK-NEXT: 2: 15
	; CHECK-NEXT: 3: 15			; CHECK-NEXT: 3: 15
	; CHECK-NEXT: 4: 15			; CHECK-NEXT: 4: 15
	▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines

llvm/test/tools/llvm-profgen/noprobe.test

This file was added.

				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.mmap.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.aggperfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE-AGG

				CHECK-RAW-PROFILE: 7
				CHECK-RAW-PROFILE-NEXT: 5b0-5c8:7
				CHECK-RAW-PROFILE-NEXT: 5b0-5d7:13
				CHECK-RAW-PROFILE-NEXT: 5dc-5e9:6
				CHECK-RAW-PROFILE-NEXT: 5e5-5e9:12
				CHECK-RAW-PROFILE-NEXT: 5ff-62f:19
				CHECK-RAW-PROFILE-NEXT: 634-637:18
				CHECK-RAW-PROFILE-NEXT: 645-645:18
				CHECK-RAW-PROFILE-NEXT: 6
				CHECK-RAW-PROFILE-NEXT: 5c8->5dc:7
				CHECK-RAW-PROFILE-NEXT: 5d7->5e5:13
				CHECK-RAW-PROFILE-NEXT: 5e9->634:18
				CHECK-RAW-PROFILE-NEXT: 62f->5b0:21
				CHECK-RAW-PROFILE-NEXT: 637->645:18
				CHECK-RAW-PROFILE-NEXT: 645->5ff:19


				CHECK-RAW-PROFILE-AGG: 7
				CHECK-RAW-PROFILE-AGG-NEXT: 5b0-5c8:13
				CHECK-RAW-PROFILE-AGG-NEXT: 5b0-5d7:21
				CHECK-RAW-PROFILE-AGG-NEXT: 5dc-5e9:10
				CHECK-RAW-PROFILE-AGG-NEXT: 5e5-5e9:20
				CHECK-RAW-PROFILE-AGG-NEXT: 5ff-62f:31
				CHECK-RAW-PROFILE-AGG-NEXT: 634-637:30
				CHECK-RAW-PROFILE-AGG-NEXT: 645-645:30
				CHECK-RAW-PROFILE-AGG-NEXT: 6
				CHECK-RAW-PROFILE-AGG-NEXT: 5c8->5dc:13
				CHECK-RAW-PROFILE-AGG-NEXT: 5d7->5e5:21
				CHECK-RAW-PROFILE-AGG-NEXT: 5e9->634:30
				CHECK-RAW-PROFILE-AGG-NEXT: 62f->5b0:35
				CHECK-RAW-PROFILE-AGG-NEXT: 637->645:30
				CHECK-RAW-PROFILE-AGG-NEXT: 645->5ff:31


				; original code:
				; clang -O3 -g -debug-info-for-profiling test.c -o a.out
				#include <stdio.h>

				// Returns x - y when x is not a multiple of 3, and x + y otherwise.
				int bar(int x, int y) {
				if (x % 3) {
				return x - y;
				}
				return x + y;
				}

				// Runs i from 1 through 4000 * 4000: when i is not a multiple of 91 it
				// sets s = bar(i, s), otherwise it adds 30 to s. Prints s at the end.
				// NOTE(review): s has no initializer (only i is set to 0), so the first
				// bar(i, s) call reads an indeterminate value.
				void foo() {
				int s, i = 0;
				while (i++ < 4000 * 4000)
				if (i % 91) s = bar(i, s); else s += 30;
				printf("sum is %d\n", s);
				}

				// Entry point: calls foo() once and returns 0.
				int main() {
				foo();
				return 0;
				}

llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test

	; Firstly test uncompression(--compress-recursion=0)			; Firstly test uncompression(--compress-recursion=0)
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS			; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 \| FileCheck %s --check-prefix=CHECK-UNWINDER			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
				; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
				; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
	; RUN: FileCheck %s --input-file %t			; RUN: FileCheck %s --input-file %t
	; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-max-context-depth=0			; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-max-context-depth=0
	; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH			; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH


	; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1			; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
	; CHECK-UNCOMPRESS: 1: 1			; CHECK-UNCOMPRESS: 1: 1
	; CHECK-UNCOMPRESS: 3: 1			; CHECK-UNCOMPRESS: 3: 1
	▲ Show 20 Lines • Show All 178 Lines • Show Last 20 Lines

llvm/tools/llvm-profgen/PerfReader.h

Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines
public:		public:
std::shared_ptr<T> Data;		std::shared_ptr<T> Data;
Hashable(const std::shared_ptr<T> &D) : Data(D) {}		Hashable(const std::shared_ptr<T> &D) : Data(D) {}

// Hash code generation		// Hash code generation
struct Hash {		struct Hash {
uint64_t operator()(const Hashable<T> &Key) const {		uint64_t operator()(const Hashable<T> &Key) const {
// Don't make it virtual for getHashCode		// Don't make it virtual for getHashCode
assert(Key.Data->getHashCode() && "Should generate HashCode for it!");		uint64_t Hash = Key.Data->getHashCode();
return Key.Data->getHashCode();		assert(Hash && "Should generate HashCode for it!");
		return Hash;
}		}
};		};

// Hash equal		// Hash equal
struct Equal {		struct Equal {
bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {		bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
// Precisely compare the data, vtable will have overhead.		// Precisely compare the data, vtable will have overhead.
return LHS.Data->isEqual(RHS.Data.get());		return LHS.Data->isEqual(RHS.Data.get());
}		}
};		};

T *getPtr() const { return Data.get(); }		T *getPtr() const { return Data.get(); }
};		};

// Base class to extend for all types of perf sample
struct PerfSample {		struct PerfSample {
uint64_t HashCode = 0;		// LBR stack recorded in FIFO order.
		SmallVector<LBREntry, 16> LBRStack;
		// Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
		// generation
		SmallVector<uint64_t, 16> CallStack;

virtual ~PerfSample() = default;		virtual ~PerfSample() = default;
uint64_t getHashCode() const { return HashCode; }		uint64_t getHashCode() const {
virtual bool isEqual(const PerfSample *K) const {		// Use simple DJB2 hash
return HashCode == K->HashCode;		auto HashCombine = [](uint64_t H, uint64_t V) {
};		return ((H << 5) + H) + V;

// Utilities for LLVM-style RTTI
enum PerfKind { PK_HybridSample };
const PerfKind Kind;
PerfKind getKind() const { return Kind; }
PerfSample(PerfKind K) : Kind(K){};
};		};
		uint64_t Hash = 5381;
// The parsed hybrid sample including call stack and LBR stack.		for (const auto &Value : CallStack) {
struct HybridSample : public PerfSample {		Hash = HashCombine(Hash, Value);
// Profiled binary that current frame address belongs to		}
ProfiledBinary *Binary;		for (const auto &Entry : LBRStack) {
// Call stack recorded in FILO(leaf to root) order		Hash = HashCombine(Hash, Entry.Source);
SmallVector<uint64_t, 16> CallStack;		Hash = HashCombine(Hash, Entry.Target);
// LBR stack recorded in FIFO order		}
SmallVector<LBREntry, 16> LBRStack;		return Hash;

HybridSample(ProfiledBinary *B) : PerfSample(PK_HybridSample), Binary(B){};
static bool classof(const PerfSample *K) {
return K->getKind() == PK_HybridSample;
}		}

// Used for sample aggregation		bool isEqual(const PerfSample *Other) const {
bool isEqual(const PerfSample *K) const override {
const HybridSample *Other = dyn_cast<HybridSample>(K);
if (Other->Binary != Binary)
return false;
const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;		const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;		const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;

if (CallStack.size() != OtherCallStack.size() ||		if (CallStack.size() != OtherCallStack.size() ||
LBRStack.size() != OtherLBRStack.size())		LBRStack.size() != OtherLBRStack.size())
return false;		return false;

auto Iter = CallStack.begin();		if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
for (auto Address : OtherCallStack) {
if (Address != *Iter++)
return false;		return false;
}

for (size_t I = 0; I < OtherLBRStack.size(); I++) {		for (size_t I = 0; I < OtherLBRStack.size(); I++) {
if (LBRStack[I].Source != OtherLBRStack[I].Source ||		if (LBRStack[I].Source != OtherLBRStack[I].Source ||
LBRStack[I].Target != OtherLBRStack[I].Target)		LBRStack[I].Target != OtherLBRStack[I].Target)
return false;		return false;
}		}
return true;		return true;
}		}

void genHashCode() {
// Use simple DJB2 hash
auto HashCombine = [](uint64_t H, uint64_t V) {
return ((H << 5) + H) + V;
};
uint64_t Hash = 5381;
Hash = HashCombine(Hash, reinterpret_cast<uint64_t>(Binary));
for (const auto &Value : CallStack) {
Hash = HashCombine(Hash, Value);
}
for (const auto &Entry : LBRStack) {
Hash = HashCombine(Hash, Entry.Source);
Hash = HashCombine(Hash, Entry.Target);
}
HashCode = Hash;
}

#ifndef NDEBUG		#ifndef NDEBUG
void print() const {		void print() const {
dbgs() << "LBR stack\n";		dbgs() << "LBR stack\n";
printLBRStack(LBRStack);		printLBRStack(LBRStack);
dbgs() << "Call stack\n";		dbgs() << "Call stack\n";
printCallStack(CallStack);		printCallStack(CallStack);
}		}
#endif		#endif
};		};

// After parsing the sample, we record the samples by aggregating them		// After parsing the sample, we record the samples by aggregating them
// into this counter. The key stores the sample data and the value is		// into this counter. The key stores the sample data and the value is
// the sample repeat times.		// the sample repeat times.
using AggregatedCounter =		using AggregatedCounter =
std::unordered_map<Hashable<PerfSample>, uint64_t,		std::unordered_map<Hashable<PerfSample>, uint64_t,
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;		Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;

using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;		using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
Show All 28 Lines	struct ProfiledFrame {
bool isDummyRoot() { return Address == 0; }		bool isDummyRoot() { return Address == 0; }
bool isLeafFrame() { return Children.empty(); }		bool isLeafFrame() { return Children.empty(); }
};		};

ProfiledFrame DummyTrieRoot;		ProfiledFrame DummyTrieRoot;
ProfiledFrame *CurrentLeafFrame;		ProfiledFrame *CurrentLeafFrame;
// Used to fall through the LBR stack		// Used to fall through the LBR stack
uint32_t LBRIndex = 0;		uint32_t LBRIndex = 0;
// Reference to HybridSample.LBRStack		// Reference to PerfSample.LBRStack
const SmallVector<LBREntry, 16> &LBRStack;		const SmallVector<LBREntry, 16> &LBRStack;
// Used to iterate the address range		// Used to iterate the address range
InstructionPointer InstPtr;		InstructionPointer InstPtr;
UnwindState(const HybridSample *Sample)		UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
: Binary(Sample->Binary), LBRStack(Sample->LBRStack),		: Binary(Binary), LBRStack(Sample->LBRStack),
InstPtr(Sample->Binary, Sample->CallStack.front()) {		InstPtr(Binary, Sample->CallStack.front()) {
initFrameTrie(Sample->CallStack);		initFrameTrie(Sample->CallStack);
}		}

bool validateInitialState() {		bool validateInitialState() {
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;		uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
uint64_t LeafAddr = CurrentLeafFrame->Address;		uint64_t LeafAddr = CurrentLeafFrame->Address;
// When we take a stack sample, ideally the sampling distance between the		// When we take a stack sample, ideally the sampling distance between the
// leaf IP of stack and the last LBR target shouldn't be very large.		// leaf IP of stack and the last LBR target shouldn't be very large.
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;		using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,		// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.		// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;		using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// Wrapper for sample counters including range counter and branch counter		// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {		struct SampleCounter {
RangeSample RangeCounter;		RangeSample RangeCounter;
BranchSample BranchCounter;		BranchSample BranchCounter;

		hoy: Do we really need this? `RangeCounter` should cover addresses?
		wlei (author): Good catch. I checked our internal tool: the address counter is only used when there are no LBR samples, so we indeed don't need it here. Thanks!
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {		void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
RangeCounter[{Start, End}] += Repeat;		RangeCounter[{Start, End}] += Repeat;
}		}
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {		void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
BranchCounter[{Source, Target}] += Repeat;		BranchCounter[{Source, Target}] += Repeat;
}		}
};		};

▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
splitting and record counters by the range with same inline context. Over those		splitting and record counters by the range with same inline context. Over those
unwinding process we will record each call stack as context id and LBR/linear		unwinding process we will record each call stack as context id and LBR/linear
range as sample counter for further CS profile generation.		range as sample counter for further CS profile generation.
*/		*/
class VirtualUnwinder {		class VirtualUnwinder {
public:		public:
VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)		VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
: CtxCounterMap(Counter), Binary(B) {}		: CtxCounterMap(Counter), Binary(B) {}
bool unwind(const HybridSample *Sample, uint64_t Repeat);		bool unwind(const PerfSample *Sample, uint64_t Repeat);

private:		private:
bool isCallState(UnwindState &State) const {		bool isCallState(UnwindState &State) const {
// The tail call frame is always missing here in stack sample, we will		// The tail call frame is always missing here in stack sample, we will
// use a specific tail call tracker to infer it.		// use a specific tail call tracker to infer it.
return Binary->addressIsCall(State.getCurrentLBRSource());		return Binary->addressIsCall(State.getCurrentLBRSource());
}		}

Show All 32 Lines	PerfReaderBase(ProfiledBinary *B) : Binary(B) {
// Initialize the base address to preferred address.		// Initialize the base address to preferred address.
Binary->setBaseAddress(Binary->getPreferredBaseAddress());		Binary->setBaseAddress(Binary->getPreferredBaseAddress());
};		};
virtual ~PerfReaderBase() = default;		virtual ~PerfReaderBase() = default;
static std::unique_ptr<PerfReaderBase>		static std::unique_ptr<PerfReaderBase>
create(ProfiledBinary *Binary, cl::list<std::string> &PerfTraceFilenames);		create(ProfiledBinary *Binary, cl::list<std::string> &PerfTraceFilenames);

// A LBR sample is like:		// A LBR sample is like:
// 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ...		// 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ...
// A heuristic for fast detection by checking whether a		// A heuristic for fast detection by checking whether a
// leading " 0x" and the '/' exist.		// leading " 0x" and the '/' exist.
static bool isLBRSample(StringRef Line) {		static bool isLBRSample(StringRef Line) {
if (!Line.startswith(" 0x"))		// Skip the leading instruction pointer
		SmallVector<StringRef, 32> Records;
		Line.trim().split(Records, " ", 2, false);
		hoyUnsubmitted Not Done Reply Inline Actions Nit: splitting once is enough, instead of unlimited times (-1)? hoy: Nit: splitting once is enough, instead of unlimited times (-1)?
		wleiAuthorUnsubmitted Done Reply Inline Actions -1 (the maximum number of splits) is the default value; it is passed explicitly here only to reach the 4th parameter, which tells split not to keep empty strings. Otherwise there are many empty records and it is not straightforward to get the leading instruction pointer. It seems C++ can't skip a default argument in the middle. wlei: -1 (the maximum number of splits) is the default value; it is passed explicitly here only to reach the 4th parameter, which…
		wenleiUnsubmitted Not Done Reply Inline Actions `Line.split(Records, " ", 1, false);` instead? wenlei: `Line.split(Records, " ", 1, false);` instead?
		wleiAuthorUnsubmitted Done Reply Inline Actions Sorry, I misunderstood. I tried `Line.split(Records, " ", 1, false);` but it doesn't work: there are several leading spaces, so splitting once only separates the first space and the remainder still starts with a space. wlei: Sorry, I misunderstood. I tried `Line.split(Records, " ", 1, false);` but it doesn't…
		wenleiUnsubmitted Not Done Reply Inline Actions not a big deal, but this could be faster as it avoids scanning and splitting the entire string: Line = Line.trim(" "); Line.split(Records, " ", 1, false); wenlei: not a big deal, but this could be faster as it avoids scanning and splitting the entire string…
		wleiAuthorUnsubmitted Done Reply Inline Actions Sounds good, I actually found there are two space strings between each LBR entry, so I need to change to split 2 times :) wlei: Sounds good, I actually found there are two space strings between each LBR entry, so I need to…
		wenleiUnsubmitted Not Done Reply Inline Actions Or `Line.split(Records, "  ", 1, false);`? Two spaces instead of one as the separator. Works either way. wenlei: Or `Line.split(Records, "  ", 1, false);`? Two spaces instead of one as the separator. Works…
		if (Records.size() < 2)
return false;		return false;
if (Line.find('/') != StringRef::npos)		if (Records[1].startswith("0x") && Records[1].find('/') != StringRef::npos)
return true;		return true;
return false;		return false;
}		}

// The raw hybrid sample is like		// The raw hybrid sample is like
// e.g.		// e.g.
// 4005dc # call stack leaf		// 4005dc # call stack leaf
// 400634		// 400634
// 400684 # call stack root		// 400684 # call stack root
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...		// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries		// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
// Determine the perfscript contains hybrid samples(call stack + LBRs) by		// Determine the perfscript contains hybrid samples(call stack + LBRs) by
// checking whether there is a non-empty call stack immediately followed by		// checking whether there is a non-empty call stack immediately followed by
// a LBR sample		// a LBR sample
static PerfScriptType checkPerfScriptType(StringRef FileName) {		static PerfScriptType checkPerfScriptType(StringRef FileName) {
TraceStream TraceIt(FileName);		TraceStream TraceIt(FileName);
uint64_t FrameAddr = 0;		uint64_t FrameAddr = 0;
while (!TraceIt.isAtEoF()) {		while (!TraceIt.isAtEoF()) {
		// Skip the aggregated count
		if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr))
		hoyUnsubmitted Not Done Reply Inline Actions Why need this? Can the count be just skipped since it does not reflect a call stack sample and LBR sample? hoy: Why need this? Can the count be just skipped since it does not reflect a call stack sample and…
		wleiAuthorUnsubmitted Done Reply Inline Actions This check is in fact what skips it. To distinguish LBR and hybrid samples, `Count` is used to indicate whether a call stack exists; >0 means a hybrid sample. Without the skip, the aggregated count line would also increment `Count`, and an LBR sample would incorrectly be treated as a hybrid sample. wlei: This check is in fact what skips it. To distinguish LBR and hybrid samples, `Count` is used to indicate…
		hoyUnsubmitted Not Done Reply Inline Actions I see. Do we always have an aggregated count for both hybrid and LBR samples? If not, a decimal number can also be a valid call frame addr in which case we may not be able to tell LBR samples apart from hybrid samples. hoy: I see. Do we always have an aggregated count for both hybrid and LBR samples? If not, a decimal…
		TraceIt.advance();

		// Detect sample with call stack
int32_t Count = 0;		int32_t Count = 0;
while (!TraceIt.isAtEoF() &&		while (!TraceIt.isAtEoF() &&
!TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {		!TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
Count++;		Count++;
TraceIt.advance();		TraceIt.advance();
}		}
if (!TraceIt.isAtEoF()) {		if (!TraceIt.isAtEoF()) {
if (isLBRSample(TraceIt.getCurrentLine())) {		if (isLBRSample(TraceIt.getCurrentLine())) {
Show All 31 Lines	protected:
/// Parse a single line of a PERF_RECORD_MMAP2 event looking for a		/// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
/// mapping between the binary name and its memory layout.		/// mapping between the binary name and its memory layout.
///		///
void parseMMap2Event(TraceStream &TraceIt);		void parseMMap2Event(TraceStream &TraceIt);
// Parse perf events/samples and do aggregation		// Parse perf events/samples and do aggregation
void parseAndAggregateTrace(StringRef Filename);		void parseAndAggregateTrace(StringRef Filename);
// Parse either an MMAP event or a perf sample		// Parse either an MMAP event or a perf sample
void parseEventOrSample(TraceStream &TraceIt);		void parseEventOrSample(TraceStream &TraceIt);
		// Warn if the relevant mmap event is missing.
		void warnIfMissingMMap();
// Extract call stack from the perf trace lines		// Extract call stack from the perf trace lines
bool extractCallstack(TraceStream &TraceIt,		bool extractCallstack(TraceStream &TraceIt,
SmallVectorImpl<uint64_t> &CallStack);		SmallVectorImpl<uint64_t> &CallStack);
// Extract LBR stack from one perf trace line		// Extract LBR stack from one perf trace line
bool extractLBRStack(TraceStream &TraceIt,		bool extractLBRStack(TraceStream &TraceIt,
SmallVectorImpl<LBREntry> &LBRStack);		SmallVectorImpl<LBREntry> &LBRStack);
uint64_t parseAggregatedCount(TraceStream &TraceIt);		uint64_t parseAggregatedCount(TraceStream &TraceIt);
// Parse one sample from multiple perf lines, override this for different		// Parse one sample from multiple perf lines, override this for different
// sample type		// sample type
void parseSample(TraceStream &TraceIt);		void parseSample(TraceStream &TraceIt);
// An aggregated count is given to indicate how many times the sample is		// An aggregated count is given to indicate how many times the sample is
// repeated.		// repeated.
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) = 0;		virtual void parseSample(TraceStream &TraceIt, uint64_t Count) = 0;
// Post process the profile after trace aggregation, we will do simple range		// Post process the profile after trace aggregation, we will do simple range
// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).		// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
virtual void generateRawProfile() = 0;		virtual void generateRawProfile() = 0;
		virtual void writeRawProfile(StringRef Filename);
		virtual void writeRawProfile(raw_fd_ostream &OS) = 0;
		wenleiUnsubmitted Not Done Reply Inline Actions nit: now that this is output to a file instead of stdout, rename it as writeRawProfile? wenlei: nit: now that this is output to a file instead of stdout, rename it as writeRawProfile?

ProfiledBinary *Binary = nullptr;		ProfiledBinary *Binary = nullptr;

ContextSampleCounterMap SampleCounters;		ContextSampleCounterMap SampleCounters;
// Samples with the repeating time generated by the perf reader		// Samples with the repeating time generated by the perf reader
AggregatedCounter AggregatedSamples;		AggregatedCounter AggregatedSamples;
PerfScriptType PerfType = PERF_UNKNOWN;		PerfScriptType PerfType = PERF_UNKNOWN;
};		};
Show All 14 Lines	public:
};		};
// Parse the hybrid sample including the call and LBR line		// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt, uint64_t Count) override;		void parseSample(TraceStream &TraceIt, uint64_t Count) override;
void generateRawProfile() override;		void generateRawProfile() override;

private:		private:
// Unwind the hybrid samples after aggregration		// Unwind the hybrid samples after aggregration
void unwindSamples();		void unwindSamples();
void printUnwinderOutput();		void writeRawProfile(raw_fd_ostream &OS) override;
		};

		/*
		The reader of LBR only perf script.
		A typical LBR sample is like:
		40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
		... 0x4005c8/0x4005dc/P/-/-/0
		*/
		class LBRPerfReader : public PerfReaderBase {
		public:
		LBRPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) {
		// There is no context for LBR only sample, so initialize one entry with
		// fake "empty" context key.
		std::shared_ptr<StringBasedCtxKey> Key =
		std::make_shared<StringBasedCtxKey>();
		Key->genHashCode();
		SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
		PerfType = PERF_LBR;
		};

		// Parse the LBR only sample.
		void parseSample(TraceStream &TraceIt, uint64_t Count) override;
		void generateRawProfile() override;

		private:
		void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
		void writeRawProfile(raw_fd_ostream &OS) override;
};		};

} // end namespace sampleprof		} // end namespace sampleprof
} // end namespace llvm		} // end namespace llvm

#endif		#endif
		wenleiUnsubmitted Not Done Reply Inline Actions Would an empty string (zero length) work? wenlei: Would an empty string (zero length) work?
		wleiAuthorUnsubmitted Done Reply Inline Actions Good catch! Removed! wlei: Good catch! Removed!

llvm/tools/llvm-profgen/PerfReader.cpp

//===-- PerfReader.cpp - perfscript reader ---------------------- C++ --===//		//===-- PerfReader.cpp - perfscript reader ---------------------- C++ --===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
#include "PerfReader.h"		#include "PerfReader.h"
#include "ProfileGenerator.h"		#include "ProfileGenerator.h"
		#include "llvm/Support/FileSystem.h"

static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,		static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
cl::init(false), cl::ZeroOrMore,		cl::init(false), cl::ZeroOrMore,
cl::desc("Print binary load events."));		cl::desc("Print binary load events."));

static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output",		cl::opt<bool> SkipSymbolization("skip-symbolization", cl::ReallyHidden,
cl::ReallyHidden, cl::init(false),		cl::init(false), cl::ZeroOrMore,
cl::ZeroOrMore,		cl::desc("Dump the unsumbolized profile to the "
cl::desc("Print unwinder output"));		"output file. It will show unwinder "
		"output for CS profile generation."));

extern cl::opt<bool> ShowDisassemblyOnly;		extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;		extern cl::opt<bool> ShowSourceLocations;
		extern cl::opt<std::string> OutputFilename;

namespace llvm {		namespace llvm {
namespace sampleprof {		namespace sampleprof {

void VirtualUnwinder::unwindCall(UnwindState &State) {		void VirtualUnwinder::unwindCall(UnwindState &State) {
// The 2nd frame after leaf could be missing if stack sample is		// The 2nd frame after leaf could be missing if stack sample is
// taken when IP is within prolog/epilog, as frame chain isn't		// taken when IP is within prolog/epilog, as frame chain isn't
// setup yet. Fill in the missing frame in that case.		// setup yet. Fill in the missing frame in that case.
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines	if (Binary->usePseudoProbes()) {
State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,		State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
Repeat);		Repeat);
} else {		} else {
State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,		State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
Repeat);		Repeat);
}		}
}		}

bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {		bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
// Capture initial state as starting point for unwinding.		// Capture initial state as starting point for unwinding.
UnwindState State(Sample);		UnwindState State(Sample, Binary);

// Sanity check - making sure leaf of LBR aligns with leaf of stack sample		// Sanity check - making sure leaf of LBR aligns with leaf of stack sample
// Stack sample sometimes can be unreliable, so filter out bogus ones.		// Stack sample sometimes can be unreliable, so filter out bogus ones.
if (!State.validateInitialState())		if (!State.validateInitialState())
return false;		return false;

// Also do not attempt linear unwind for the leaf range as it's incomplete.		// Also do not attempt linear unwind for the leaf range as it's incomplete.
bool IsLeaf = true;		bool IsLeaf = true;
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
std::unique_ptr<PerfReaderBase>		std::unique_ptr<PerfReaderBase>
PerfReaderBase::create(ProfiledBinary *Binary,		PerfReaderBase::create(ProfiledBinary *Binary,
cl::list<std::string> &PerfTraceFilenames) {		cl::list<std::string> &PerfTraceFilenames) {
PerfScriptType PerfType = extractPerfType(PerfTraceFilenames);		PerfScriptType PerfType = extractPerfType(PerfTraceFilenames);
std::unique_ptr<PerfReaderBase> PerfReader;		std::unique_ptr<PerfReaderBase> PerfReader;
if (PerfType == PERF_LBR_STACK) {		if (PerfType == PERF_LBR_STACK) {
PerfReader.reset(new HybridPerfReader(Binary));		PerfReader.reset(new HybridPerfReader(Binary));
} else if (PerfType == PERF_LBR) {		} else if (PerfType == PERF_LBR) {
// TODO:		PerfReader.reset(new LBRPerfReader(Binary));
exitWithError("Unsupported perfscript!");
} else {		} else {
exitWithError("Unsupported perfscript!");		exitWithError("Unsupported perfscript!");
}		}

return PerfReader;		return PerfReader;
}		}

void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {		void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {
Show All 40 Lines	if (It != Offsets.end() && *It == Event.Offset) {
}		}
}		}
}		}
}		}

// Use ordered map to make the output deterministic		// Use ordered map to make the output deterministic
using OrderedCounterForPrint = std::map<std::string, RangeSample>;		using OrderedCounterForPrint = std::map<std::string, RangeSample>;

static void printSampleCounter(OrderedCounterForPrint &OrderedCounter) {		static void printSampleCounter(OrderedCounterForPrint &OrderedCounter,
		raw_fd_ostream &OS) {
for (auto Range : OrderedCounter) {		for (auto Range : OrderedCounter) {
outs() << Range.first << "\n";		OS << Range.first << "\n";
for (auto I : Range.second) {		for (auto I : Range.second) {
outs() << " (" << format("%" PRIx64, I.first.first) << ", "		OS << " (" << format("%" PRIx64, I.first.first) << ", "
<< format("%" PRIx64, I.first.second) << "): " << I.second << "\n";		<< format("%" PRIx64, I.first.second) << "): " << I.second << "\n";
}		}
}		}
}		}

static std::string getContextKeyStr(ContextKey *K,		static std::string getContextKeyStr(ContextKey *K,
const ProfiledBinary *Binary) {		const ProfiledBinary *Binary) {
if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {		if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
return SampleContext::getContextString(CtxKey->Context);		return SampleContext::getContextString(CtxKey->Context);
} else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {		} else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
SampleContextFrameVector ContextStack;		SampleContextFrameVector ContextStack;
for (const auto *Probe : CtxKey->Probes) {		for (const auto *Probe : CtxKey->Probes) {
Binary->getInlineContextForProbe(Probe, ContextStack, true);		Binary->getInlineContextForProbe(Probe, ContextStack, true);
}		}
// Probe context key at this point does not have leaf probe, so do not		// Probe context key at this point does not have leaf probe, so do not
// include the leaf inline location.		// include the leaf inline location.
return SampleContext::getContextString(ContextStack, true);		return SampleContext::getContextString(ContextStack, true);
} else {		} else {
llvm_unreachable("unexpected key type");		llvm_unreachable("unexpected key type");
}		}
}		}

static void printRangeCounter(ContextSampleCounterMap &Counter,		static void printRangeCounter(ContextSampleCounterMap &Counter,
const ProfiledBinary *Binary) {		const ProfiledBinary *Binary,
		raw_fd_ostream &OS) {
OrderedCounterForPrint OrderedCounter;		OrderedCounterForPrint OrderedCounter;
for (auto &CI : Counter) {		for (auto &CI : Counter) {
OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =		OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
CI.second.RangeCounter;		CI.second.RangeCounter;
}		}
printSampleCounter(OrderedCounter);		printSampleCounter(OrderedCounter, OS);
}		}

static void printBranchCounter(ContextSampleCounterMap &Counter,		static void printBranchCounter(ContextSampleCounterMap &Counter,
const ProfiledBinary *Binary) {		const ProfiledBinary *Binary,
		raw_fd_ostream &OS) {
OrderedCounterForPrint OrderedCounter;		OrderedCounterForPrint OrderedCounter;
for (auto &CI : Counter) {		for (auto &CI : Counter) {
OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =		OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
CI.second.BranchCounter;		CI.second.BranchCounter;
}		}
printSampleCounter(OrderedCounter);		printSampleCounter(OrderedCounter, OS);
}		}

void HybridPerfReader::printUnwinderOutput() {		void HybridPerfReader::writeRawProfile(raw_fd_ostream &OS) {
outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";		OS << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
printRangeCounter(SampleCounters, Binary);		printRangeCounter(SampleCounters, Binary, OS);
outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";		OS << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";
printBranchCounter(SampleCounters, Binary);		printBranchCounter(SampleCounters, Binary, OS);
}		}

void HybridPerfReader::unwindSamples() {		void HybridPerfReader::unwindSamples() {
for (const auto &Item : AggregatedSamples) {		for (const auto &Item : AggregatedSamples) {
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());		const PerfSample *Sample = Item.first.getPtr();
VirtualUnwinder Unwinder(&SampleCounters, Binary);		VirtualUnwinder Unwinder(&SampleCounters, Binary);
Unwinder.unwind(Sample, Item.second);		Unwinder.unwind(Sample, Item.second);
}		}

if (ShowUnwinderOutput)		if (SkipSymbolization)
printUnwinderOutput();		PerfReaderBase::writeRawProfile(OutputFilename);
}		}

bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,		bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,
SmallVectorImpl<LBREntry> &LBRStack) {		SmallVectorImpl<LBREntry> &LBRStack) {
// The raw format of LBR stack is like:		// The raw format of LBR stack is like:
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...		// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
// ... 0x4005c8/0x4005dc/P/-/-/0		// ... 0x4005c8/0x4005dc/P/-/-/0
// It's in FIFO order and seperated by whitespace.		// It's in FIFO order and seperated by whitespace.
SmallVector<StringRef, 32> Records;		SmallVector<StringRef, 32> Records;
TraceIt.getCurrentLine().split(Records, " ");		TraceIt.getCurrentLine().split(Records, " ", -1, false);

// Extract leading instruction pointer if present, use single		// Skip the leading instruction pointer.
// list to pass out as reference.
size_t Index = 0;		size_t Index = 0;
if (!Records.empty() && Records[0].find('/') == StringRef::npos) {		if (!Records.empty() && Records[0].find('/') == StringRef::npos) {
Index = 1;		Index = 1;
}		}
// Now extract LBR samples - note that we do not reverse the		// Now extract LBR samples - note that we do not reverse the
// LBR entry order so we can unwind the sample stack as we walk		// LBR entry order so we can unwind the sample stack as we walk
// through LBR entries.		// through LBR entries.
uint64_t PrevTrDst = 0;		uint64_t PrevTrDst = 0;
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines	bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
// out to reduce the number of different calling contexts. One instance		// out to reduce the number of different calling contexts. One instance
// of such case - when sample landed in prolog/epilog, somehow stack		// of such case - when sample landed in prolog/epilog, somehow stack
// walking will be broken in an unexpected way that higher frames will be		// walking will be broken in an unexpected way that higher frames will be
// missing.		// missing.
return !CallStack.empty() &&		return !CallStack.empty() &&
!Binary->addressInPrologEpilog(CallStack.front());		!Binary->addressInPrologEpilog(CallStack.front());
}		}

		void PerfReaderBase::warnIfMissingMMap() {
		if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
		WithColor::warning() << "No relevant mmap event is matched, will use "
		"preferred address as the base loading address!\n";
		// Avoid redundant warning, only warn at the first unmatched sample.
		Binary->setMissingMMapWarned(true);
		}
		}

void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {		void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
// The raw hybird sample started with call stack in FILO order and followed		// The raw hybird sample started with call stack in FILO order and followed
// intermediately by LBR sample		// intermediately by LBR sample
// e.g.		// e.g.
// 4005dc # call stack leaf		// 4005dc # call stack leaf
// 400634		// 400634
// 400684 # call stack root		// 400684 # call stack root
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...		// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries		// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
//		//
std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>(Binary);		std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();

// Parsing call stack and populate into HybridSample.CallStack		// Parsing call stack and populate into PerfSample.CallStack
if (!extractCallstack(TraceIt, Sample->CallStack)) {		if (!extractCallstack(TraceIt, Sample->CallStack)) {
// Skip the next LBR line matched current call stack		// Skip the next LBR line matched current call stack
if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))		if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))
TraceIt.advance();		TraceIt.advance();
return;		return;
}		}

if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {		warnIfMissingMMap();
WithColor::warning() << "No relevant mmap event is matched, will use "
"preferred address as the base loading address!\n";
// Avoid redundant warning, only warn at the first unmatched sample.
Binary->setMissingMMapWarned(true);
}

if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {		if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
// Parsing LBR stack and populate into HybridSample.LBRStack		// Parsing LBR stack and populate into PerfSample.LBRStack
if (extractLBRStack(TraceIt, Sample->LBRStack)) {		if (extractLBRStack(TraceIt, Sample->LBRStack)) {
// Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR		// Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
// ranges		// ranges
Sample->CallStack.front() = Sample->LBRStack[0].Target;		Sample->CallStack.front() = Sample->LBRStack[0].Target;
// Record samples by aggregation		// Record samples by aggregation
Sample->genHashCode();
AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;		AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
}		}
} else {		} else {
// LBR sample is encoded in single line after stack sample		// LBR sample is encoded in single line after stack sample
exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");		exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");
}		}
}		}

		void PerfReaderBase::writeRawProfile(StringRef Filename) {
		std::error_code EC;
		raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
		if (EC)
		exitWithError(EC, Filename);
		writeRawProfile(OS);
		}

		void LBRPerfReader::writeRawProfile(raw_fd_ostream &OS) {
		/*
		Format:
		number of entries in RangeCounter
		from_1-to_1:count_1
		from_2-to_2:count_2
		......
		from_n-to_n:count_n
		number of entries in BranchCounter
		src_1->dst_1:count_1
		src_2->dst_2:count_2
		......
		src_n->dst_n:count_n
		*/

		SampleCounter &Counter = SampleCounters.begin()->second;
		OS << Counter.RangeCounter.size() << "\n";
		for (auto I : Counter.RangeCounter) {
		OS << Twine::utohexstr(I.first.first) << "-"
		<< Twine::utohexstr(I.first.second) << ":" << I.second << "\n";
		}

		OS << Counter.BranchCounter.size() << "\n";
		for (auto I : Counter.BranchCounter) {
		OS << Twine::utohexstr(I.first.first) << "->"
		<< Twine::utohexstr(I.first.second) << ":" << I.second << "\n";
		}
		}

		void LBRPerfReader::computeCounterFromLBR(const PerfSample *Sample,
		uint64_t Repeat) {
		SampleCounter &Counter = SampleCounters.begin()->second;
		uint64_t EndOffeset = 0;
		for (const LBREntry &LBR : Sample->LBRStack) {
		uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source);
		uint64_t TargetOffset = Binary->virtualAddrToOffset(LBR.Target);

		if (!LBR.IsArtificial) {
		Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
		}

		// If this not the first LBR, update the range count between TO of current
		// LBR and FROM of next LBR.
		uint64_t StartOffset = TargetOffset;
		if (EndOffeset != 0) {
		assert(StartOffset <= EndOffeset &&
		"Bogus range should be filtered ealier!");
		Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
		}
		EndOffeset = SourceOffset;
		}
		}

		void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
		std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
		// Parsing LBR stack and populate into PerfSample.LBRStack
		if (extractLBRStack(TraceIt, Sample->LBRStack)) {
		warnIfMissingMMap();
		// Record LBR only samples by aggregation
		AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
		}
		}

		void LBRPerfReader::generateRawProfile() {
		assert(SampleCounters.size() == 1 && "Must have one entry of sample counter");
		for (const auto &Item : AggregatedSamples) {
		const PerfSample *Sample = Item.first.getPtr();
		computeCounterFromLBR(Sample, Item.second);
		}

		if (SkipSymbolization)
		PerfReaderBase::writeRawProfile(OutputFilename);
		}

uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {		uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {
// The aggregated count is optional, so do not skip the line and return 1 if		// The aggregated count is optional, so do not skip the line and return 1 if
// it's unmatched		// it's unmatched
uint64_t Count = 1;		uint64_t Count = 1;
if (!TraceIt.getCurrentLine().getAsInteger(10, Count))		if (!TraceIt.getCurrentLine().getAsInteger(10, Count))
TraceIt.advance();		TraceIt.advance();
		hoyUnsubmitted Not Done Reply Inline Actions I'm thinking of separating LBR samples from `HybridSample` and making `HybridSample` inherit that. This way they are technically separate and could reduce confusion. What do you think? hoy: I'm thinking of separating LBR samples from `HybridSample` and making `HybridSample` inherit…
		wleiAuthorUnsubmitted Done Reply Inline Actions Sounds good, changed! wlei: Sounds good, changed!
		wenleiUnsubmitted Not Done Reply Inline Actions I actually find the inheritance isn't really necessary now. The hashing specialization in LBRSample does not add value because the general version for HybridSample can handle that too, and a separate hash for LBRSample just adds duplication. On the contrary, how about we rename `HybridSample` to `PerfSample` and remove the inheritance structure altogether? Also, I don't think HybridSample being a subclass of LBRSample is good because this is more about composition. What if we have a perf sample that only has a call stack but no LBR? Inheritance wouldn't work well for that (and we don't want virtual inheritance). This is similar to why we don't make a subclass for the aggregation count. wenlei: I actually find the inheritance isn't really necessary now. The hashing specialization in…
		hoyUnsubmitted Not Done Reply Inline Actions Removing the inheritance has the cost of stacking together fields that are not necessarily used at the same time. For example, the `SmallVector<uint64_t, 16> CallStack` field will be there for `LBRSamples`, and in member functions the unused fields have to be checked. But we don't have virtual method overhead as a result of removing inheritance, which is a con of OOP. Multiple inheritance can be used to separate call stack samples. I still prefer OOP here from a memory, code maintenance, and extension point of view. Let me know your thoughts. hoy: Removing the inheritance has the cost of stacking together fields that are not necessarily used at the…
return Count;		return Count;
}		}

void PerfReaderBase::parseSample(TraceStream &TraceIt) {		void PerfReaderBase::parseSample(TraceStream &TraceIt) {
uint64_t Count = parseAggregatedCount(TraceIt);		uint64_t Count = parseAggregatedCount(TraceIt);
assert(Count >= 1 && "Aggregated count should be >= 1!");		assert(Count >= 1 && "Aggregated count should be >= 1!");
parseSample(TraceIt, Count);		parseSample(TraceIt, Count);
}		}
▲ Show 20 Lines • Show All 88 Lines • Show Last 20 Lines

llvm/tools/llvm-profgen/ProfileGenerator.cpp

	//===-- ProfileGenerator.cpp - Profile Generator ---------------- C++ --===//			//===-- ProfileGenerator.cpp - Profile Generator ---------------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "ProfileGenerator.h"			#include "ProfileGenerator.h"
	#include "ProfiledBinary.h"			#include "ProfiledBinary.h"
	#include "llvm/ProfileData/ProfileCommon.h"			#include "llvm/ProfileData/ProfileCommon.h"
	#include <unordered_set>			#include <unordered_set>

	static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),			cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
	cl::Required,			cl::Required,
	cl::desc("Output profile file"));			cl::desc("Output profile file"));
	static cl::alias OutputA("o", cl::desc("Alias for --output"),			static cl::alias OutputA("o", cl::desc("Alias for --output"),
	cl::aliasopt(OutputFilename));			cl::aliasopt(OutputFilename));

	static cl::opt<SampleProfileFormat> OutputFormat(			static cl::opt<SampleProfileFormat> OutputFormat(
	"format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),			"format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
	cl::values(			cl::values(
	clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),			clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
	clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),			clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
	▲ Show 20 Lines • Show All 591 Lines • Show Last 20 Lines

llvm/tools/llvm-profgen/ProfiledBinary.h

	Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	using namespace llvm::object;			using namespace llvm::object;

	namespace llvm {			namespace llvm {
	namespace sampleprof {			namespace sampleprof {

	class ProfiledBinary;			class ProfiledBinary;

	struct InstructionPointer {			struct InstructionPointer {
	ProfiledBinary *Binary;			const ProfiledBinary *Binary;
	union {			union {
	// Offset of the executable segment of the binary.			// Offset of the executable segment of the binary.
	uint64_t Offset = 0;			uint64_t Offset = 0;
	// Also used as address in unwinder			// Also used as address in unwinder
	uint64_t Address;			uint64_t Address;
	};			};
	// Index to the sorted code address array of the binary.			// Index to the sorted code address array of the binary.
	uint64_t Index = 0;			uint64_t Index = 0;
	InstructionPointer(ProfiledBinary *Binary, uint64_t Address,			InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
	bool RoundToNext = false);			bool RoundToNext = false);
	void advance();			void advance();
	void backward();			void backward();
	void update(uint64_t Addr);			void update(uint64_t Addr);
	};			};

	// PrologEpilog offset tracker, used to filter out broken stack samples			// PrologEpilog offset tracker, used to filter out broken stack samples
	// Currently we use a heuristic size (two) to infer prolog and epilog			// Currently we use a heuristic size (two) to infer prolog and epilog
	▲ Show 20 Lines • Show All 298 Lines • Show Last 20 Lines

llvm/tools/llvm-profgen/ProfiledBinary.cpp

Show First 20 Lines • Show All 539 Lines • ▼ Show 20 Lines	for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
LineLocation Line(LineOffset, Discriminator);		LineLocation Line(LineOffset, Discriminator);
auto It = NameStrings.insert(FunctionName.str());		auto It = NameStrings.insert(FunctionName.str());
CallStack.emplace_back(*It.first, Line);		CallStack.emplace_back(*It.first, Line);
}		}

return CallStack;		return CallStack;
}		}

InstructionPointer::InstructionPointer(ProfiledBinary *Binary, uint64_t Address,		InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
bool RoundToNext)		uint64_t Address, bool RoundToNext)
: Binary(Binary), Address(Address) {		: Binary(Binary), Address(Address) {
Index = Binary->getIndexForAddr(Address);		Index = Binary->getIndexForAddr(Address);
if (RoundToNext) {		if (RoundToNext) {
// we might get address which is not the code		// we might get address which is not the code
// it should round to the next valid address		// it should round to the next valid address
this->Address = Binary->getAddressforIndex(Index);		this->Address = Binary->getAddressforIndex(Index);
}		}
}		}
Show All 18 Lines

llvm/tools/llvm-profgen/llvm-profgen.cpp

Show All 29 Lines

static cl::opt<std::string> BinaryPath(		static cl::opt<std::string> BinaryPath(
"binary", cl::value_desc("binary"), cl::Required,		"binary", cl::value_desc("binary"), cl::Required,
cl::desc("Path of profiled binary, only one binary is supported."),		cl::desc("Path of profiled binary, only one binary is supported."),
cl::cat(ProfGenCategory));		cl::cat(ProfGenCategory));

extern cl::opt<bool> ShowDisassemblyOnly;		extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;		extern cl::opt<bool> ShowSourceLocations;
		extern cl::opt<bool> SkipSymbolization;

using namespace llvm;		using namespace llvm;
using namespace sampleprof;		using namespace sampleprof;

// Validate the command line input.		// Validate the command line input.
static void validateCommandLine(StringRef BinaryPath,		static void validateCommandLine(StringRef BinaryPath,
cl::list<std::string> &PerfTraceFilenames) {		cl::list<std::string> &PerfTraceFilenames) {
// Allow the invalid perfscript if we only use to show binary disassembly.		// Allow the invalid perfscript if we only use to show binary disassembly.
Show All 38 Lines	int main(int argc, const char *argv[]) {
if (ShowDisassemblyOnly)		if (ShowDisassemblyOnly)
return EXIT_SUCCESS;		return EXIT_SUCCESS;

// Parse perf events and samples		// Parse perf events and samples
std::unique_ptr<PerfReaderBase> Reader =		std::unique_ptr<PerfReaderBase> Reader =
PerfReaderBase::create(Binary.get(), PerfTraceFilenames);		PerfReaderBase::create(Binary.get(), PerfTraceFilenames);
Reader->parsePerfTraces(PerfTraceFilenames);		Reader->parsePerfTraces(PerfTraceFilenames);

		if (SkipSymbolization)
		return EXIT_SUCCESS;

		wenleiUnsubmitted Not Done Reply Inline Actions As an intermediate state, print a warning? wenlei: As an intermediate state, print a warning?
		wleiAuthorUnsubmitted Done Reply Inline Actions Sounds good wlei: Sounds good
		// TBD
		if (Reader->getPerfScriptType() == PERF_LBR) {
		WithColor::warning() << "Currently LBR only perf script is not supported!";
		return EXIT_SUCCESS;
		}

std::unique_ptr<ProfileGenerator> Generator = ProfileGenerator::create(		std::unique_ptr<ProfileGenerator> Generator = ProfileGenerator::create(
Binary.get(), Reader->getSampleCounters(), Reader->getPerfScriptType());		Binary.get(), Reader->getSampleCounters(), Reader->getPerfScriptType());
Generator->generateProfile();		Generator->generateProfile();
Generator->write();		Generator->write();

return EXIT_SUCCESS;		return EXIT_SUCCESS;
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[llvm-profgen] Support LBR only perf script
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 369780

llvm/test/tools/llvm-profgen/Inputs/noprobe.aggperfscript

llvm/test/tools/llvm-profgen/Inputs/noprobe.mmap.perfscript

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfbin

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfscript

llvm/test/tools/llvm-profgen/cs-interrupt.test

llvm/test/tools/llvm-profgen/inline-cs-noprobe.test

llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

llvm/test/tools/llvm-profgen/noprobe.test

llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test

llvm/tools/llvm-profgen/PerfReader.h

llvm/tools/llvm-profgen/PerfReader.cpp

llvm/tools/llvm-profgen/ProfileGenerator.cpp

llvm/tools/llvm-profgen/ProfiledBinary.h

llvm/tools/llvm-profgen/ProfiledBinary.cpp

llvm/tools/llvm-profgen/llvm-profgen.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[llvm-profgen] Support LBR only perf script
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 369780

llvm/test/tools/llvm-profgen/Inputs/noprobe.aggperfscript

llvm/test/tools/llvm-profgen/Inputs/noprobe.mmap.perfscript

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfbin

llvm/test/tools/llvm-profgen/Inputs/noprobe.perfscript

llvm/test/tools/llvm-profgen/cs-interrupt.test

llvm/test/tools/llvm-profgen/inline-cs-noprobe.test

llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

llvm/test/tools/llvm-profgen/noprobe.test

llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test

llvm/tools/llvm-profgen/PerfReader.h

llvm/tools/llvm-profgen/PerfReader.cpp

llvm/tools/llvm-profgen/ProfileGenerator.cpp

llvm/tools/llvm-profgen/ProfiledBinary.h

llvm/tools/llvm-profgen/ProfiledBinary.cpp

llvm/tools/llvm-profgen/llvm-profgen.cpp

[llvm-profgen] Support LBR only perf script
ClosedPublic