This is an archive of the discontinued LLVM Phabricator instance.

[llvm-exegesis] Introduce a 'naive' clustering algorithm (PR40880)
ClosedPublic

Authored by lebedev.ri on Mar 26 2019, 7:36 AM.

Details

Summary

This is an alternative to D59539.

Let's suppose we have measured 4 different opcodes and got: 0.5, 1.0, 1.5, 2.0.
Let's suppose we are using -analysis-clustering-epsilon=0.5.
Right now, by default, we will start processing the 0.5 point, find that 1.0 is its neighbor, and add them to a new cluster.
Then we will notice that 1.5 is a neighbor of 1.0 and add it to that same cluster.
Then we will notice that 2.0 is a neighbor of 1.5 and add it to that same cluster.
So all these points end up in the same cluster.
This may or may not be a correct implementation of the dbscan clustering algorithm.
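To make the chaining concrete, here is a minimal toy sketch (plain 1-D points and an invented function name, not the actual llvm-exegesis code): once a point joins a cluster, its own neighbors are pulled in too, so points chained at distance <= epsilon all end up in one cluster.

#include <cmath>
#include <cstddef>
#include <deque>
#include <vector>

// Toy dbscan-style expansion on 1-D points.
std::vector<int> chainCluster(const std::vector<double> &Points, double Eps) {
  std::vector<int> ClusterOf(Points.size(), -1); // -1 == not clustered yet
  int NumClusters = 0;
  for (std::size_t I = 0; I < Points.size(); ++I) {
    if (ClusterOf[I] != -1)
      continue;
    const int Id = NumClusters++;
    ClusterOf[I] = Id;
    std::deque<std::size_t> Worklist{I};
    while (!Worklist.empty()) {
      const std::size_t P = Worklist.front();
      Worklist.pop_front();
      for (std::size_t J = 0; J < Points.size(); ++J)
        if (ClusterOf[J] == -1 && std::fabs(Points[J] - Points[P]) <= Eps) {
          ClusterOf[J] = Id; // a neighbor of a neighbor joins too
          Worklist.push_back(J);
        }
    }
  }
  return ClusterOf;
}

// chainCluster({0.5, 1.0, 1.5, 2.0}, 0.5) assigns all four points to cluster 0.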

But this is rather horribly broken when it comes to comparing the clusters against the LLVM sched data.
Let's suppose all those opcodes are currently in the same sched cluster.
If I specify -analysis-inconsistency-epsilon=0.5, then no matter what values
LLVM has checked in, this cluster will never match them,
and thus this cluster will always be displayed as inconsistent.

The solution is obviously to split some of these opcodes off into a different sched cluster.
But how do I do that? Out of the 4 opcodes displayed in the inconsistency report,
which ones are the "bad" ones? Which ones differ the most from the checked-in data?
I'd need to go into the .yaml and look it up manually.

The trivial solution is, when creating clusters, not to run the full dbscan algorithm,
but instead to "pick some unclustered point, pick all unclustered points that are its neighbors,
put them all into a new cluster, repeat". As it happens, we can arrive
at that algorithm simply by not performing the "add neighbors of a neighbor to the cluster" step.
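A minimal sketch of that variant, under the same toy 1-D setup as above (names invented, not the patch's actual API): only direct neighbors of the seed point join its cluster, with no neighbor-of-a-neighbor expansion.

#include <cmath>
#include <cstddef>
#include <vector>

std::vector<int> naiveCluster(const std::vector<double> &Points, double Eps) {
  std::vector<int> ClusterOf(Points.size(), -1); // -1 == not clustered yet
  int NumClusters = 0;
  for (std::size_t I = 0; I < Points.size(); ++I) {
    if (ClusterOf[I] != -1)
      continue;
    const int Id = NumClusters++;
    ClusterOf[I] = Id;
    for (std::size_t J = 0; J < Points.size(); ++J)
      if (ClusterOf[J] == -1 && std::fabs(Points[J] - Points[I]) <= Eps)
        ClusterOf[J] = Id;
  }
  return ClusterOf;
}

// naiveCluster({0.5, 1.0, 1.5, 2.0}, 0.5) produces two clusters:
// {0.5, 1.0} (seeded at 0.5) and {1.5, 2.0} (seeded at 1.5).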

But that won't work well once we teach the analysis mode to operate in non-1D mode
(i.e. on more than a single measurement type at a time), because the clustering would
then depend on the order of the measurements.

Instead, let's just create a single cluster per opcode, and put all the points of that opcode into said cluster.
At the same time, check that every point in that cluster is a neighbor of every other point in the cluster;
if any are not, the cluster (== opcode) is unstable.
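A hedged sketch of that per-opcode scheme (scalar measurements and invented names, for illustration only; the real patch works on vectors of BenchmarkMeasure):

#include <cmath>
#include <cstddef>
#include <map>
#include <utility>
#include <vector>

struct OpcodeCluster {
  std::vector<double> Points;
  bool Unstable = false; // set if some pair of points is not within Eps
};

// Group every measurement under its opcode, then flag the cluster as
// unstable unless all of its points are pairwise neighbors.
std::map<unsigned, OpcodeCluster>
clusterPerOpcode(const std::vector<std::pair<unsigned, double>> &Measurements,
                 double Eps) {
  std::map<unsigned, OpcodeCluster> Clusters;
  for (const auto &M : Measurements)
    Clusters[M.first].Points.push_back(M.second);
  for (auto &KV : Clusters) {
    OpcodeCluster &C = KV.second;
    for (std::size_t I = 0; I < C.Points.size() && !C.Unstable; ++I)
      for (std::size_t J = I + 1; J < C.Points.size(); ++J)
        if (std::fabs(C.Points[I] - C.Points[J]) > Eps) {
          C.Unstable = true;
          break;
        }
  }
  return Clusters;
}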

This is yet another step to bring me closer to being able to continue the cleanup of the bdver2 sched model.

Fixes PR40880.

Diff Detail

Repository
rL LLVM

Event Timeline

lebedev.ri created this revision. Mar 26 2019, 7:36 AM
courbet added inline comments. Mar 26 2019, 8:28 AM
tools/llvm-exegesis/lib/Clustering.cpp
56 ↗(On Diff #192272)

"are neighbours up to AnalysisClusteringEpsilon"

57 ↗(On Diff #192272)

This is O(N^2). You can do it in O(N): compute the cluster centroid (O(N)), then compute the distance from each point to the centroid (O(N)).

This relies on the fact that if there exist p and q such that d(p,q) > e, then either d(p, centroid) > e/2 or d(q, centroid) > e/2.

Proof (ad absurdum):
Assume both d(p, centroid) <= e/2 and d(q, centroid) <= e/2. Then:

d(p, centroid) + d(q, centroid) <= e

By symmetry:

d(p, centroid) + d(centroid, q) <= e

By the triangle inequality:

d(p, q) <= d(p, centroid) + d(centroid, q) <= e

which contradicts d(p, q) > e.
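As a minimal sketch (scalar measurements and an invented helper name, purely for illustration, not the llvm-exegesis API), the centroid-based check would look roughly like this: reject the cluster if any point is farther than e/2 from the centroid.

#include <cmath>
#include <numeric>
#include <vector>

// Assumes Points is non-empty. Returns false (i.e. "reject the cluster")
// if some point is farther than Eps/2 from the centroid.
bool allNearCentroid(const std::vector<double> &Points, double Eps) {
  const double Centroid =
      std::accumulate(Points.begin(), Points.end(), 0.0) /
      static_cast<double>(Points.size());
  for (const double P : Points)
    if (std::fabs(P - Centroid) > Eps / 2.0)
      return false;
  return true;
}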
lebedev.ri marked 3 inline comments as done.

Apply premature optimizations.

Use llvm::all_of().

courbet added inline comments. Mar 27 2019, 2:13 AM
tools/llvm-exegesis/lib/Clustering.h
149 ↗(On Diff #192323)

You're not using PointIds in BenchmarkGroup, so please move PointIds and getPointIds back to SchedClassCluster.

At that point it makes more sense to call this SchedClassClusterCentroid and make it a member of SchedClassCluster, with no virtual inheritance or protected section. Something like:

class SchedClassClusterCentroid {
public:
  const std::vector<PerInstructionStats> &getStats() const { return Representative; }
  std::vector<BenchmarkMeasure> getAsPoint() const;
  void addPoint(ArrayRef<BenchmarkMeasure> Point);

private:
  // Measurement stats for the points in the SchedClassCluster.
  std::vector<PerInstructionStats> Representative;
};

class SchedClassCluster {
public:
  const InstructionBenchmarkClustering::ClusterId &id() const;
  const std::vector<size_t> &getPointIds() const;
  void addPoint(size_t PointId, const InstructionBenchmarkClustering &Clustering);
  const SchedClassClusterCentroid& getCentroid() const { return Centroid; }
  bool measurementsMatch(...);

private:
  std::vector<size_t> PointIds;
  InstructionBenchmarkClustering::ClusterId ClusterId;
  SchedClassClusterCentroid Centroid;
};

And then only use SchedClassClusterCentroid in areAllNeighbours().

courbet added inline comments. Mar 27 2019, 2:57 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

Oops, I just realized that I proved the opposite direction of what I wanted. Two options:

  • We consider that this criterion is as good as the other one for deciding whether to reject the cluster, or
  • We take another approach, such as computing the bounding box of the cluster (O(N)) and then comparing its diagonal (the distance between the two extremal points, O(1)) to the rejection threshold.
lebedev.ri marked an inline comment as done.

Yes, indeed, this layering is better, thank you.
It was pretty obvious; I just hadn't had a clear moment to think about it.

lebedev.ri added inline comments. Mar 27 2019, 3:05 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

This appears to be sufficient.

Though, I think we can replace getAsPoint() with getMinPoint()+getMaxPoint(),
and just check that Pmin and Pmax are neighbours up to AnalysisClusteringEpsilon (not halved!).
Maybe that is even less controversial?
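As a minimal 1-D sketch of that idea (the helper name is invented; the real code would take per-dimension minima/maxima of the BenchmarkMeasure vectors):

#include <algorithm>
#include <cmath>
#include <vector>

// Assumes Points is non-empty. In 1-D the farthest-apart pair is exactly
// (min, max), so this matches the brute-force all-pairs check.
bool extremesAreNeighbours(const std::vector<double> &Points, double Eps) {
  const auto MinMax = std::minmax_element(Points.begin(), Points.end());
  return std::fabs(*MinMax.second - *MinMax.first) <= Eps;
}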

courbet added inline comments. Mar 27 2019, 3:10 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

Sounds perfect to me.

lebedev.ri added inline comments. Mar 27 2019, 3:31 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

Hm, I should have specified: I mostly only considered the current 1D case.
I did not think about the situation with more dimensions.

I believe that for the current 1D situation, the original brute-force approach,
this O(2*N) approach, and the O(N+1) approach are all identical.
I'm not sure about the 2D case, especially since it is presently theoretical; I can't test it.

Thinking about it more, I'm not fully convinced that the Pmin/Pmax solution would be correct for 2D.

I think we should keep this at least for now, unless you don't believe that it is correct?
I don't expect it to be the performance bottleneck.

courbet added inline comments. Mar 27 2019, 7:28 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

> I believe that for the current 1D situation, the original brute-force approach, this O(2*N) approach, and the O(N+1) approach are all identical.

If there are D dimensions and N points, the original approach would be O(D*N^2) (all pairs of points * one distance computation per pair), the second one O(D*N) (all points * accumulate each coordinate, plus all points * one distance computation), and the third one O(D*N + D) (all points * {min+max computation over each coordinate}, plus one distance computation between the min bound and the max bound).

> I'm not sure about the 2D case, especially since it is presently theoretical; I can't test it.

-mode=uops gives you a 9-dimensional vector (ports 0-7 + NumMicroOps).

> Thinking about it more, I'm not fully convinced that the Pmin/Pmax solution would be correct for 2D.

I don't think there is really a "correct" way - eventually this is just a heuristic to mark clusters as noise; anything reasonable works :)

lebedev.ri added inline comments. Mar 27 2019, 8:08 AM
tools/llvm-exegesis/lib/Clustering.cpp
57 ↗(On Diff #192272)

Sorry, I lost track of this thread.

> I don't think there is really a "correct" way - eventually this is just a heuristic to mark clusters as noise; anything reasonable works :)

Yes.

What I guess I'm trying to say is that this current implementation appears to be eps-compatible
with the brute-force approach. I'm not sure the min-max approach would be, and I have not really
thought about whether it could be made compatible by auto-adjusting the eps.
More importantly, I'm not yet sure we want to switch this to the min-max approach.

If there are remaining issues here, please say so; thank you for looking!

courbet accepted this revision. Mar 28 2019, 12:48 AM
This revision is now accepted and ready to land. Mar 28 2019, 12:48 AM

@courbet thank you for the review!

This revision was automatically updated to reflect the committed changes.