Diff 98560

lib/Analysis/ProfileSummaryInfo.cpp

Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	bool ProfileSummaryInfo::computeSummary() {
return true;		return true;
}		}

Optional<uint64_t>		Optional<uint64_t>
ProfileSummaryInfo::getProfileCount(const Instruction *Inst,		ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
BlockFrequencyInfo *BFI) {		BlockFrequencyInfo *BFI) {
if (!Inst)		if (!Inst)
return None;		return None;
		if (!computeSummary())
		return None;
assert((isa<CallInst>(Inst) \|\| isa<InvokeInst>(Inst)) &&		assert((isa<CallInst>(Inst) \|\| isa<InvokeInst>(Inst)) &&
"We can only get profile count for call/invoke instruction.");		"We can only get profile count for call/invoke instruction.");
// Check if there is a profile metadata on the instruction. If it is present,		if (Summary->getKind() == ProfileSummary::PSK_Sample) {
		eraman (Unsubmitted, Not Done): I wonder if we should check if Summary is non-null and then the summary kind is PSK_Sample. There is one test case down below (inliner count update) where you had to attach the summary to the test case. Is there any reason the summary has to be present to get the count based on entry count and block frequency?
		tejohnson (Author, Unsubmitted, Not Done): Do you mean only do the metadata-based hotness when computeSummary() returns true and the kind is PSK_Sample? I.e. if !computeSummary(), then assume instrumentation based? I could do that, it would mean a few less test changes.
		eraman (Unsubmitted, Not Done): Yes, that's what I should've written. As long as we have function entry counts, we should return the profile count.
// determine hotness solely based on that.		// In sample PGO mode, check if there is a profile metadata on the
		// instruction. If it is present, determine hotness solely based on that,
		// since the sampled entry count may not be accurate.
uint64_t TotalCount;		uint64_t TotalCount;
		danielcdh (Unsubmitted, Not Done): Looks like you can always get the Kind from Inst, why would you want to pass in the Summary?
		tejohnson (Author, Unsubmitted, Not Done): It involves doing some work that is already done when the summary is available, so this was added just for the case where it is invoked without a summary.
if (Inst->extractProfTotalWeight(TotalCount))		if (Inst->extractProfTotalWeight(TotalCount))
return TotalCount;		return TotalCount;
		}
if (BFI)		if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());		return BFI->getBlockProfileCount(Inst->getParent());
return None;		return None;
}		}

/// Returns true if the function's entry is hot. If it returns false, it		/// Returns true if the function's entry is hot. If it returns false, it
/// either means it is not hot or it is unknown whether it is hot or not (for		/// either means it is not hot or it is unknown whether it is hot or not (for
/// example, no profile data is available).		/// example, no profile data is available).
▲ Show 20 Lines • Show All 152 Lines • Show Last 20 Lines

test/Bitcode/thinlto-function-summary-callgraph-pgo.ll

	Show All 11 Lines

	; CHECK: <SOURCE_FILENAME			; CHECK: <SOURCE_FILENAME
	; CHECK-NEXT: <FUNCTION			; CHECK-NEXT: <FUNCTION
	; "func"			; "func"
	; CHECK-NEXT: <FUNCTION op0=4 op1=4			; CHECK-NEXT: <FUNCTION op0=4 op1=4
	; CHECK: <GLOBALVAL_SUMMARY_BLOCK			; CHECK: <GLOBALVAL_SUMMARY_BLOCK
	; CHECK-NEXT: <VERSION			; CHECK-NEXT: <VERSION
	; See if the call to func is registered, using the expected hotness type.			; See if the call to func is registered, using the expected hotness type.
	; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=2/>			; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3/>
	; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>			; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
	; CHECK: <STRTAB_BLOCK			; CHECK: <STRTAB_BLOCK
	; CHECK-NEXT: blob data = 'mainfunc'			; CHECK-NEXT: blob data = 'mainfunc'

	; COMBINED: <GLOBALVAL_SUMMARY_BLOCK			; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
	; COMBINED-NEXT: <VERSION			; COMBINED-NEXT: <VERSION
	; COMBINED-NEXT: <VALUE_GUID op0=[[FUNCID:[0-9]+]] op1=7289175272376759421/>			; COMBINED-NEXT: <VALUE_GUID op0=[[FUNCID:[0-9]+]] op1=7289175272376759421/>
	; COMBINED-NEXT: <VALUE_GUID			; COMBINED-NEXT: <VALUE_GUID
	; COMBINED-NEXT: <COMBINED			; COMBINED-NEXT: <COMBINED
	; See if the call to func is registered, using the expected hotness type.			; See if the call to func is registered, using the expected hotness type.
	; op6=2 which is hotnessType::None.			; op6=3 which is hotnessType::Hot.
	; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[FUNCID]] op6=2/>			; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[FUNCID]] op6=3/>
	; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>			; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>

	; ModuleID = 'thinlto-function-summary-callgraph.ll'			; ModuleID = 'thinlto-function-summary-callgraph.ll'
	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	; Function Attrs: nounwind uwtable			; Function Attrs: nounwind uwtable
	define i32 @main() #0 !prof !2 {			define i32 @main() #0 !prof !28 {
	entry:			entry:
	call void (...) @func()			call void (...) @func()
	ret i32 0			ret i32 0
	}			}

	declare void @func(...) #1			declare void @func(...) #1

	!2 = !{!"function_entry_count", i64 1}			!llvm.module.flags = !{!1}

				!1 = !{i32 1, !"ProfileSummary", !2}
				!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
				!3 = !{!"ProfileFormat", !"InstrProf"}
				!4 = !{!"TotalCount", i64 2}
				!5 = !{!"MaxCount", i64 1}
				!6 = !{!"MaxInternalCount", i64 0}
				!7 = !{!"MaxFunctionCount", i64 1}
				!8 = !{!"NumCounts", i64 2}
				!9 = !{!"NumFunctions", i64 2}
				!10 = !{!"DetailedSummary", !11}
				!11 = !{!12, !13, !14, !15, !16, !17, !17, !18, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27}
				!12 = !{i32 10000, i64 0, i32 0}
				!13 = !{i32 100000, i64 0, i32 0}
				!14 = !{i32 200000, i64 0, i32 0}
				!15 = !{i32 300000, i64 0, i32 0}
				!16 = !{i32 400000, i64 0, i32 0}
				!17 = !{i32 500000, i64 1, i32 2}
				!18 = !{i32 600000, i64 1, i32 2}
				!19 = !{i32 700000, i64 1, i32 2}
				!20 = !{i32 800000, i64 1, i32 2}
				!21 = !{i32 900000, i64 1, i32 2}
				!22 = !{i32 950000, i64 1, i32 2}
				!23 = !{i32 990000, i64 1, i32 2}
				!24 = !{i32 999000, i64 1, i32 2}
				!25 = !{i32 999900, i64 1, i32 2}
				!26 = !{i32 999990, i64 1, i32 2}
				!27 = !{i32 999999, i64 1, i32 2}
				!28 = !{!"function_entry_count", i64 1}


	; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)			; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
	; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)			; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)

test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll

This file was copied to test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll.

	Show All 23 Lines
	; "none2"			; "none2"
	; CHECK-NEXT: <FUNCTION op0=37 op1=5			; CHECK-NEXT: <FUNCTION op0=37 op1=5
	; "none3"			; "none3"
	; CHECK-NEXT: <FUNCTION op0=42 op1=5			; CHECK-NEXT: <FUNCTION op0=42 op1=5
	; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK			; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
	; CHECK-NEXT: <VERSION			; CHECK-NEXT: <VERSION
	; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>			; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
	; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123			; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
	; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>			; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=1 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
	; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>			; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>

	; CHECK: <STRTAB_BLOCK			; CHECK: <STRTAB_BLOCK
	; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'			; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'

	; COMBINED: <GLOBALVAL_SUMMARY_BLOCK			; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
	; COMBINED-NEXT: <VERSION			; COMBINED-NEXT: <VERSION
	; COMBINED-NEXT: <VALUE_GUID			; COMBINED-NEXT: <VALUE_GUID
	▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines

test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll

This file was copied from test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll.

	Show First 20 Lines • Show All 100 Lines • ▼ Show 20 Lines



	!llvm.module.flags = !{!1}			!llvm.module.flags = !{!1}
	!20 = !{!"function_entry_count", i64 110, i64 123}			!20 = !{!"function_entry_count", i64 110, i64 123}

	!1 = !{i32 1, !"ProfileSummary", !2}			!1 = !{i32 1, !"ProfileSummary", !2}
	!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}			!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
	!3 = !{!"ProfileFormat", !"InstrProf"}			!3 = !{!"ProfileFormat", !"SampleProfile"}
	!4 = !{!"TotalCount", i64 10000}			!4 = !{!"TotalCount", i64 10000}
	!5 = !{!"MaxCount", i64 10}			!5 = !{!"MaxCount", i64 10}
	!6 = !{!"MaxInternalCount", i64 1}			!6 = !{!"MaxInternalCount", i64 1}
	!7 = !{!"MaxFunctionCount", i64 1000}			!7 = !{!"MaxFunctionCount", i64 1000}
	!8 = !{!"NumCounts", i64 3}			!8 = !{!"NumCounts", i64 3}
	!9 = !{!"NumFunctions", i64 3}			!9 = !{!"NumFunctions", i64 3}
	!10 = !{!"DetailedSummary", !11}			!10 = !{!"DetailedSummary", !11}
	!11 = !{!12, !13, !14}			!11 = !{!12, !13, !14}
	!12 = !{i32 10000, i64 100, i32 1}			!12 = !{i32 10000, i64 100, i32 1}
	!13 = !{i32 999000, i64 100, i32 1}			!13 = !{i32 999000, i64 100, i32 1}
	!14 = !{i32 999999, i64 1, i32 2}			!14 = !{i32 999999, i64 1, i32 2}
	!15 = !{!"branch_weights", i32 100}			!15 = !{!"branch_weights", i32 100}

test/Transforms/CodeGenPrepare/section-samplepgo.ll

This file was copied from test/Transforms/CodeGenPrepare/section.ll.

Show All 33 Lines	define void @cold_func() !prof !16 {
ret void		ret void
}		}

; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}		; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}		; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}		!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}		!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}		!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}		!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}		!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}		!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}		!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}		!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}		!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}		!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}		!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}		!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}		!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}		!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}		!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"function_entry_count", i64 1000}		!15 = !{!"function_entry_count", i64 1000}
!16 = !{!"function_entry_count", i64 1}		!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}		!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}		!18 = !{!"branch_weights", i32 1}

test/Transforms/CodeGenPrepare/section.ll

This file was copied to test/Transforms/CodeGenPrepare/section-samplepgo.ll.

	; RUN: opt < %s -codegenprepare -S \| FileCheck %s			; RUN: opt < %s -codegenprepare -S \| FileCheck %s

	target triple = "x86_64-pc-linux-gnu"			target triple = "x86_64-pc-linux-gnu"

	; This tests that hot/cold functions get correct section prefix assigned			; This tests that hot/cold functions get correct section prefix assigned

	; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]			; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
	; The entry is hot			; The entry is hot
	define void @hot_func() !prof !15 {			define void @hot_func() !prof !15 {
	ret void			ret void
	}			}

	; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]			; For instrumentation based PGO, we should only look at entry counts,
	; The sum of 2 callsites are hot			; not call site VP metadata (which can exist on value profiled memcpy,
	define void @hot_call_func() !prof !16 {			; or possibly left behind after static analysis based devirtualization).
				; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
				define void @cold_func1() !prof !16 {
	call void @hot_func(), !prof !17			call void @hot_func(), !prof !17
	call void @hot_func(), !prof !17			call void @hot_func(), !prof !17
	ret void			ret void
	}			}

	; CHECK-NOT: normal_func{{.*}}!section_prefix			; CHECK: cold_func2{{.*}}!section_prefix
	; The sum of all callsites are neither hot or cold			define void @cold_func2() !prof !16 {
	define void @normal_func() !prof !16 {
	call void @hot_func(), !prof !17			call void @hot_func(), !prof !17
	call void @hot_func(), !prof !18			call void @hot_func(), !prof !18
	call void @hot_func(), !prof !18			call void @hot_func(), !prof !18
	ret void			ret void
	}			}

	; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]			; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
	; The entry and the callsite are both cold			define void @cold_func3() !prof !16 {
	define void @cold_func() !prof !16 {
	call void @hot_func(), !prof !18			call void @hot_func(), !prof !18
	ret void			ret void
	}			}

	; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}			; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
	; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}			; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
	!llvm.module.flags = !{!1}			!llvm.module.flags = !{!1}
	!1 = !{i32 1, !"ProfileSummary", !2}			!1 = !{i32 1, !"ProfileSummary", !2}
	Show All 17 Lines

test/Transforms/Inline/function-count-update-2.ll

	; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S \| FileCheck %s			; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S \| FileCheck %s

	; This tests that the function count of a callee gets correctly updated after it			; This tests that the function count of a callee gets correctly updated after it
	; has been inlined into two callsites.			; has been inlined into two callsites.

	; CHECK: @callee() !prof [[COUNT:![0-9]+]]			; CHECK: @callee() !prof [[COUNT:![0-9]+]]
	define i32 @callee() !prof !1 {			define i32 @callee() !prof !15 {
	ret i32 0			ret i32 0
	}			}

	define i32 @caller1() !prof !2 {			define i32 @caller1() !prof !16 {
	; CHECK-LABEL: @caller1			; CHECK-LABEL: @caller1
	; CHECK-NOT: callee			; CHECK-NOT: callee
	; CHECK: ret			; CHECK: ret
	%i = call i32 @callee()			%i = call i32 @callee()
	ret i32 %i			ret i32 %i
	}			}

	define i32 @caller2() !prof !3 {			define i32 @caller2() !prof !17 {
	; CHECK-LABEL: @caller2			; CHECK-LABEL: @caller2
	; CHECK-NOT: callee			; CHECK-NOT: callee
	; CHECK: ret			; CHECK: ret
	%i = call i32 @callee()			%i = call i32 @callee()
	ret i32 %i			ret i32 %i
	}			}

	!llvm.module.flags = !{!0}			!llvm.module.flags = !{!1}
	; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}			; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}
	!0 = !{i32 1, !"MaxFunctionCount", i32 1000}			!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
	!1 = !{!"function_entry_count", i64 1000}
	!2 = !{!"function_entry_count", i64 600}
	!3 = !{!"function_entry_count", i64 400}

				!1 = !{i32 1, !"ProfileSummary", !2}
				!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
				!3 = !{!"ProfileFormat", !"InstrProf"}
				!4 = !{!"TotalCount", i64 2000}
				!5 = !{!"MaxCount", i64 1000}
				!6 = !{!"MaxInternalCount", i64 0}
				!7 = !{!"MaxFunctionCount", i64 1000}
				!8 = !{!"NumCounts", i64 3}
				!9 = !{!"NumFunctions", i64 3}
				!10 = !{!"DetailedSummary", !11}
				!11 = !{!12, !13, !14}
				!12 = !{i32 10000, i64 1000, i32 1}
				!13 = !{i32 999900, i64 600, i32 2}
				!14 = !{i32 999999, i64 600, i32 2}
				!15 = !{!"function_entry_count", i64 1000}
				!16 = !{!"function_entry_count", i64 600}
				!17 = !{!"function_entry_count", i64 400}

test/Transforms/Inline/function-count-update-3.ll

	Show All 12 Lines
	; known and hence the cost gets reduced.			; known and hence the cost gets reduced.
	; Estimated count of a->e callsite = C2 * (C1 / C4)			; Estimated count of a->e callsite = C2 * (C1 / C4)
	; Estimated count of a->e callsite = 250 * (200 / 500) = 100			; Estimated count of a->e callsite = 250 * (200 / 500) = 100
	; Remaining count of e = C3 - 100 = 500 - 100 = 400			; Remaining count of e = C3 - 100 = 500 - 100 = 400
	; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0			; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0

	@data = external global i32			@data = external global i32

	define i32 @a(i32 %a1) !prof !1 {			define i32 @a(i32 %a1) !prof !15 {
	%a2 = call i32 @c(i32 %a1, i32 1)			%a2 = call i32 @c(i32 %a1, i32 1)
	ret i32 %a2			ret i32 %a2
	}			}

	define i32 @b(i32 %b1) !prof !2 {			define i32 @b(i32 %b1) !prof !16 {
	%b2 = call i32 @c(i32 %b1, i32 %b1)			%b2 = call i32 @c(i32 %b1, i32 %b1)
	ret i32 %b2			ret i32 %b2
	}			}

	declare void @ext();			declare void @ext();

	; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]]			; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]]
	define i32 @c(i32 %c1, i32 %c100) !prof !3 {			define i32 @c(i32 %c1, i32 %c100) !prof !17 {
	call void @ext()			call void @ext()
	%cond = icmp sle i32 %c1, 1			%cond = icmp sle i32 %c1, 1
	br i1 %cond, label %cond_true, label %cond_false			br i1 %cond, label %cond_true, label %cond_false

	cond_false:			cond_false:
	ret i32 0			ret i32 0

	cond_true:			cond_true:
	%c11 = call i32 @e(i32 %c100)			%c11 = call i32 @e(i32 %c100)
	ret i32 %c11			ret i32 %c11
	}			}


	; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]			; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]
	define i32 @e(i32 %c1) !prof !4 {			define i32 @e(i32 %c1) !prof !18 {
	%cond = icmp sle i32 %c1, 1			%cond = icmp sle i32 %c1, 1
	br i1 %cond, label %cond_true, label %cond_false			br i1 %cond, label %cond_true, label %cond_false

	cond_false:			cond_false:
	call void @ext()			call void @ext()
	%c2 = load i32, i32* @data, align 4			%c2 = load i32, i32* @data, align 4
	%c3 = add i32 %c1, %c2			%c3 = add i32 %c1, %c2
	%c4 = mul i32 %c3, %c2			%c4 = mul i32 %c3, %c2
	%c5 = add i32 %c4, %c2			%c5 = add i32 %c4, %c2
	%c6 = mul i32 %c5, %c2			%c6 = mul i32 %c5, %c2
	%c7 = add i32 %c6, %c2			%c7 = add i32 %c6, %c2
	%c8 = mul i32 %c7, %c2			%c8 = mul i32 %c7, %c2
	%c9 = add i32 %c8, %c2			%c9 = add i32 %c8, %c2
	%c10 = mul i32 %c9, %c2			%c10 = mul i32 %c9, %c2
	ret i32 %c10			ret i32 %c10

	cond_true:			cond_true:
	ret i32 0			ret i32 0
	}			}

	!llvm.module.flags = !{!0}			!llvm.module.flags = !{!1}
	; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0}			; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0}
	; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400}			; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400}
	!0 = !{i32 1, !"MaxFunctionCount", i32 5000}			!1 = !{i32 1, !"ProfileSummary", !2}
	!1 = !{!"function_entry_count", i64 200}			!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
	!2 = !{!"function_entry_count", i64 300}			!3 = !{!"ProfileFormat", !"InstrProf"}
	!3 = !{!"function_entry_count", i64 500}			!4 = !{!"TotalCount", i64 6500}
	!4 = !{!"function_entry_count", i64 500}			!5 = !{!"MaxCount", i64 5000}
				!6 = !{!"MaxInternalCount", i64 0}
				!7 = !{!"MaxFunctionCount", i64 5000}
				!8 = !{!"NumCounts", i64 4}
				!9 = !{!"NumFunctions", i64 4}
				!10 = !{!"DetailedSummary", !11}
				!11 = !{!12, !13, !14}
				!12 = !{i32 10000, i64 5000, i32 1}
				!13 = !{i32 999900, i64 5000, i32 1}
				!14 = !{i32 999999, i64 5000, i32 1}
				!15 = !{!"function_entry_count", i64 200}
				!16 = !{!"function_entry_count", i64 300}
				!17 = !{!"function_entry_count", i64 500}
				!18 = !{!"function_entry_count", i64 500}

test/Transforms/Inline/function-count-update.ll

	; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S \| FileCheck %s			; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S \| FileCheck %s

	; This tests that the function count of two callees get correctly updated after			; This tests that the function count of two callees get correctly updated after
	; they have been inlined into two back-to-back callsites in a single basic block			; they have been inlined into two back-to-back callsites in a single basic block
	; in the caller. The callees have the alwaysinline attribute and so they get			; in the caller. The callees have the alwaysinline attribute and so they get
	; inlined both with the regular inliner pass and the always inline pass. In			; inlined both with the regular inliner pass and the always inline pass. In
	; both cases, the new count of each callee is the original count minus callsite			; both cases, the new count of each callee is the original count minus callsite
	; count which is 200 (since the caller's entry count is 400 and the block			; count which is 200 (since the caller's entry count is 400 and the block
	; containing the calls have a relative block frequency of 0.5).			; containing the calls have a relative block frequency of 0.5).

	; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]			; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]
	define i32 @callee1(i32 %n) #0 !prof !1 {			define i32 @callee1(i32 %n) #0 !prof !15 {
	%cond = icmp sle i32 %n, 10			%cond = icmp sle i32 %n, 10
	br i1 %cond, label %cond_true, label %cond_false			br i1 %cond, label %cond_true, label %cond_false

	cond_true:			cond_true:
	%r1 = add i32 %n, 1			%r1 = add i32 %n, 1
	ret i32 %r1			ret i32 %r1
	cond_false:			cond_false:
	%r2 = add i32 %n, 2			%r2 = add i32 %n, 2
	ret i32 %r2			ret i32 %r2
	}			}

	; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]			; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]
	define i32 @callee2(i32 %n) #0 !prof !2 {			define i32 @callee2(i32 %n) #0 !prof !16 {
	%r1 = add i32 %n, 1			%r1 = add i32 %n, 1
	ret i32 %r1			ret i32 %r1
	}			}

	define i32 @caller(i32 %n) !prof !3 {			define i32 @caller(i32 %n) !prof !17 {
	%cond = icmp sle i32 %n, 100			%cond = icmp sle i32 %n, 100
	br i1 %cond, label %cond_true, label %cond_false			br i1 %cond, label %cond_true, label %cond_false

	cond_true:			cond_true:
	%i = call i32 @callee1(i32 %n)			%i = call i32 @callee1(i32 %n)
	%j = call i32 @callee2(i32 %i)			%j = call i32 @callee2(i32 %i)
	ret i32 %j			ret i32 %j
	cond_false:			cond_false:
	ret i32 0			ret i32 0
	}			}

	!llvm.module.flags = !{!0}			!llvm.module.flags = !{!1}
	; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}			; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}
	; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}			; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}
	!0 = !{i32 1, !"MaxFunctionCount", i32 1000}			!1 = !{i32 1, !"ProfileSummary", !2}
	!1 = !{!"function_entry_count", i64 1000}			!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
	!2 = !{!"function_entry_count", i64 2000}			!3 = !{!"ProfileFormat", !"InstrProf"}
	!3 = !{!"function_entry_count", i64 400}			!4 = !{!"TotalCount", i64 3400}
				!5 = !{!"MaxCount", i64 2000}
				!6 = !{!"MaxInternalCount", i64 400}
				!7 = !{!"MaxFunctionCount", i64 2000}
				!8 = !{!"NumCounts", i64 3}
				!9 = !{!"NumFunctions", i64 3}
				!10 = !{!"DetailedSummary", !11}
				!11 = !{!12, !13, !14}
				!12 = !{i32 10000, i64 2000, i32 1}
				!13 = !{i32 999900, i64 1000, i32 2}
				!14 = !{i32 999999, i64 1000, i32 2}
				!15 = !{!"function_entry_count", i64 1000}
				!16 = !{!"function_entry_count", i64 2000}
				!17 = !{!"function_entry_count", i64 400}
	attributes #0 = { alwaysinline }			attributes #0 = { alwaysinline }

test/Transforms/Inline/prof-update.ll

	; RUN: opt < %s -inline -S \| FileCheck %s			; RUN: opt < %s -inline -S \| FileCheck %s
	; Checks if inliner updates branch_weights annotation for call instructions.			; Checks if inliner updates branch_weights annotation for call instructions.

	declare void @ext();			declare void @ext();
	declare void @ext1();			declare void @ext1();
	@func = global void ()* null			@func = global void ()* null

	; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]			; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
	define void @callee(i32 %n) !prof !1 {			define void @callee(i32 %n) !prof !15 {
	%cond = icmp sle i32 %n, 10			%cond = icmp sle i32 %n, 10
	br i1 %cond, label %cond_true, label %cond_false			br i1 %cond, label %cond_true, label %cond_false
	cond_true:			cond_true:
	; ext1 is optimized away, thus not updated.			; ext1 is optimized away, thus not updated.
	; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]			; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
	call void @ext1(), !prof !2			call void @ext1(), !prof !16
	ret void			ret void
	cond_false:			cond_false:
	; ext is cloned and updated.			; ext is cloned and updated.
	; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]			; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
	call void @ext(), !prof !2			call void @ext(), !prof !16
	%f = load void (), void ()* @func			%f = load void (), void ()* @func
	; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]			; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
	call void %f(), !prof !4			call void %f(), !prof !18
	ret void			ret void
	}			}

	; CHECK: define void @caller()			; CHECK: define void @caller()
	define void @caller() {			define void @caller() {
	; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]			; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
	; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]			; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
	call void @callee(i32 15), !prof !3			call void @callee(i32 15), !prof !17
	ret void			ret void
	}			}

	!llvm.module.flags = !{!0}			!llvm.module.flags = !{!1}
	!0 = !{i32 1, !"MaxFunctionCount", i32 2000}			!1 = !{i32 1, !"ProfileSummary", !2}
	!1 = !{!"function_entry_count", i64 1000}			!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
	!2 = !{!"branch_weights", i64 2000}			!3 = !{!"ProfileFormat", !"SampleProfile"}
	!3 = !{!"branch_weights", i64 400}			!4 = !{!"TotalCount", i64 10000}
	!4 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}			!5 = !{!"MaxCount", i64 10}
				!6 = !{!"MaxInternalCount", i64 1}
				!7 = !{!"MaxFunctionCount", i64 2000}
				!8 = !{!"NumCounts", i64 2}
				!9 = !{!"NumFunctions", i64 2}
				!10 = !{!"DetailedSummary", !11}
				!11 = !{!12, !13, !14}
				!12 = !{i32 10000, i64 100, i32 1}
				!13 = !{i32 999000, i64 100, i32 1}
				!14 = !{i32 999999, i64 1, i32 2}
				!15 = !{!"function_entry_count", i64 1000}
				!16 = !{!"branch_weights", i64 2000}
				!17 = !{!"branch_weights", i64 400}
				!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
	attributes #0 = { alwaysinline }			attributes #0 = { alwaysinline }
	; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}			; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
	; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}			; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
	; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200}			; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200}
	; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}			; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
	; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800}			; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800}
	; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8}			; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8}

unittests/Analysis/ProfileSummaryInfoTest.cpp

Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines	TEST_F(ProfileSummaryInfoTest, InstrProf) {
EXPECT_TRUE(PSI.isHotBB(BB3, &BFI));		EXPECT_TRUE(PSI.isHotBB(BB3, &BFI));

CallSite CS1(BB1->getFirstNonPHI());		CallSite CS1(BB1->getFirstNonPHI());
auto *CI2 = BB2->getFirstNonPHI();		auto *CI2 = BB2->getFirstNonPHI();
CallSite CS2(CI2);		CallSite CS2(CI2);

EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));		EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));		EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));

		// Test that adding an MD_prof metadata with a hot count on CS2 does not
		// change its hotness as it has no effect in instrumented profiling.
		MDBuilder MDB(M->getContext());
		CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
		EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
}		}

TEST_F(ProfileSummaryInfoTest, SampleProf) {		TEST_F(ProfileSummaryInfoTest, SampleProf) {
auto M = makeLLVMModule("SampleProfile");		auto M = makeLLVMModule("SampleProfile");
Function *F = M->getFunction("f");		Function *F = M->getFunction("f");
ProfileSummaryInfo PSI = buildPSI(M.get());		ProfileSummaryInfo PSI = buildPSI(M.get());

BasicBlock &BB0 = F->getEntryBlock();		BasicBlock &BB0 = F->getEntryBlock();
Show All 26 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Restrict call metadata based hotness detection to Sample PGO mode
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 98560

lib/Analysis/ProfileSummaryInfo.cpp

test/Bitcode/thinlto-function-summary-callgraph-pgo.ll

test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll

test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll

test/Transforms/CodeGenPrepare/section-samplepgo.ll

test/Transforms/CodeGenPrepare/section.ll

test/Transforms/Inline/function-count-update-2.ll

test/Transforms/Inline/function-count-update-3.ll

test/Transforms/Inline/function-count-update.ll

test/Transforms/Inline/prof-update.ll

unittests/Analysis/ProfileSummaryInfoTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

Restrict call metadata based hotness detection to Sample PGO mode
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 98560

lib/Analysis/ProfileSummaryInfo.cpp

test/Bitcode/thinlto-function-summary-callgraph-pgo.ll

test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll

test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll

test/Transforms/CodeGenPrepare/section-samplepgo.ll

test/Transforms/CodeGenPrepare/section.ll

test/Transforms/Inline/function-count-update-2.ll

test/Transforms/Inline/function-count-update-3.ll

test/Transforms/Inline/function-count-update.ll

test/Transforms/Inline/prof-update.ll

unittests/Analysis/ProfileSummaryInfoTest.cpp

Restrict call metadata based hotness detection to Sample PGO mode
ClosedPublic