Diff 233400

llvm/lib/Transforms/IPO/SampleProfile.cpp

Show All 24 Lines
#include "llvm/ADT/ArrayRef.h"		#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"		#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"		#include "llvm/ADT/None.h"
#include "llvm/ADT/SCCIterator.h"		#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"		#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
		#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"		#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"		#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"		#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"		#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"		#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"		#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"		#include "llvm/Analysis/LoopInfo.h"
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
#include <utility>		#include <utility>
#include <vector>		#include <vector>

using namespace llvm;		using namespace llvm;
using namespace sampleprof;		using namespace sampleprof;
using ProfileCount = Function::ProfileCount;		using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "sample-profile"		#define DEBUG_TYPE "sample-profile"

		STATISTIC(NumCSInlined,
		"Number of functions inlined with context sensitive profile");
		STATISTIC(NumCSNotInlined,
		"Number of functions not inlined with context sensitive profile");

// Command line option to specify the file to read samples from. This is		// Command line option to specify the file to read samples from. This is
// mainly used for debugging.		// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(		static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),		"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);		cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);

// The named file contains a set of transformations that may have been applied		// The named file contains a set of transformations that may have been applied
// to the symbol names between the program from which the sample data was		// to the symbol names between the program from which the sample data was
▲ Show 20 Lines • Show All 219 Lines • ▼ Show 20 Lines	protected:
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;		findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;		mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;		const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineCallInstruction(Instruction *I);		bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,		bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);		DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones		// Inline cold/small functions in addition to hot ones
bool shouldInlineColdCallee(Instruction &CallInst);		bool shouldInlineColdCallee(Instruction &CallInst);
		void emitOptimizationRemarksForInlineCandidates(
		const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot);
void printEdgeWeight(raw_ostream &OS, Edge E);		void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;		void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);		void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);		bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);		void findEquivalenceClasses(Function &F);
template <bool IsPostDom>		template <bool IsPostDom>
void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,		void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);		DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);
▲ Show 20 Lines • Show All 549 Lines • ▼ Show 20 Lines	bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
// set ComputeFullInlineCost, otherwise getInlineCost may return early		// set ComputeFullInlineCost, otherwise getInlineCost may return early
// when cost exceeds threshold without checking all IRs in the callee.		// when cost exceeds threshold without checking all IRs in the callee.
// The actual cost does not matter because we only check isNever() to		// The actual cost does not matter because we only check isNever() to
// see if it is legal to inline the callsite.		// see if it is legal to inline the callsite.
InlineCost Cost =		InlineCost Cost =
getInlineCost(cast<CallBase>(I), Params, GetTTI(CalledFunction), GetAC,		getInlineCost(cast<CallBase>(I), Params, GetTTI(CalledFunction), GetAC,
None, nullptr, nullptr);		None, nullptr, nullptr);
if (Cost.isNever()) {		if (Cost.isNever()) {
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)		ORE->emit(OptimizationRemarkAnalysis("sample-profile-inline", "InlineFail",
		DLoc, BB)
		wmiUnsubmitted Not Done Reply Inline Actions The first parameter in the declaration of OptimizationRemark is "const char *PassName", so why not use DEBUG_TYPE? I feel "NeverInline" may be more clear than "NotInline" in terms of showing it is illegal to inline. wmi: The first parameter in the declaration of OptimizationRemark is "const char *PassName", so why…
		wenleiAuthorUnsubmitted Done Reply Inline Actions DEBUG_TYPE is "sample-profile" and I feel it's too broad for practical uses as it covers sample usages, weight propagation, and inlining. I wanted to have something that only gives me inlining remarks, thus using "sample-profile-inline" instead here. Good point about "NeverInline", will change. wenlei: DEBUG_TYPE is "sample-profile" and I feel it's too broad for practical uses as it covers sample…
		wmiUnsubmitted Not Done Reply Inline Actions Then maybe define a macro for it like #define CSINLINE DEBUG_TYPE "-inline". wmi: Then maybe define a macro for it like #define CSINLINE DEBUG_TYPE "-inline".
		wenleiAuthorUnsubmitted Done Reply Inline Actions thanks, macro added. wenlei: thanks, macro added.
<< "incompatible inlining");		<< "incompatible inlining");
return false;		return false;
}		}
InlineFunctionInfo IFI(nullptr, &GetAC);		InlineFunctionInfo IFI(nullptr, &GetAC);
if (InlineFunction(CS, IFI)) {		if (InlineFunction(CS, IFI)) {
// The call to InlineFunction erases I, so we can't pass it here.		// The call to InlineFunction erases I, so we can't pass it here.
ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)		ORE->emit(OptimizationRemark("sample-profile-inline", "InlineSuccess",
<< "inlined hot callee '" << ore::NV("Callee", CalledFunction)		DLoc, BB)
		<< "inlined callee '" << ore::NV("Callee", CalledFunction)
<< "' into '" << ore::NV("Caller", BB->getParent()) << "'");		<< "' into '" << ore::NV("Caller", BB->getParent()) << "'");
return true;		return true;
}		}
return false;		return false;
}		}

bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {		bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
if (!ProfileSizeInline)		if (!ProfileSizeInline)
return false;		return false;

Function *Callee = CallSite(&CallInst).getCalledFunction();		Function *Callee = CallSite(&CallInst).getCalledFunction();
if (Callee == nullptr)		if (Callee == nullptr)
return false;		return false;

InlineCost Cost =		InlineCost Cost =
getInlineCost(cast<CallBase>(CallInst), getInlineParams(),		getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
GetTTI(*Callee), GetAC, None, nullptr, nullptr);		GetTTI(*Callee), GetAC, None, nullptr, nullptr);

return Cost.getCost() <= SampleColdCallSiteThreshold;		return Cost.getCost() <= SampleColdCallSiteThreshold;
}		}

		void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
		const SmallVector<Instruction *, 10> &Candidates, const Function &F,
		bool Hot) {
		for (auto I : Candidates) {
		Function *CalledFunction = CallSite(I).getCalledFunction();
		if (CalledFunction) {
		ORE->emit(OptimizationRemarkAnalysis("sample-profile-inline",
		"InlineAttempt", I->getDebugLoc(),
		I->getParent())
		<< "previous inlining reattempted for "
		<< (Hot ? "hotness: '" : "size: '")
		<< ore::NV("Callee", CalledFunction) << "' into '"
		<< ore::NV("Caller", &F) << "'");
		}
		}
		}

/// Iteratively inline hot callsites of a function.		/// Iteratively inline hot callsites of a function.
///		///
/// Iteratively traverse all callsites of the function \p F, and find if		/// Iteratively traverse all callsites of the function \p F, and find if
/// the corresponding inlined instance exists and is hot in profile. If		/// the corresponding inlined instance exists and is hot in profile. If
/// it is hot enough, inline the callsites and adds new callsites of the		/// it is hot enough, inline the callsites and adds new callsites of the
/// callee into the caller. If the call is an indirect call, first promote		/// callee into the caller. If the call is an indirect call, first promote
/// it to direct call. Each indirect call is limited with a single target.		/// it to direct call. Each indirect call is limited with a single target.
///		///
Show All 33 Lines	for (auto &BB : F) {
if (callsiteIsHot(FS, PSI))		if (callsiteIsHot(FS, PSI))
Hot = true;		Hot = true;
else if (shouldInlineColdCallee(I))		else if (shouldInlineColdCallee(I))
ColdCandidates.push_back(&I);		ColdCandidates.push_back(&I);
}		}
}		}
if (Hot) {		if (Hot) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());		CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
		emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
}		}
else {		else {
CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());		CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
		emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
}		}
}		}
for (auto I : CIS) {		for (auto I : CIS) {
Function *CalledFunction = CallSite(I).getCalledFunction();		Function *CalledFunction = CallSite(I).getCalledFunction();
// Do not inline recursive calls.		// Do not inline recursive calls.
if (CalledFunction == &F)		if (CalledFunction == &F)
continue;		continue;
if (CallSite(I).isIndirectCall()) {		if (CallSite(I).isIndirectCall()) {
if (PromotedInsns.count(I))		if (PromotedInsns.count(I))
continue;		continue;
uint64_t Sum;		uint64_t Sum;
for (const auto FS : findIndirectCallFunctionSamples(I, Sum)) {		for (const auto FS : findIndirectCallFunctionSamples(I, Sum)) {
if (IsThinLTOPreLink) {		if (IsThinLTOPreLink) {
		wmiUnsubmitted Not Done Reply Inline Actions A not-inlined candidate may be reported multiple times here because of the iterative outer loop. I guess you put the OptimizationRemark here because you want to know the exact reason why a candidate with an inline instance in the profile is not inlined (here the reason is that it is not hot enough); if so, some more information should be emitted to explain it. If you don't care about the exact reason, then it is better to generate the optimization remark in the loop iterating over localNotInlinedCallSites. localNotInlinedCallSites contains all the candidates that have an inline instance in the profile but were not inlined for whatever reason, including "not hot enough". wmi: A not-inlined candidate may be reported multiple times here because of the iterative outer loop.
		wenleiAuthorUnsubmitted Done Reply Inline Actions Yes, I care about the reasons. I will change the message to make the reason explicit in the output remarks. wenlei: Yes, I care about the reasons. I will change the message to make the reason explicit in the…
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),		FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());		PSI->getOrCompHotCountThreshold());
continue;		continue;
}		}
auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());		auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
// If it is a recursive call, we do not inline it as it could bloat		// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is way to better handle this, e.g.		// the code exponentially. There is way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is		// clone the caller first, and inline the cloned caller if it is
Show All 16 Lines	for (auto I : CIS) {
pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);		pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
Sum -= C;		Sum -= C;
PromotedInsns.insert(I);		PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.		// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&		if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
inlineCallInstruction(DI)) {		inlineCallInstruction(DI)) {
localNotInlinedCallSites.erase(I);		localNotInlinedCallSites.erase(I);
LocalChanged = true;		LocalChanged = true;
		++NumCSInlined;
}		}
} else {		} else {
LLVM_DEBUG(dbgs()		LLVM_DEBUG(dbgs()
<< "\nFailed to promote indirect call to "		<< "\nFailed to promote indirect call to "
<< CalleeFunctionName << " because " << Reason << "\n");		<< CalleeFunctionName << " because " << Reason << "\n");
}		}
}		}
} else if (CalledFunction && CalledFunction->getSubprogram() &&		} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {		!CalledFunction->isDeclaration()) {
if (inlineCallInstruction(I)) {		if (inlineCallInstruction(I)) {
localNotInlinedCallSites.erase(I);		localNotInlinedCallSites.erase(I);
LocalChanged = true;		LocalChanged = true;
		++NumCSInlined;
}		}
} else if (IsThinLTOPreLink) {		} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(		findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());		InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}		}
}		}
if (LocalChanged) {		if (LocalChanged) {
Changed = true;		Changed = true;
} else {		} else {
break;		break;
}		}
}		}

// Accumulate not inlined callsite information into notInlinedSamples		// Accumulate not inlined callsite information into notInlinedSamples
for (const auto &Pair : localNotInlinedCallSites) {		for (const auto &Pair : localNotInlinedCallSites) {
Instruction *I = Pair.getFirst();		Instruction *I = Pair.getFirst();
Function *Callee = CallSite(I).getCalledFunction();		Function *Callee = CallSite(I).getCalledFunction();
if (!Callee || Callee->isDeclaration())		if (!Callee || Callee->isDeclaration())
continue;		continue;

		ORE->emit(OptimizationRemarkAnalysis("sample-profile-inline", "NotInline",
		I->getDebugLoc(), I->getParent())
		<< "previous inlining not repeated: '"
		<< ore::NV("Callee", Callee) << "' into '"
		<< ore::NV("Caller", &F) << "'");

		++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();		const FunctionSamples *FS = Pair.getSecond();
if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {		if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
continue;		continue;
}		}

if (ProfileMergeInlinee) {		if (ProfileMergeInlinee) {
// Use entry samples as head samples during the merge, as inlinees		// Use entry samples as head samples during the merge, as inlinees
// don't have head samples.		// don't have head samples.
▲ Show 20 Lines • Show All 884 Lines • Show Last 20 Lines

llvm/test/Transforms/SampleProfile/inline-coverage.ll

	Show All 10 Lines
	; 6			; 6
	; 7 int main() {			; 7 int main() {
	; 8 long long int sum = 0;			; 8 long long int sum = 0;
	; 9 for (int i = 0; i < 200000 * 3000; i++)			; 9 for (int i = 0; i < 200000 * 3000; i++)
	; 10 sum += foo(i);			; 10 sum += foo(i);
	; 11 return sum > 0 ? 0 : 1;			; 11 return sum > 0 ? 0 : 1;
	; 12 }			; 12 }
	;			;
	; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' into 'main'			; CHECK: remark: coverage.cc:10:12: previous inlining reattempted for hotness: '_Z3fool' into 'main'
	; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1)			; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1)
	; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)			; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)
	; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)			; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)
	; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)			; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)
	; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3			; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3
	;			;
	; There is one sample record with 0 samples at offset 4 in main() that we never			; There is one sample record with 0 samples at offset 4 in main() that we never
	; use:			; use:
	▲ Show 20 Lines • Show All 108 Lines • Show Last 20 Lines

llvm/test/Transforms/SampleProfile/inline-stats.ll

This file was added.

				; REQUIRES: asserts
				; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -stats -S 2>&1 | FileCheck %s
				; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -stats -S 2>&1 | FileCheck %s
				wmiUnsubmitted Not Done Reply Inline Actions Test new pass manager as well. wmi: Test new pass manager as well.

				; Original C++ test case
				;
				; #include <stdio.h>
				;
				; int sum(int x, int y) {
				; return x + y;
				; }
				;
				; int main() {
				; int s, i = 0;
				; while (i++ < 20000 * 20000)
				; if (i != 100) s = sum(i, s); else s = 30;
				; printf("sum is %d\n", s);
				; return 0;
				; }
				;
				@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
				define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !6 {
				entry:
				%x.addr = alloca i32, align 4
				%y.addr = alloca i32, align 4
				store i32 %x, i32* %x.addr, align 4
				store i32 %y, i32* %y.addr, align 4
				%tmp = load i32, i32* %x.addr, align 4, !dbg !8
				%tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
				%add = add nsw i32 %tmp, %tmp1, !dbg !8
				wmiUnsubmitted Not Done Reply Inline Actions rename the vars with just a number. wmi: rename the vars with just a number.
				ret i32 %add, !dbg !8
				}
				define i32 @main() !dbg !9 {
				entry:
				%retval = alloca i32, align 4
				%s = alloca i32, align 4
				%i = alloca i32, align 4
				store i32 0, i32* %retval
				store i32 0, i32* %i, align 4, !dbg !10
				br label %while.cond, !dbg !11

				while.cond: ; preds = %if.end, %entry
				%tmp = load i32, i32* %i, align 4, !dbg !12
				%inc = add nsw i32 %tmp, 1, !dbg !12
				store i32 %inc, i32* %i, align 4, !dbg !12
				%cmp = icmp slt i32 %tmp, 400000000, !dbg !12
				br i1 %cmp, label %while.body, label %while.end, !dbg !12

				while.body: ; preds = %while.cond
				%tmp1 = load i32, i32* %i, align 4, !dbg !14
				%cmp1 = icmp ne i32 %tmp1, 100, !dbg !14
				br i1 %cmp1, label %if.then, label %if.else, !dbg !14

				if.then: ; preds = %while.body
				%tmp2 = load i32, i32* %i, align 4, !dbg !16
				%tmp3 = load i32, i32* %s, align 4, !dbg !16
				%call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !16
				store i32 %call, i32* %s, align 4, !dbg !16
				br label %if.end, !dbg !16

				if.else: ; preds = %while.body
				store i32 30, i32* %s, align 4, !dbg !18
				br label %if.end

				if.end: ; preds = %if.else, %if.then
				br label %while.cond, !dbg !20

				while.end: ; preds = %while.cond
				%tmp4 = load i32, i32* %s, align 4, !dbg !22
				%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !22
				ret i32 0, !dbg !23
				}
				declare i32 @printf(i8*, ...)

				!llvm.dbg.cu = !{!0}
				!llvm.module.flags = !{!3, !4}
				!llvm.ident = !{!5}

				!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
				!1 = !DIFile(filename: "calls.cc", directory: ".")
				!2 = !{}
				!3 = !{i32 2, !"Dwarf Version", i32 4}
				!4 = !{i32 1, !"Debug Info Version", i32 3}
				!5 = !{!"clang version 3.5 "}
				!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
				!7 = !DISubroutineType(types: !2)
				!8 = !DILocation(line: 4, scope: !6)
				!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
				!10 = !DILocation(line: 8, scope: !9)
				!11 = !DILocation(line: 9, scope: !9)
				!12 = !DILocation(line: 9, scope: !13)
				!13 = !DILexicalBlockFile(scope: !9, file: !1, discriminator: 2)
				!14 = !DILocation(line: 10, scope: !15)
				!15 = distinct !DILexicalBlock(scope: !9, file: !1, line: 10)
				!16 = !DILocation(line: 10, scope: !17)
				!17 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
				!18 = !DILocation(line: 10, scope: !19)
				!19 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 4)
				!20 = !DILocation(line: 10, scope: !21)
				!21 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 6)
				!22 = !DILocation(line: 11, scope: !9)
				!23 = !DILocation(line: 12, scope: !9)

				; CHECK: 1 sample-profile - Number of functions inlined with context sensitive profile
				No newline at end of file

llvm/test/Transforms/SampleProfile/remarks.ll

	Show All 15 Lines
	; 9 sum += -i * rand();			; 9 sum += -i * rand();
	; 10 return sum;			; 10 return sum;
	; 11 }			; 11 }
	; 12			; 12
	; 13 int main() { return foo() > 0; }			; 13 int main() { return foo() > 0; }

	; We are expecting foo() to be inlined in main() (almost all the cycles are			; We are expecting foo() to be inlined in main() (almost all the cycles are
	; spent inside foo).			; spent inside foo).
	; CHECK: remark: remarks.cc:13:21: inlined hot callee '_Z3foov' into 'main'			; CHECK: remark: remarks.cc:13:21: inlined callee '_Z3foov' into 'main'

	; The back edge for the loop is the hottest edge in the loop subgraph.			; The back edge for the loop is the hottest edge in the loop subgraph.
	; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3			; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3

	; The predicate almost always chooses the 'else' branch.			; The predicate almost always chooses the 'else' branch.
	; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9			; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9

	; Checking to see if YAML file is generated and contains remarks			; Checking to see if YAML file is generated and contains remarks
	;YAML: --- !Passed			;YAML: --- !Passed
	;YAML-NEXT: Pass: sample-profile			;YAML-NEXT: Pass: sample-profile-inline
	;YAML-NEXT: Name: HotInline			;YAML-NEXT: Name: InlineSuccess
	;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 21 }			;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 21 }
	;YAML-NEXT: Function: main			;YAML-NEXT: Function: main
	;YAML-NEXT: Args:			;YAML-NEXT: Args:
	;YAML-NEXT: - String: 'inlined hot callee '''			;YAML-NEXT: - String: 'inlined callee '''
	;YAML-NEXT: - Callee: _Z3foov			;YAML-NEXT: - Callee: _Z3foov
	;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 3, Column: 0 }			;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 3, Column: 0 }
	;YAML-NEXT: - String: ''' into '''			;YAML-NEXT: - String: ''' into '''
	;YAML-NEXT: - Caller: main			;YAML-NEXT: - Caller: main
	;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 0 }			;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 0 }
	;YAML-NEXT: - String: ''''			;YAML-NEXT: - String: ''''
	;YAML-NEXT: ...			;YAML-NEXT: ...
	;YAML: --- !Analysis			;YAML: --- !Analysis
	▲ Show 20 Lines • Show All 178 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AutoFDO] Statistic for context sensitive profile guided inlining
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 233400

llvm/lib/Transforms/IPO/SampleProfile.cpp

llvm/test/Transforms/SampleProfile/inline-coverage.ll

llvm/test/Transforms/SampleProfile/inline-stats.ll

llvm/test/Transforms/SampleProfile/remarks.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AutoFDO] Statistic for context sensitive profile guided inliningClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 233400

llvm/lib/Transforms/IPO/SampleProfile.cpp

llvm/test/Transforms/SampleProfile/inline-coverage.ll

llvm/test/Transforms/SampleProfile/inline-stats.ll

llvm/test/Transforms/SampleProfile/remarks.ll

[AutoFDO] Statistic for context sensitive profile guided inlining
ClosedPublic