This is an archive of the discontinued LLVM Phabricator instance.

Use call target count to derive the call instruction weight
ClosedPublic

Authored by danielcdh on Sep 9 2016, 10:44 AM.

Download Raw Diff

Details

Reviewers

dnovillo
davidxl

Commits

rG38e3731c470b: Use call target count to derive the call instruction weight
rL281911: Use call target count to derive the call instruction weight

Summary

The call target count profile is directly derived from LBR branch->target data. This is more reliable than instruction frequency profiles that could be moved across basic block boundaries. This patch uses the call target count profile to annotate call instructions.

Diff Detail

Event Timeline

danielcdh updated this revision to Diff 70862.Sep 9 2016, 10:44 AM

danielcdh retitled this revision to "Use call target count to derive the call instruction weight".

danielcdh updated this object.

danielcdh added reviewers: dnovillo, davidxl.

danielcdh added a subscriber: llvm-commits.

OK, so instead of looking at the number of samples collected at the call site, it looks at the number of calls the target had. But what if the target is called disproportionately more from one call site than the other?

Like:

if (this_is_true_95_percent_of_the_time())
  A();
else
  A();

The example is ridiculous, I know, but wouldn't we end up inlining A() in both branches of the if()? Is that a code growth issue in general?

In D24410#545823, @dnovillo wrote:
OK, so instead of looking at the number of samples collected at the call site, it looks at the number of calls the target had. But what if the target is called disproportionately more from one call site than the other?

Like:

if (this_is_true_95_percent_of_the_time())
A();
else
A();
The example is ridiculous, I know, but wouldn't we end up inlining A() in both branches of the if()? Is that a code growth issue in general?

No, because we use lineno+discriminator to distinguish call sites, so we can precisely locate both call sites of A and inline only the hot one.

dnovillo accepted this revision.Sep 19 2016, 9:01 AM

dnovillo edited edge metadata.

This revision is now accepted and ready to land.Sep 19 2016, 9:01 AM

format

danielcdh closed this revision.Sep 19 2016, 9:15 AM

Revision Contents

Path

Size

include/

llvm/

ProfileData/

SampleProf.h

15 lines

lib/

Transforms/

IPO/

SampleProfile.cpp

8 lines

test/

Transforms/

SampleProfile/

Inputs/

inline-coverage.prof

2 lines

Diff 71840

include/llvm/ProfileData/SampleProf.h

Show First 20 Lines • Show All 216 Lines • ▼ Show 20 Lines	ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
uint32_t Discriminator) const {		uint32_t Discriminator) const {
const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));		const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
if (ret == BodySamples.end())		if (ret == BodySamples.end())
return std::error_code();		return std::error_code();
else		else
return ret->second.getSamples();		return ret->second.getSamples();
}		}

		/// Return the total number of call target samples collected at a given
		/// location. Each location is specified by \p LineOffset and
		/// \p Discriminator. If the location is not found in profile, return error.
		ErrorOr<uint64_t> findCallSamplesAt(uint32_t LineOffset,
		uint32_t Discriminator) const {
		const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
		if (ret == BodySamples.end())
		return std::error_code();
		uint64_t T = 0;
		for (const auto &t_c : ret->second.getCallTargets()) {
		T += t_c.second;
		}
		return T;
		}

/// Return the function samples at the given callsite location.		/// Return the function samples at the given callsite location.
FunctionSamples &functionSamplesAt(const LineLocation &Loc) {		FunctionSamples &functionSamplesAt(const LineLocation &Loc) {
return CallsiteSamples[Loc];		return CallsiteSamples[Loc];
}		}

/// Return a pointer to function samples at the given callsite location.		/// Return a pointer to function samples at the given callsite location.
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const {		const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const {
auto iter = CallsiteSamples.find(Loc);		auto iter = CallsiteSamples.find(Loc);
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines

lib/Transforms/IPO/SampleProfile.cpp

Show First 20 Lines • Show All 474 Lines • ▼ Show 20 Lines	if (IsCall && findCalleeFunctionSamples(Inst))
return 0;		return 0;

const DILocation *DIL = DLoc;		const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();		unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();		unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();

uint32_t LineOffset = getOffset(Lineno, HeaderLineno);		uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
uint32_t Discriminator = DIL->getDiscriminator();		uint32_t Discriminator = DIL->getDiscriminator();
ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);		ErrorOr<uint64_t> R = IsCall
		? FS->findCallSamplesAt(LineOffset, Discriminator)
		: FS->findSamplesAt(LineOffset, Discriminator);
if (R) {		if (R) {
bool FirstMark =		bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());		CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
if (FirstMark) {		if (FirstMark) {
const Function *F = Inst.getParent()->getParent();		const Function *F = Inst.getParent()->getParent();
LLVMContext &Ctx = F->getContext();		LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(		emitOptimizationRemark(
Ctx, DEBUG_TYPE, *F, DLoc,		Ctx, DEBUG_TYPE, *F, DLoc,
▲ Show 20 Lines • Show All 775 Lines • ▼ Show 20 Lines	if (Coverage < SampleProfileSampleCoverage) {
DS_Warning));		DS_Warning));
}		}
}		}
return Changed;		return Changed;
}		}

char SampleProfileLoaderLegacyPass::ID = 0;		char SampleProfileLoaderLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",		INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)		"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)		INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",		INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)		"Sample Profile loader", false, false)

bool SampleProfileLoader::doInitialization(Module &M) {		bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();		auto &Ctx = M.getContext();
auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);		auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
if (std::error_code EC = ReaderOrErr.getError()) {		if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();		std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));		Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;		return false;
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

test/Transforms/SampleProfile/Inputs/inline-coverage.prof

	main:501438:0			main:501438:0
	2.1: 23478			2.1: 23478
	3: 23478			3: 23478
	4: 0			4: 0
	0: 0			0: 0
	3: _Z3fool:172746			3: _Z3fool:172746
	1: 31878			1: 31878 rand:31878