Diff 92170

include/llvm/IR/Instruction.h

Show First 20 Lines • Show All 246 Lines • ▼ Show 20 Lines	public:
/// Returns false if no metadata or invalid metadata was found.		/// Returns false if no metadata or invalid metadata was found.
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const;		bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const;

/// Retrieve total raw weight values of a branch.		/// Retrieve total raw weight values of a branch.
/// Returns true on success with profile total weights filled in.		/// Returns true on success with profile total weights filled in.
/// Returns false if no metadata was found.		/// Returns false if no metadata was found.
bool extractProfTotalWeight(uint64_t &TotalVal) const;		bool extractProfTotalWeight(uint64_t &TotalVal) const;

		/// Updates branch_weights metadata by scaling it by \p S / \p T.
		void updateProfWeight(uint64_t S, uint64_t T);

		eramanUnsubmitted Done Reply Inline Actions Do not use double here and instead specify it as a fraction. eraman: Do not use double here and instead specify it as a fraction.
/// Set the debug location information for this instruction.		/// Set the debug location information for this instruction.
void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }		void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }

/// Return the debug location for this node as a DebugLoc.		/// Return the debug location for this node as a DebugLoc.
const DebugLoc &getDebugLoc() const { return DbgLoc; }		const DebugLoc &getDebugLoc() const { return DbgLoc; }

/// Set or clear the nsw flag on this instruction, which must be an operator		/// Set or clear the nsw flag on this instruction, which must be an operator
/// which supports this flag. See LangRef.html for the meaning of this flag.		/// which supports this flag. See LangRef.html for the meaning of this flag.
▲ Show 20 Lines • Show All 346 Lines • Show Last 20 Lines

lib/IR/Instruction.cpp

	Show All 11 Lines
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "llvm/ADT/DenseSet.h"			#include "llvm/ADT/DenseSet.h"
	#include "llvm/IR/Instruction.h"			#include "llvm/IR/Instruction.h"
	#include "llvm/IR/CallSite.h"			#include "llvm/IR/CallSite.h"
	#include "llvm/IR/Constants.h"			#include "llvm/IR/Constants.h"
	#include "llvm/IR/Instructions.h"			#include "llvm/IR/Instructions.h"
	#include "llvm/IR/Module.h"			#include "llvm/IR/Module.h"
				#include "llvm/IR/MDBuilder.h"
	#include "llvm/IR/Operator.h"			#include "llvm/IR/Operator.h"
	#include "llvm/IR/Type.h"			#include "llvm/IR/Type.h"
	using namespace llvm;			using namespace llvm;

	Instruction::Instruction(Type ty, unsigned it, Use Ops, unsigned NumOps,			Instruction::Instruction(Type ty, unsigned it, Use Ops, unsigned NumOps,
	Instruction *InsertBefore)			Instruction *InsertBefore)
	: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {			: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {

	▲ Show 20 Lines • Show All 641 Lines • ▼ Show 20 Lines
	#include "llvm/IR/Instruction.def"			#include "llvm/IR/Instruction.def"
	#undef HANDLE_INST			#undef HANDLE_INST
	}			}

	New->SubclassOptionalData = SubclassOptionalData;			New->SubclassOptionalData = SubclassOptionalData;
	New->copyMetadata(*this);			New->copyMetadata(*this);
	return New;			return New;
	}			}

				void Instruction::updateProfWeight(uint64_t S, uint64_t T) {
				auto *ProfileData = getMetadata(LLVMContext::MD_prof);
				if (!ProfileData.hasValue())
				return;

				auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
				if (!ProfDataName \|\| !ProfDataName->getString().equals("branch_weights"))
				return;

				SmallVector<uint32_t, 4> Weights;
				for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
				// Using APInt::div may be expensive, but most cases should fit in 64 bits.
				APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i))
				eramanUnsubmitted Done Reply Inline Actions What are the assumptions on the MD_prof metadata here? Could you assert V here? If it is indeed possible that the ith operand is not a constant int, could the next operand be a constant int (in which case you shouldn't return here)? eraman: What are the assumptions on the MD_prof metadata here? Could you assert V here? If it is indeed…
				danielcdhAuthorUnsubmitted Not Done Reply Inline Actions removed the check. danielcdh: removed the check.
				->getValue()
				.getZExtValue());
				eramanUnsubmitted Done Reply Inline Actions For counts from instrumentation based profile, I use a 128 bit APInt to do the arithmetic and then reduce it to 64 bit. In this case, since the values being multiplied are sample profile weights, I suppose the likelihood of Val * S overflowing 64 bits is very small and the APInt may be unnecessary. Even then, use saturated multiply so that the values are at least sensible even in the unlikely case of overflow. eraman: For counts from instrumentation based profile, I use a 128 bit APInt to do the arithmetic and…
				Val *= APInt(128, S);
				eramanUnsubmitted Done Reply Inline Actions Perhaps it is not clear above, but SaturatingMultiply may be sufficient in your case. I'm ok with using APInt as well as in the most common case (multiplication result fits within 64 bits), the division is not expensive. Perhaps add a comment above the udiv stating that this could potentially be expensive, but most likely the product is going to fit within 64 bits. eraman: Perhaps it is not clear above, but SaturatingMultiply may be sufficient in your case. I'm ok…
				Weights.push_back(Val.udiv(APInt(128, T)).getLimitedValue());
				}
				MDBuilder MDB(getContext());
				setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
				}

lib/Transforms/Utils/InlineFunction.cpp

Show All 19 Lines
#include "llvm/ADT/StringExtras.h"		#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"		#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"		#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"		#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"		#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"		#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"		#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"		#include "llvm/Analysis/InstructionSimplify.h"
		#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"		#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"		#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"		#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"		#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"		#include "llvm/IR/DerivedTypes.h"
▲ Show 20 Lines • Show All 1,383 Lines • ▼ Show 20 Lines	for (auto const &Entry : VMap) {
}		}
CallerBFI->setBlockFreq(ClonedBB, Freq);		CallerBFI->setBlockFreq(ClonedBB, Freq);
}		}
BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));		BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
CallerBFI->setBlockFreqAndScale(		CallerBFI->setBlockFreqAndScale(
EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),		EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
ClonedBBs);		ClonedBBs);
}		}

		/// Update the branch metadata for cloned call instructions.
		static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
		const Optional<uint64_t> &CalleeEntryCount,
		const Instruction *TheCall) {
		if (!CalleeEntryCount.hasValue() \|\| CalleeEntryCount.getValue() < 1)
		zzhengUnsubmitted Done Reply Inline Actions Do you mean to use !CalleeEntryCount.has_value()? zzheng: Do you mean to use !CalleeEntryCount.has_value()?
		return;
		Optional<uint64_t> CallSiteCount =
		ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
		uint64_t CallCount =
		zzhengUnsubmitted Done Reply Inline Actions I'm confused, do you mean to test if CallSiteCount has no value or it has a value of 0? zzheng: I'm confused, do you mean to test if CallSiteCount has no value or it has a value of 0?
		std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
		CalleeEntryCount.getValue());

		for (auto const &Entry : VMap)
		eramanUnsubmitted Done Reply Inline Actions You have to check if Entry.second is a call. It is possible for call instructions to be simplified to a value during the cloning. eraman: You have to check if Entry.second is a call. It is possible for call instructions to be…
		zzhengUnsubmitted Not Done Reply Inline Actions Isn't it redundant to check ptr != nullptr? Is there any chance that Entry.second is NULL, and what happens on *(Entry.second) in that case? zzheng: Isn't it redundant to check ptr != nullptr? Is there any chance that Entry.second is NULL and…
		danielcdhAuthorUnsubmitted Not Done Reply Inline Actions It indeed happened once when I compiled large code; that's why I added this check. I'm not familiar with the cloning code; Easwaran may have some insight into why this could be nullptr. Or is it a bug in function cloning? danielcdh: It indeed happened once when I compiled large code; that's why I added this check. I'm not…
		eramanUnsubmitted Not Done Reply Inline Actions The value type of the VMap is WeakVH and the value it holds becomes null when it gets destroyed. eraman: The value type of the VMap is WeakVH and the value it holds becomes null when it gets destroyed.
		if (isa<CallInst>(Entry.first) && &*Entry.second != nullptr &&
		isa<CallInst>(Entry.second))
		eramanUnsubmitted Done Reply Inline Actions No need to keep track of the cloned blocks and update the calls inside them. VMap also includes the instructions - so just check if they are calls and update the weights. eraman: No need to keep track of the cloned blocks and update the calls inside them. VMap also includes…
		cast<CallInst>(Entry.second)
		->updateProfWeight(CallCount, CalleeEntryCount.getValue());
		for (BasicBlock &BB : *Callee)
		// No need to update the callsite if it is pruned during inlining.
		eramanUnsubmitted Done Reply Inline Actions Useful to add a comment as to why this is done only when the instruction is in VMap. eraman: Useful to add a comment as to why this is done only when the instruction is in VMap.
		if (VMap.count(&BB))
		for (Instruction &I : BB)
		if (CallInst *CI = dyn_cast<CallInst>(&I))
		eramanUnsubmitted Done Reply Inline Actions The check above is not needed. After all, if the block is reachable in the call context, all instructions inside are reachable too. But there is one tricky case. Let's say that a call has been simplified into a value (this can happen during pruning), Should you update the call count or not? If you should not update, then the check above should be isa<CallInst>(VMap.Count(CI)). I think updating is the right thing here though. eraman: The check above is not needed. After all, if the block is reachable in the call context, all…
		danielcdhAuthorUnsubmitted Not Done Reply Inline Actions The prof count only attaches to branch instructions. For the case you mentioned, it's already checked by the "if (CallInst *CI = dyn_cast<CallInst>(&I))", thus I simply removed the second check. danielcdh: The prof count only attaches to branch instructions. For the case you mentioned, it's already…
		CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
		CalleeEntryCount.getValue());
		}

		eramanUnsubmitted Done Reply Inline Actions Document this method. Also, perhaps choose a more meaningful name. eraman: Document this method. Also, perhaps choose a more meaningful name.
		eramanUnsubmitted Done Reply Inline Actions There is a subtle issue here. Let's say there is a block in the callee that is not executed in this call context and that block has a call instruction. You'll still be reducing the weight of that call even though you haven't cloned it. Similar issue does exist in updating the bfi incrementally, but that's okay because BFI does get recomputed later. In this case, once the weight gets incorrectly updated it remains so forever. What you should be doing is update the weights only when the call has been cloned. eraman: There is a subtle issue here. Let's say there is a block in the callee that is not executed in…
/// Update the entry count of callee after inlining.		/// Update the entry count of callee after inlining.
///		///
/// The callsite's block count is subtracted from the callee's function entry		/// The callsite's block count is subtracted from the callee's function entry
/// count.		/// count.
static void updateCalleeCount(BlockFrequencyInfo &CallerBFI, BasicBlock *CallBB,		static void updateCalleeCount(BlockFrequencyInfo CallerBFI, BasicBlock CallBB,
Function *Callee) {		Instruction CallInst, Function Callee) {
// If the callee has a original count of N, and the estimated count of		// If the callee has a original count of N, and the estimated count of
// callsite is M, the new callee count is set to N - M. M is estimated from		// callsite is M, the new callee count is set to N - M. M is estimated from
// the caller's entry count, its entry block frequency and the block frequency		// the caller's entry count, its entry block frequency and the block frequency
// of the callsite.		// of the callsite.
Optional<uint64_t> CalleeCount = Callee->getEntryCount();		Optional<uint64_t> CalleeCount = Callee->getEntryCount();
if (!CalleeCount)		if (!CalleeCount.hasValue())
		zzhengUnsubmitted Done Reply Inline Actions !CalleeCount.hasValue() zzheng: !CalleeCount.hasValue()
return;		return;
Optional<uint64_t> CallSiteCount = CallerBFI.getBlockProfileCount(CallBB);		Optional<uint64_t> CallCount =
if (!CallSiteCount)		ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
		if (!CallCount.hasValue())
		eramanUnsubmitted Done Reply Inline Actions This code that gets either the metadata weight or the block weight should be refactored and moved out of here. eraman: This code that gets either the metadata weight or the block weight should be refactored and…
		zzhengUnsubmitted Done Reply Inline Actions !CallCount.hasValue() zzheng: !CallCount.hasValue()
return;		return;
// Since CallSiteCount is an estimate, it could exceed the original callee		// Since CallSiteCount is an estimate, it could exceed the original callee
// count and has to be set to 0.		// count and has to be set to 0.
if (CallSiteCount.getValue() > CalleeCount.getValue())		if (CallCount.getValue() > CalleeCount.getValue())
Callee->setEntryCount(0);		Callee->setEntryCount(0);
else		else
Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());		Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
		eramanUnsubmitted Done Reply Inline Actions This gets confusing. You're now updating the entry count based on the metadata. When sample profile is used, is it expected that the sum of the calls' profile weights equal the entry count? Do you smooth out the entry count based on call instructions' profile weight when you initially load the profile. In any case, more comments are helpful. eraman: This gets confusing. You're now updating the entry count based on the metadata. When sample…
		danielcdhAuthorUnsubmitted Not Done Reply Inline Actions Code updated. Yes, the sample profile collection makes sure the function entry count is consistent with all call edges. Let me know if you think more comments still need to be added. danielcdh: Code updated. Yes, the sample profile collection makes sure the function entry count is consistent with…
}		}

/// This function inlines the called function into the basic block of the		/// This function inlines the called function into the basic block of the
/// caller. This returns false if it is not possible to inline this call.		/// caller. This returns false if it is not possible to inline this call.
/// The program is still in a well defined state if this occurs though.		/// The program is still in a well defined state if this occurs though.
///		///
/// Note that this only does one level of inlining. For example, if the		/// Note that this only does one level of inlining. For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now		/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines	Function::iterator FirstNewBlock;
// (which can happen, e.g., because an argument was constant), but we'll be		// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.		// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,		CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/ModuleLevelChanges=/false, Returns, ".i",		/ModuleLevelChanges=/false, Returns, ".i",
&InlinedFunctionInfo, TheCall);		&InlinedFunctionInfo, TheCall);
// Remember the first block that is newly cloned over.		// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;		FirstNewBlock = LastBlock; ++FirstNewBlock;

if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) {		if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
// Update the BFI of blocks cloned into the caller.		// Update the BFI of blocks cloned into the caller.
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,		updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());		CalledFunc->front());

		updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
// Update the profile count of callee.		// Update the profile count of callee.
updateCalleeCount(*IFI.CallerBFI, OrigBB, CalledFunc);		updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);
}

// Inject byval arguments initialization.		// Inject byval arguments initialization.
for (std::pair<Value, Value> &Init : ByValInit)		for (std::pair<Value, Value> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),		HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
&*FirstNewBlock, IFI);		&*FirstNewBlock, IFI);

Optional<OperandBundleUse> ParentDeopt =		Optional<OperandBundleUse> ParentDeopt =
CS.getOperandBundle(LLVMContext::OB_deopt);		CS.getOperandBundle(LLVMContext::OB_deopt);
▲ Show 20 Lines • Show All 631 Lines • Show Last 20 Lines

test/Transforms/Inline/prof-update.ll

This file was added.

				; RUN: opt < %s -inline -S \| FileCheck %s
				; Checks if inliner updates branch_weights annotation for call instructions.
				eramanUnsubmitted Done Reply Inline Actions Please expand this test or add a new test case to handle more complex cases where some calls are not cloned into the caller. eraman: Please expand this test or add a new test case to handle more complex cases where some calls…
				eramanUnsubmitted Done Reply Inline Actions Nit: I prefer writing comments to explain why I expect the values in the CHECK statements. This helps anyone trying to modify the test later. eraman: Nit: I prefer writing comments to explain why I expect the values in the CHECK statements. This…

				declare void @ext();
				declare void @ext1();

				; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
				define void @callee(i32 %n) !prof !1 {
				%cond = icmp sle i32 %n, 10
				br i1 %cond, label %cond_true, label %cond_false
				cond_true:
				; ext1 is optimized away, thus not updated.
				; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
				call void @ext1(), !prof !2
				ret void
				cond_false:
				; ext is cloned and updated.
				; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
				call void @ext(), !prof !2
				ret void
				}

				; CHECK: define void @caller()
				define void @caller() {
				; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
				call void @callee(i32 15), !prof !3
				ret void
				}

				!llvm.module.flags = !{!0}
				!0 = !{i32 1, !"MaxFunctionCount", i32 2000}
				!1 = !{!"function_entry_count", i64 1000}
				!2 = !{!"branch_weights", i64 2000}
				!3 = !{!"branch_weights", i64 400}
				attributes #0 = { alwaysinline }
				; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
				; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
				; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
				; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}

This is an archive of the discontinued LLVM Phabricator instance.

Updates branch_weights annotation for call instructions during inlining.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 92170

include/llvm/IR/Instruction.h

lib/IR/Instruction.cpp

lib/Transforms/Utils/InlineFunction.cpp

test/Transforms/Inline/prof-update.ll

This is an archive of the discontinued LLVM Phabricator instance.

Updates branch_weights annotation for call instructions during inlining.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 92170

include/llvm/IR/Instruction.h

lib/IR/Instruction.cpp

lib/Transforms/Utils/InlineFunction.cpp

test/Transforms/Inline/prof-update.ll

Updates branch_weights annotation for call instructions during inlining.
ClosedPublic