Diff 355304

llvm/include/llvm/Analysis/InlineCost.h

	Show All 9 Lines
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_ANALYSIS_INLINECOST_H			#ifndef LLVM_ANALYSIS_INLINECOST_H
	#define LLVM_ANALYSIS_INLINECOST_H			#define LLVM_ANALYSIS_INLINECOST_H

	#include "llvm/Analysis/AssumptionCache.h"			#include "llvm/Analysis/AssumptionCache.h"
	#include "llvm/Analysis/CallGraphSCCPass.h"			#include "llvm/Analysis/CallGraphSCCPass.h"
				#include "llvm/Analysis/InlineModelFeatureMaps.h"
	#include "llvm/Analysis/OptimizationRemarkEmitter.h"			#include "llvm/Analysis/OptimizationRemarkEmitter.h"
	#include <cassert>			#include <cassert>
	#include <climits>			#include <climits>

	namespace llvm {			namespace llvm {
	class AssumptionCacheTracker;			class AssumptionCacheTracker;
	class BlockFrequencyInfo;			class BlockFrequencyInfo;
	class CallBase;			class CallBase;
	Show All 23 Lines
	/// Do not inline functions which allocate this many bytes on the stack			/// Do not inline functions which allocate this many bytes on the stack
	/// when the caller is recursive.			/// when the caller is recursive.
	const unsigned TotalAllocaSizeRecursiveCaller = 1024;			const unsigned TotalAllocaSizeRecursiveCaller = 1024;
	/// Do not inline dynamic allocas that have been constant propagated to be			/// Do not inline dynamic allocas that have been constant propagated to be
	/// static allocas above this amount in bytes.			/// static allocas above this amount in bytes.
	const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;			const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
	} // namespace InlineConstants			} // namespace InlineConstants

	/// Represents the cost of inlining a function.			/// Represents the cost of inlining a function.
				mtrofinUnsubmitted Not Done Reply Inline Actions I think it'd be preferable if this were defined on the ML side, like in InlineModelFeatureMaps.h. The reason is that it brings in this concept of feature name (e.g. "sroa_savings"), which is a ML side concept. mtrofin: I think it'd be preferable if this were defined on the ML side, like in InlineModelFeatureMaps.
	///			///
	/// This supports special values for functions which should "always" or			/// This supports special values for functions which should "always" or
	/// "never" be inlined. Otherwise, the cost represents a unitless amount;			/// "never" be inlined. Otherwise, the cost represents a unitless amount;
	/// smaller values increase the likelihood of the function being inlined.			/// smaller values increase the likelihood of the function being inlined.
	///			///
	/// Objects of this type also provide the adjusted threshold for inlining			/// Objects of this type also provide the adjusted threshold for inlining
	/// based on the information available for a particular callsite. They can be			/// based on the information available for a particular callsite. They can be
	/// directly tested to determine if inlining should occur given the cost and			/// directly tested to determine if inlining should occur given the cost and
	▲ Show 20 Lines • Show All 199 Lines • ▼ Show 20 Lines
	/// - an integer, representing the cost.			/// - an integer, representing the cost.
	Optional<int> getInliningCostEstimate(			Optional<int> getInliningCostEstimate(
	CallBase &Call, TargetTransformInfo &CalleeTTI,			CallBase &Call, TargetTransformInfo &CalleeTTI,
	function_ref<AssumptionCache &(Function &)> GetAssumptionCache,			function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
	function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,			function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
	ProfileSummaryInfo *PSI = nullptr,			ProfileSummaryInfo *PSI = nullptr,
	OptimizationRemarkEmitter *ORE = nullptr);			OptimizationRemarkEmitter *ORE = nullptr);

				/// Get the expanded cost features. The features are returned unconditionally,
				/// even if inlining is impossible.
				Optional<InlineCostFeatures> getInliningCostFeatures(
				mtrofinUnsubmitted Not Done Reply Inline Actions I'd drop the name 'array', suggests too much impl detail. mtrofin: I'd drop the name 'array', suggests too much impl detail.
				CallBase &Call, TargetTransformInfo &CalleeTTI,
				function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
				function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
				ProfileSummaryInfo *PSI = nullptr,
				OptimizationRemarkEmitter *ORE = nullptr);

	/// Minimal filter to detect invalid constructs for inlining.			/// Minimal filter to detect invalid constructs for inlining.
	InlineResult isInlineViable(Function &Callee);			InlineResult isInlineViable(Function &Callee);

	// This pass is used to annotate instructions during the inline process for			// This pass is used to annotate instructions during the inline process for
	// debugging and analysis. The main purpose of the pass is to see and test			// debugging and analysis. The main purpose of the pass is to see and test
	// inliner's decisions when creating new optimizations to InlineCost.			// inliner's decisions when creating new optimizations to InlineCost.
	struct InlineCostAnnotationPrinterPass			struct InlineCostAnnotationPrinterPass
	: PassInfoMixin<InlineCostAnnotationPrinterPass> {			: PassInfoMixin<InlineCostAnnotationPrinterPass> {
	Show All 9 Lines

llvm/include/llvm/Analysis/InlineModelFeatureMaps.h

	//===- InlineModelFeatureMaps.h - common model runner defs ------- C++ --===//			//===- InlineModelFeatureMaps.h - common model runner defs ------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//

	#ifndef LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H			#ifndef LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H
	#define LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H			#define LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H

	#include <array>			#include <array>
	#include <string>			#include <string>
	#include <vector>			#include <vector>

	namespace llvm {			namespace llvm {
				mtrofinUnsubmitted Not Done Reply Inline Actions this wouldn't be needed once you move in the changes in InlineCost.h here. mtrofin: this wouldn't be needed once you move in the changes in InlineCost.h here.

				// List of cost features. A "cost" feature is a summand of the heuristic-based
				// inline cost, and we define them separately to preserve the original heuristic
				// behavior.
				//
				// Each entry is a pair:
				// - the name of an enum member, which will be the feature index
				// - a textual name for the feature
				// The order of the entries fixes the numeric value of each
				// InlineCostFeatureIndex member; FeatureIndex expands this same list before
				// its own entries.
				#define INLINE_COST_FEATURE_ITERATOR(M) \
				M(SROASavings, "sroa_savings") \
				M(SROALosses, "sroa_losses") \
				M(LoadElimination, "load_elimination") \
				M(CallPenalty, "call_penalty") \
				M(CallArgumentSetup, "call_argument_setup") \
				M(LoadRelativeIntrinsic, "load_relative_intrinsic") \
				M(LoweredCallArgSetup, "lowered_call_arg_setup") \
				M(IndirectCallPenalty, "indirect_call_penalty") \
				M(JumpTablePenalty, "jump_table_penalty") \
				M(CaseClusterPenalty, "case_cluster_penalty") \
				M(SwitchPenalty, "switch_penalty") \
				M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \
				M(NumLoops, "num_loops") \
				M(DeadBlocks, "dead_blocks") \
				M(SimplifiedInstructions, "simplified_instructions") \
				M(ConstantArgs, "constant_args") \
				M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \
				M(CallSiteCost, "callsite_cost") \
				M(ColdCcPenalty, "cold_cc_penalty") \
				M(LastCallToStaticBonus, "last_call_to_static_bonus") \
				M(IsMultipleBlocks, "is_multiple_blocks") \
				M(NestedInlines, "nested_inlines") \
				M(NestedInlineBonus, "nested_inline_bonus") \
				M(Threshold, "threshold")

				// clang-format off
				enum class InlineCostFeatureIndex : size_t {
				mtrofinUnsubmitted Not Done Reply Inline Actions Aah! I see why InlineCostFeaturesArray. How about this enum were InlineCostFeatureIndex, and then the collection is InlineCostFeatures mtrofin: Aah! I see why InlineCostFeaturesArray. How about this enum were InlineCostFeatureIndex, and…
				#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
				INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
				#undef POPULATE_INDICES

				NumberOfFeatures
				};
				// clang-format on

				// Collection of cost feature values: one int slot per InlineCostFeatureIndex
				// member (the NumberOfFeatures sentinel only provides the size), addressed
				// by the member's underlying size_t value.
				using InlineCostFeatures =
				std::array<int,
				static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures)>;

				// Returns true for every cost feature except the ones listed below
				// (SROASavings, IsMultipleBlocks, DeadBlocks, SimplifiedInstructions,
				// ConstantArgs, ConstantOffsetPtrArgs, NestedInlines, NestedInlineBonus and
				// Threshold), for which it returns false.
				constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) {
				  return !(Feature == InlineCostFeatureIndex::SROASavings ||
				           Feature == InlineCostFeatureIndex::IsMultipleBlocks ||
				           Feature == InlineCostFeatureIndex::DeadBlocks ||
				           Feature == InlineCostFeatureIndex::SimplifiedInstructions ||
				           Feature == InlineCostFeatureIndex::ConstantArgs ||
				           Feature == InlineCostFeatureIndex::ConstantOffsetPtrArgs ||
				           Feature == InlineCostFeatureIndex::NestedInlines ||
				           Feature == InlineCostFeatureIndex::NestedInlineBonus ||
				           Feature == InlineCostFeatureIndex::Threshold);
				}

	// List of features. Each feature is defined through a triple:			// List of features. Each feature is defined through a triple:
	// - the name of an enum member, which will be the feature index			// - the name of an enum member, which will be the feature index
	// - a textual name, used for Tensorflow model binding (so it needs to match the			// - a textual name, used for Tensorflow model binding (so it needs to match the
	// names used by the Tensorflow model)			// names used by the Tensorflow model)
	// - a documentation description. Currently, that is not used anywhere			// - a documentation description. Currently, that is not used anywhere
	// programmatically, and serves as workaround to inability of inserting comments			// programmatically, and serves as workaround to inability of inserting comments
	// in macros.			// in macros.
	#define INLINE_FEATURE_ITERATOR(M) \			#define INLINE_FEATURE_ITERATOR(M) \
	M(CalleeBasicBlockCount, "callee_basic_block_count", \			M(CalleeBasicBlockCount, "callee_basic_block_count", \
	"number of basic blocks of the callee") \			"number of basic blocks of the callee") \
	M(CallSiteHeight, "callsite_height", \			M(CallSiteHeight, "callsite_height", \
	"position of the call site in the original call graph - measured from " \			"position of the call site in the original call graph - measured from " \
	"the farthest SCC") \			"the farthest SCC") \
	M(NodeCount, "node_count", \			M(NodeCount, "node_count", \
	"total current number of defined functions in the module") \			"total current number of defined functions in the module") \
	M(NrCtantParams, "nr_ctant_params", \			M(NrCtantParams, "nr_ctant_params", \
	"number of parameters in the call site that are constants") \			"number of parameters in the call site that are constants") \
	M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \
	M(EdgeCount, "edge_count", "total number of calls in the module") \			M(EdgeCount, "edge_count", "total number of calls in the module") \
	M(CallerUsers, "caller_users", \			M(CallerUsers, "caller_users", \
	"number of module-internal users of the caller, +1 if the caller is " \			"number of module-internal users of the caller, +1 if the caller is " \
	"exposed externally") \			"exposed externally") \
	M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \			M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \
	"number of blocks reached from a conditional instruction, in the caller") \			"number of blocks reached from a conditional instruction, in the caller") \
	M(CallerBasicBlockCount, "caller_basic_block_count", \			M(CallerBasicBlockCount, "caller_basic_block_count", \
	"number of basic blocks in the caller") \			"number of basic blocks in the caller") \
	M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \			M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \
	"number of blocks reached from a conditional instruction, in the callee") \			"number of blocks reached from a conditional instruction, in the callee") \
	M(CalleeUsers, "callee_users", \			M(CalleeUsers, "callee_users", \
	"number of module-internal users of the callee, +1 if the callee is " \			"number of module-internal users of the callee, +1 if the callee is " \
	"exposed externally")			"exposed externally") \
				M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)")
				mtrofinUnsubmitted Not Done Reply Inline Actions why not leave this where it was? because you can now populate the remainder in a loop? mtrofin: why not leave this where it was? because you can now populate the remainder in a loop?
				jacobhegnaAuthorUnsubmitted Done Reply Inline Actions it's an artifact from when I was fiddling around with things - will move it back jacobhegna: it's an artifact from when I was fiddling around with things - will move it back

				// clang-format off
	enum class FeatureIndex : size_t {			enum class FeatureIndex : size_t {
				// InlineCost features - these must come first
				#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
				INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
				#undef POPULATE_INDICES

				// Non-cost features
	#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,			#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,
	INLINE_FEATURE_ITERATOR(POPULATE_INDICES)			INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
	#undef POPULATE_INDICES			#undef POPULATE_INDICES

				mtrofinUnsubmitted Not Done Reply Inline Actions unrelated change? mtrofin: unrelated change?
				jacobhegnaAuthorUnsubmitted Done Reply Inline Actions are you referring to the newline that is inserted here? I think it just makes things easier to read in the enum jacobhegna: are you referring to the newline that is inserted here? I think it just makes things easier to…
	NumberOfFeatures			NumberOfFeatures
	};			};
				// clang-format on

				// Maps a cost feature index onto the FeatureIndex member with the same
				// numeric value. This relies on FeatureIndex listing the cost features
				// first, in the same order as InlineCostFeatureIndex.
				constexpr FeatureIndex
				inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) {
				  const size_t RawIndex = static_cast<size_t>(Feature);
				  return static_cast<FeatureIndex>(RawIndex);
				}

	constexpr size_t NumberOfFeatures =			constexpr size_t NumberOfFeatures =
	static_cast<size_t>(FeatureIndex::NumberOfFeatures);			static_cast<size_t>(FeatureIndex::NumberOfFeatures);

	extern const std::array<std::string, NumberOfFeatures> FeatureNameMap;			extern const std::array<std::string, NumberOfFeatures> FeatureNameMap;

	extern const char *const DecisionName;			extern const char *const DecisionName;
	extern const char *const DefaultDecisionName;			extern const char *const DefaultDecisionName;
	extern const char *const RewardName;			extern const char *const RewardName;

	using InlineFeatures = std::vector<int64_t>;			using InlineFeatures = std::vector<int64_t>;

	} // namespace llvm			} // namespace llvm
	#endif // LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H			#endif // LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H

llvm/lib/Analysis/CMakeLists.txt

	if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)			if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
	include(TensorFlowCompile)			include(TensorFlowCompile)
	set(LLVM_INLINER_MODEL_PATH_DEFAULT "models/inliner-Oz")			set(LLVM_INLINER_MODEL_PATH_DEFAULT "models/inliner-Oz")

	# This url points to the most recent model which is known to be compatible with			# This url points to the most recent model which is known to be compatible with
	# LLVM. When better models are published, this url should be updated to aid			# LLVM. When better models are published, this url should be updated to aid
	# discoverability.			# discoverability.
	set(LLVM_INLINER_MODEL_CURRENT_URL "https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v0.1/inlining-Oz-acabaf6-v0.1.tar.gz")			set(LLVM_INLINER_MODEL_CURRENT_URL "TO_BE_UPDATED")
				mtrofinUnsubmitted Not Done Reply Inline Actions more clear to say "<TO BE UPDATED>" mtrofin: more clear to say "<TO BE UPDATED>"

	if (DEFINED LLVM_HAVE_TF_AOT)			if (DEFINED LLVM_HAVE_TF_AOT)
	# If the path is empty, autogenerate the model			# If the path is empty, autogenerate the model
	if (NOT DEFINED LLVM_INLINER_MODEL_PATH OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL "")			if (NOT DEFINED LLVM_INLINER_MODEL_PATH OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL "")
	set(LLVM_INLINER_MODEL_PATH "autogenerate")			set(LLVM_INLINER_MODEL_PATH "autogenerate")
	message(WARNING "LLVM_INLINER_MODEL_PATH was not set: autogenerating a model to finish the build.")			message(WARNING "LLVM_INLINER_MODEL_PATH was not set: autogenerating a model to finish the build.")
	endif()			endif()

	▲ Show 20 Lines • Show All 151 Lines • Show Last 20 Lines

llvm/lib/Analysis/InlineCost.cpp

Show First 20 Lines • Show All 123 Lines • ▼ Show 20 Lines	static cl::opt<bool> InlineCallerSupersetNoBuiltin(
cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "		cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
"attributes."));		"attributes."));

static cl::opt<bool> DisableGEPConstOperand(		static cl::opt<bool> DisableGEPConstOperand(
"disable-gep-const-evaluation", cl::Hidden, cl::init(false),		"disable-gep-const-evaluation", cl::Hidden, cl::init(false),
cl::desc("Disables evaluation of GetElementPtr with constant operands"));		cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace {		namespace {
class InlineCostCallAnalyzer;		class InlineCostCallAnalyzer;
		mtrofinUnsubmitted Not Done Reply Inline Actions remove spurious change mtrofin: remove spurious change

// This struct is used to store information about inline cost of a		// This struct is used to store information about inline cost of a
// particular instruction		// particular instruction
struct InstructionCostDetail {		struct InstructionCostDetail {
int CostBefore = 0;		int CostBefore = 0;
int CostAfter = 0;		int CostAfter = 0;
int ThresholdBefore = 0;		int ThresholdBefore = 0;
int ThresholdAfter = 0;		int ThresholdAfter = 0;
▲ Show 20 Lines • Show All 264 Lines • ▼ Show 20 Lines	protected:
bool visitSwitchInst(SwitchInst &SI);		bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);		bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);		bool visitResumeInst(ResumeInst &RI);
bool visitCleanupReturnInst(CleanupReturnInst &RI);		bool visitCleanupReturnInst(CleanupReturnInst &RI);
bool visitCatchReturnInst(CatchReturnInst &RI);		bool visitCatchReturnInst(CatchReturnInst &RI);
bool visitUnreachableInst(UnreachableInst &I);		bool visitUnreachableInst(UnreachableInst &I);

public:		public:
CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,		CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,		function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,		function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
ProfileSummaryInfo *PSI = nullptr,		ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr)		OptimizationRemarkEmitter *ORE = nullptr)
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting change - see previous comment mtrofin: formatting change - see previous comment
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),		: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),		PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCall(Call), EnableLoadElimination(true) {}		CandidateCall(Call), EnableLoadElimination(true) {}

InlineResult analyze();		InlineResult analyze();

Optional<Constant > getSimplifiedValue(Instruction I) {		Optional<Constant > getSimplifiedValue(Instruction I) {
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting change mtrofin: formatting change
if (SimplifiedValues.find(I) != SimplifiedValues.end())		if (SimplifiedValues.find(I) != SimplifiedValues.end())
return SimplifiedValues[I];		return SimplifiedValues[I];
return None;		return None;
}		}

// Keep a bunch of stats about the cost savings found so we can print them		// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.		// out when debugging.
unsigned NumConstantArgs = 0;		unsigned NumConstantArgs = 0;
unsigned NumConstantOffsetPtrArgs = 0;		unsigned NumConstantOffsetPtrArgs = 0;
unsigned NumAllocaArgs = 0;		unsigned NumAllocaArgs = 0;
unsigned NumConstantPtrCmps = 0;		unsigned NumConstantPtrCmps = 0;
unsigned NumConstantPtrDiffs = 0;		unsigned NumConstantPtrDiffs = 0;
unsigned NumInstructionsSimplified = 0;		unsigned NumInstructionsSimplified = 0;

void dump();		void dump();
};		};

mtrofinUnsubmitted Not Done Reply Inline Actions spurious change? mtrofin: spurious change?
/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note		/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer		/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {		class InlineCostCallAnalyzer final : public CallAnalyzer {
const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;		const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
const bool ComputeFullInlineCost;		const bool ComputeFullInlineCost;
int LoadEliminationCost = 0;		int LoadEliminationCost = 0;
/// Bonus to be applied when percentage of vector instructions in callee is		/// Bonus to be applied when percentage of vector instructions in callee is
/// high (see more details in updateThreshold).		/// high (see more details in updateThreshold).
int VectorBonus = 0;		int VectorBonus = 0;
/// Bonus to be applied when the callee has only one reachable basic block.		/// Bonus to be applied when the callee has only one reachable basic block.
int SingleBBBonus = 0;		int SingleBBBonus = 0;

/// Tunable parameters that control the analysis.		/// Tunable parameters that control the analysis.
const InlineParams &Params;		const InlineParams &Params;

// This DenseMap stores the delta change in cost and threshold after		// This DenseMap stores the delta change in cost and threshold after
// accounting for the given instruction. The map is filled only with the		// accounting for the given instruction. The map is filled only with the
// flag PrintInstructionComments on.		// flag PrintInstructionComments on.
DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;		DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

/// Upper bound for the inlining cost. Bonuses are being applied to account		/// Upper bound for the inlining cost. Bonuses are being applied to account
/// for speculative "expected profit" of the inlining decision.		/// for speculative "expected profit" of the inlining decision.
int Threshold = 0;		int Threshold = 0;

/// Attempt to evaluate indirect calls to boost its inline cost.		/// Attempt to evaluate indirect calls to boost its inline cost.
		mtrofinUnsubmitted Not Done Reply Inline Actions I don't think the ML aspects should intermingle with the non-ml ones. mtrofin: I don't think the ML aspects should intermingle with the non-ml ones.
const bool BoostIndirectCalls;		const bool BoostIndirectCalls;

/// Ignore the threshold when finalizing analysis.		/// Ignore the threshold when finalizing analysis.
const bool IgnoreThreshold;		const bool IgnoreThreshold;

// True if the cost-benefit-analysis-based inliner is enabled.		// True if the cost-benefit-analysis-based inliner is enabled.
const bool CostBenefitAnalysisEnabled;		const bool CostBenefitAnalysisEnabled;

▲ Show 20 Lines • Show All 456 Lines • ▼ Show 20 Lines	Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
return None;		return None;
}		}

virtual ~InlineCostCallAnalyzer() {}		virtual ~InlineCostCallAnalyzer() {}
int getThreshold() { return Threshold; }		int getThreshold() { return Threshold; }
int getCost() { return Cost; }		int getCost() { return Cost; }
bool wasDecidedByCostBenefit() { return DecidedByCostBenefit; }		bool wasDecidedByCostBenefit() { return DecidedByCostBenefit; }
};		};

		class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
		private:
		InlineCostFeatures Cost = {};
		unsigned SROACostSavingOpportunities = 0;
		mtrofinUnsubmitted Not Done Reply Inline Actions Where is this used, other than in onDisableLoadElimination, where it's set to 0? mtrofin: Where is this used, other than in onDisableLoadElimination, where it's set to 0?
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions it is meant so that InlineCostFeatures::LoadElimination is only ever set once, but there is a better way to express that. It also was never set to a nonzero value in the first place, which is just a bug. jacobhegna: it is meant so that InlineCostFeatures::LoadElimination is only ever set once, but there is a…
		int VectorBonus = 0;
		mtrofinUnsubmitted Not Done Reply Inline Actions this is counting the opportunities for SROACostSavings, right? Maybe rename to SROACostSavingOpportunities? mtrofin: this is counting the opportunities for SROACostSavings, right? Maybe rename to…
		int SingleBBBonus = 0;
		int Threshold = 5;

		mtrofinUnsubmitted Not Done Reply Inline Actions Is this really a threshold, or can it have a name that better communicates what it accumulates? (apologies, can't right now figure what that may be) same for the 2 bonuses above. mtrofin: Is this really a threshold, or can it have a name that better communicates what it accumulates?
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions these are copied from the heuristic inline visitor and used in almost the same way - there is some complexity in the other visitor that isn't replicated here. I'll keep the names the same to reflect that duplication. In the future, there some be some abstraction so that the two visitors could share these values/logic for updating them. jacobhegna: these are copied from the heuristic inline visitor and used in almost the same way - there is…
		mtrofinUnsubmitted Not Done Reply Inline Actions OK, can you add a FIXME explaining this, above their name or something like that. mtrofin: OK, can you add a FIXME explaining this, above their name or something like that.
		DenseMap<AllocaInst *, unsigned> SROACosts;

		/// Adds \p Delta to the value currently stored for \p Feature.
		/// NOTE(review): Delta is int64_t while InlineCostFeatures stores int, so the
		/// sum is narrowed implicitly - confirm this cannot overflow.
		void update(InlineCostFeatureIndex Feature, int64_t Delta) {
		mtrofinUnsubmitted Done Reply Inline Actions I think `increment` would be more clear. Then Delta can have default value 1, too (For the few cases that'd be the case) mtrofin: I think `increment` would be more clear. Then Delta can have default value 1, too (For the few…
		Cost[static_cast<size_t>(Feature)] += Delta;
		}

		/// Overwrites the value stored for \p Feature with \p Value.
		void set(InlineCostFeatureIndex Feature, int64_t Value) {
		  const size_t Slot = static_cast<size_t>(Feature);
		  Cost[Slot] = Value;
		}

		/// Moves the cost accumulated for \p Arg from the pending SROA savings to
		/// the SROALosses feature and forgets the alloca. Does nothing when \p Arg
		/// has no entry in SROACosts.
		void onDisableSROA(AllocaInst *Arg) override {
		  auto Entry = SROACosts.find(Arg);
		  if (Entry != SROACosts.end()) {
		    const unsigned Accumulated = Entry->second;
		    update(InlineCostFeatureIndex::SROALosses, Accumulated);
		    SROACostSavingOpportunities -= Accumulated;
		    SROACosts.erase(Entry);
		  }
		}

		void onDisableLoadElimination() override {
		set(InlineCostFeatureIndex::LoadElimination, 1);
		}

		void onCallPenalty() override {
		update(InlineCostFeatureIndex::CallPenalty, InlineConstants::CallPenalty);
		}

		/// Adds one InstrCost per argument of \p Call to the CallArgumentSetup
		/// feature.
		void onCallArgumentSetup(const CallBase &Call) override {
		  const auto SetupCost = Call.arg_size() * InlineConstants::InstrCost;
		  update(InlineCostFeatureIndex::CallArgumentSetup, SetupCost);
		}

		/// Adds three times InlineConstants::InstrCost to the LoadRelativeIntrinsic
		/// feature.
		void onLoadRelativeIntrinsic() override {
		update(InlineCostFeatureIndex::LoadRelativeIntrinsic,
		3 * InlineConstants::InstrCost);
		mtrofinUnsubmitted Not Done Reply Inline Actions can this '3' be a const with some name (so it's easier to chase down any magic factors) mtrofin: can this '3' be a const with some name (so it's easier to chase down any magic factors)
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions discussing offline jacobhegna: discussing offline
		}

		/// Records the cost features for a lowered call to \p F.
		///
		/// One InstrCost per argument of \p Call is always added to
		/// LoweredCallArgSetup. When \p IsIndirectCall is set, a nested
		/// InlineCostCallAnalyzer is run on \p F with
		/// InlineConstants::IndirectCallThreshold as its default threshold; if that
		/// analysis succeeds, its cost is added to NestedInlineBonus and
		/// NestedInlines is incremented. Otherwise the regular call penalty is
		/// charged.
		void onLoweredCall(Function *F, CallBase &Call,
		                   bool IsIndirectCall) override {
		  update(InlineCostFeatureIndex::LoweredCallArgSetup,
		         Call.arg_size() * InlineConstants::InstrCost);

		  if (IsIndirectCall) {
		    InlineParams IndirectCallParams = {/*DefaultThreshold*/ 0,
		                                       /*HintThreshold*/ {},
		                                       /*ColdThreshold*/ {},
		                                       /*OptSizeThreshold*/ {},
		                                       /*OptMinSizeThreshold*/ {},
		                                       /*HotCallSiteThreshold*/ {},
		                                       /*LocallyHotCallSiteThreshold*/ {},
		                                       /*ColdCallSiteThreshold*/ {},
		                                       /*ComputeFullInlineCost*/ true,
		                                       /*EnableDeferral*/ true};
		    IndirectCallParams.DefaultThreshold =
		        InlineConstants::IndirectCallThreshold;

		    // NOTE(review): the trailing `false, true` presumably select "do not
		    // boost indirect calls" and "ignore the threshold" - confirm against the
		    // InlineCostCallAnalyzer constructor.
		    InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
		                              GetAssumptionCache, GetBFI, PSI, ORE, false,
		                              true);
		    if (CA.analyze().isSuccess()) {
		      update(InlineCostFeatureIndex::NestedInlineBonus, CA.getCost());
		      update(InlineCostFeatureIndex::NestedInlines, 1);
		    }
		  } else {
		    onCallPenalty();
		  }
		}

		/// Raises NestedInlines to at least one more than the NestedInlines value
		/// of \p OtherCost, then adds a bonus to NestedInlineBonus. The bonus is the
		/// sum of this analyzer's heuristic cost features plus its current
		/// NestedInlineBonus.
		///
		/// NOTE(review): the bonus is summed from this analyzer's own Cost, not
		/// from \p OtherCost - confirm that this is intended.
		void accumulateBonus(const InlineCostFeatures &OtherCost) {
		  size_t NestedInlineIdx =
		      static_cast<size_t>(InlineCostFeatureIndex::NestedInlines);
		  set(InlineCostFeatureIndex::NestedInlines,
		      std::max(Cost[NestedInlineIdx], OtherCost[NestedInlineIdx] + 1));

		  int64_t Bonus = 0;
		  for (size_t I = 0;
		       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures);
		       ++I) {
		    const auto Feature = static_cast<InlineCostFeatureIndex>(I);
		    if (isHeuristicInlineCostFeature(Feature) ||
		        Feature == InlineCostFeatureIndex::NestedInlineBonus)
		      Bonus += Cost[I];
		  }
		  update(InlineCostFeatureIndex::NestedInlineBonus, Bonus);
		}

		/// Charges exactly one of three switch-related features, depending on the
		/// arguments:
		/// - JumpTablePenalty when \p JumpTableSize is non-zero,
		/// - CaseClusterPenalty when there are at most three case clusters,
		/// - SwitchPenalty otherwise.
		void onFinalizeSwitch(unsigned JumpTableSize,
		unsigned NumCaseCluster) override {

		// Non-zero jump table size: (JumpTableSize + 4) instruction costs.
		if (JumpTableSize) {
		int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
		4 * InlineConstants::InstrCost;
		update(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
		return;
		}

		// Few case clusters: two instruction costs per cluster.
		if (NumCaseCluster <= 3) {
		update(InlineCostFeatureIndex::CaseClusterPenalty,
		NumCaseCluster * 2 * InlineConstants::InstrCost);
		mtrofinUnsubmitted Done Reply Inline Actions can we replace this 2 and the 4 above with const values with a good name same below - 3, 2 etc. mtrofin: can we replace this 2 and the 4 above with const values with a good name same below - 3, 2 etc.
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions ok jacobhegna: ok
		return;
		}

		// Otherwise: (3 * NumCaseCluster / 2 - 1) compares, two instruction costs
		// each.
		int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
		int64_t SwitchCost =
		ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
		update(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
		}

		void onMissedSimplification() override {
		update(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
		InlineConstants::InstrCost);
		}

		/// Starts (or resets) the SROA cost tracking of \p Arg at zero.
		void onInitializeSROAArg(AllocaInst *Arg) override {
		  SROACosts[Arg] = 0;
		}
		/// Adds one InstrCost both to the cost tracked for \p Arg and to the total
		/// of pending SROA savings.
		/// NOTE(review): the lookup is not checked against end(); this assumes
		/// \p Arg was registered through onInitializeSROAArg - confirm with callers.
		void onAggregateSROAUse(AllocaInst *Arg) override {
		  auto Entry = SROACosts.find(Arg);
		  Entry->second += InlineConstants::InstrCost;
		  SROACostSavingOpportunities += InlineConstants::InstrCost;
		}

		/// Flags the callee as multi-block when the terminator of \p BB has more
		/// than one successor, then subtracts SingleBBBonus from Threshold.
		/// NOTE(review): the subtraction is unconditional and runs once per call -
		/// confirm that is intended.
		void onBlockAnalyzed(const BasicBlock *BB) override {
		  const bool HasSeveralSuccessors =
		      BB->getTerminator()->getNumSuccessors() > 1;
		  if (HasSeveralSuccessors)
		    set(InlineCostFeatureIndex::IsMultipleBlocks, 1);

		  Threshold -= SingleBBBonus;
		}

		/// Publishes the features that are only known once the callee has been
		/// fully visited, and reports success.
		///
		/// When the caller is marked minsize, every loop of the callee whose header
		/// is not a dead block adds one CallPenalty to NumLoops. The counters
		/// gathered by the base analyzer (dead blocks, simplified instructions,
		/// constant args, constant-offset pointer args) and the pending SROA savings
		/// are copied into their feature slots. Finally the vector bonus is removed
		/// from Threshold (fully or by half, depending on the share of vector
		/// instructions) before Threshold is stored as a feature.
		InlineResult finalizeAnalysis() override {
		  auto *Caller = CandidateCall.getFunction();
		  if (Caller->hasMinSize()) {
		    DominatorTree DT(F);
		    LoopInfo LI(DT);
		    for (Loop *L : LI) {
		      // Ignore loops that will not be executed
		      if (DeadBlocks.count(L->getHeader()))
		        continue;
		      update(InlineCostFeatureIndex::NumLoops, InlineConstants::CallPenalty);
		    }
		  }
		  set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
		  set(InlineCostFeatureIndex::SimplifiedInstructions,
		      NumInstructionsSimplified);
		  set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
		  set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
		      NumConstantOffsetPtrArgs);
		  set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

		  // Adjust the Threshold member itself: writing the adjustment into the
		  // feature slot would be discarded by the set() below, which overwrites
		  // that slot with the member's value.
		  if (NumVectorInstructions <= NumInstructions / 10)
		    Threshold -= VectorBonus;
		  else if (NumVectorInstructions <= NumInstructions / 2)
		    Threshold -= VectorBonus / 2;

		  set(InlineCostFeatureIndex::Threshold, Threshold);

		  return InlineResult::success();
		}

		/// The feature analysis never requests an early exit: always returns false.
		bool shouldStop() override {
		  return false;
		}

		void onLoadEliminationOpportunity() override {
		update(InlineCostFeatureIndex::LoadElimination, 1);
		}

		/// Seeds the call-site features and the local threshold bookkeeping, then
		/// reports success.
		///
		/// CallSiteCost receives the negated call-site cost and ColdCcPenalty
		/// records whether the callee uses the cold calling convention. Threshold is
		/// adjusted and scaled through TTI, and the single-basic-block and vector
		/// bonuses derived from it are added on top.
		InlineResult onAnalysisStart() override {
		  const auto CallsiteCost = getCallsiteCost(this->CandidateCall, DL);
		  update(InlineCostFeatureIndex::CallSiteCost, -1 * CallsiteCost);

		  const bool UsesColdCC = F.getCallingConv() == CallingConv::Cold;
		  set(InlineCostFeatureIndex::ColdCcPenalty, UsesColdCC);

		  // FIXME: this threshold/bonus computation is duplicated between the
		  // Features and the Cost analyzer; it should live in a common method of
		  // CallAnalyzer instead.
		  const int BonusPercentForSingleBB = 50;
		  const int BonusPercentForVectors = TTI.getInlinerVectorBonusPercent();
		  Threshold += TTI.adjustInliningThreshold(&CandidateCall);
		  Threshold *= TTI.getInliningThresholdMultiplier();
		  SingleBBBonus = Threshold * BonusPercentForSingleBB / 100;
		  VectorBonus = Threshold * BonusPercentForVectors / 100;
		  Threshold += SingleBBBonus + VectorBonus;

		  return InlineResult::success();
		}

		public:
		/// Builds a feature analyzer for inlining \p Callee at the call site
		/// \p Call; all analysis inputs except \p ORE are forwarded to CallAnalyzer.
		/// NOTE(review): \p ORE is accepted but not forwarded, so the base class
		/// keeps its default (nullptr) remark emitter - confirm this is intended.
		InlineCostFeaturesAnalyzer(
		    const TargetTransformInfo &TTI,
		    function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
		    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
		    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
		    CallBase &Call)
		    : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

		/// Read-only access to the feature values accumulated so far.
		const InlineCostFeatures &features() const {
		  return Cost;
		}
		};

} // namespace		} // namespace

/// Test whether the given value is an Alloca-derived function argument.		/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {		bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
return SROAArgValues.count(V);		return SROAArgValues.count(V);
}		}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {		void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
onDisableSROA(SROAArg);		onDisableSROA(SROAArg);
EnabledSROAAllocas.erase(SROAArg);		EnabledSROAAllocas.erase(SROAArg);
disableLoadElimination();		disableLoadElimination();
}		}

void InlineCostAnnotationWriter::emitInstructionAnnot(		void InlineCostAnnotationWriter::emitInstructionAnnot(
const Instruction *I, formatted_raw_ostream &OS) {		const Instruction *I, formatted_raw_ostream &OS) {
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
// The cost of inlining of the given instruction is printed always.		// The cost of inlining of the given instruction is printed always.
// The threshold delta is printed only when it is non-zero. It happens		// The threshold delta is printed only when it is non-zero. It happens
// when we decided to give a bonus at a particular instruction.		// when we decided to give a bonus at a particular instruction.
Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);		Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
if (!Record)		if (!Record)
OS << "; No analysis for the instruction";		OS << "; No analysis for the instruction";
else {		else {
OS << "; cost before = " << Record->CostBefore		OS << "; cost before = " << Record->CostBefore
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines	if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
// unconditional CFG path. Avoid inlining if this is going to happen above		// unconditional CFG path. Avoid inlining if this is going to happen above
// a threshold.		// a threshold.
// FIXME: If the threshold is removed or lowered too much, we could end up		// FIXME: If the threshold is removed or lowered too much, we could end up
// being too pessimistic and prevent inlining non-problematic code. This		// being too pessimistic and prevent inlining non-problematic code. This
// could result in unintended perf regressions. A better overall strategy		// could result in unintended perf regressions. A better overall strategy
// is needed to track stack usage during inlining.		// is needed to track stack usage during inlining.
Type *Ty = I.getAllocatedType();		Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(		AllocatedSize = SaturatingMultiplyAdd(
AllocSize->getLimitedValue(),		AllocSize->getLimitedValue(),
DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);		DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)		if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
HasDynamicAlloca = true;		HasDynamicAlloca = true;
return false;		return false;
}		}
}		}

// Accumulate the allocated size.		// Accumulate the allocated size.
if (I.isStaticAlloca()) {		if (I.isStaticAlloca()) {
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines	for (const Use &Op : GEP.indices())
if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))		if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
return false;		return false;
return true;		return true;
};		};

if (!DisableGEPConstOperand)		if (!DisableGEPConstOperand)
if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {		if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
SmallVector<Constant *, 2> Indices;		SmallVector<Constant *, 2> Indices;
for (unsigned int Index = 1; Index < COps.size(); ++Index)		for (unsigned int Index = 1; Index < COps.size(); ++Index)
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
Indices.push_back(COps[Index]);		Indices.push_back(COps[Index]);
return ConstantExpr::getGetElementPtr(		return ConstantExpr::getGetElementPtr(
I.getSourceElementType(), COps[0], Indices, I.isInBounds());		I.getSourceElementType(), COps[0], Indices, I.isInBounds());
}))		}))
return true;		return true;

if ((I.isInBounds() && canFoldInboundsGEP(I)) \|\| IsGEPOffsetConstant(I)) {		if ((I.isInBounds() && canFoldInboundsGEP(I)) \|\| IsGEPOffsetConstant(I)) {
if (SROAArg)		if (SROAArg)
▲ Show 20 Lines • Show All 721 Lines • ▼ Show 20 Lines	if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
SROAArgValues[&SI] = SROAArg;		SROAArgValues[&SI] = SROAArg;
return true;		return true;
}		}

return Base::visitSelectInst(SI);		return Base::visitSelectInst(SI);
}		}

// Select condition is a constant.		// Select condition is a constant.
Value *SelectedV = CondC->isAllOnesValue() ? TrueVal		Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
: (CondC->isNullValue()) ? FalseVal		: (CondC->isNullValue()) ? FalseVal
: nullptr;		: nullptr;
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
if (!SelectedV) {		if (!SelectedV) {
// Condition is a vector constant that is not all 1s or all 0s. If all		// Condition is a vector constant that is not all 1s or all 0s. If all
// operands are constants, ConstantExpr::getSelect() can handle the cases		// operands are constants, ConstantExpr::getSelect() can handle the cases
// such as select vectors.		// such as select vectors.
if (TrueC && FalseC) {		if (TrueC && FalseC) {
if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {		if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
SimplifiedValues[&SI] = C;		SimplifiedValues[&SI] = C;
return true;		return true;
▲ Show 20 Lines • Show All 448 Lines • ▼ Show 20 Lines	#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);		DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
DEBUG_PRINT_STAT(Cost);		DEBUG_PRINT_STAT(Cost);
DEBUG_PRINT_STAT(Threshold);		DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT		#undef DEBUG_PRINT_STAT
}		}

#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)		#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.		/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); }		LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); }
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
#endif		#endif

/// Test that there are no attribute conflicts between Caller and Callee		/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.		/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(		static bool functionsHaveCompatibleAttributes(
Function Caller, Function Callee, TargetTransformInfo &TTI,		Function Caller, Function Callee, TargetTransformInfo &TTI,
function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {		function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
// Note that CalleeTLI must be a copy not a reference. The legacy pass manager		// Note that CalleeTLI must be a copy not a reference. The legacy pass manager
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI, ORE, true,		GetAssumptionCache, GetBFI, PSI, ORE, true,
/IgnoreThreshold/ true);		/IgnoreThreshold/ true);
auto R = CA.analyze();		auto R = CA.analyze();
if (!R.isSuccess())		if (!R.isSuccess())
return None;		return None;
return CA.getCost();		return CA.getCost();
}		}

		Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
		CallBase &Call, TargetTransformInfo &CalleeTTI,
		function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
		function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
		ProfileSummaryInfo PSI, OptimizationRemarkEmitter ORE) {
		InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
		ORE, *Call.getCalledFunction(), Call);
		auto R = CFA.analyze();
		mtrofinUnsubmitted Not Done Reply Inline Actions Not sure. What's the value of CFA.features() if !R? It may be clearer to handle the error here and return {} or something like that. It creates less coupling between deep implementation details and their consumers; it also simplifies the state space of the values this API can produce. Better yet: return an Optional<InlineCostFeaturesArray>, because a 0-valued feature vector isn't necessarily equivalent to CFA.analyze() saying "no". Returning None in that case makes for a clearer API. mtrofin: Not sure. What's the value of CFA.features() if !R? It may be clearer to handle the error here…
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions I wasn't returning an optional for a very specific reason: std::array cannot be efficiently moved-from, so if I return an optional, we couldn't explicitly unpack it later without eating another copy of the array. I can swap it back now, then in MLInlineAdvisor.cpp avoid the copy by taking a reference to the array inside the optional. In a later patch, I want to switch the array to a vector, so that we can efficiently std::move it, and then returning an optional here wouldn't come with a performance hit. jacobhegna: I wasn't returning an optional for a very specific reason: std::array cannot be efficiently…
		if (!R.isSuccess())
		return None;
		return CFA.features();
		}

Optional<InlineResult> llvm::getAttributeBasedInliningDecision(		Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,		CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {		function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {

// Cannot inline indirect calls.		// Cannot inline indirect calls.
if (!Callee)		if (!Callee)
return InlineResult::failure("indirect call");		return InlineResult::failure("indirect call");

▲ Show 20 Lines • Show All 251 Lines • ▼ Show 20 Lines	if (OptLevel > 2)
Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;		Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
return Params;		return Params;
}		}

PreservedAnalyses		PreservedAnalyses
InlineCostAnnotationPrinterPass::run(Function &F,		InlineCostAnnotationPrinterPass::run(Function &F,
FunctionAnalysisManager &FAM) {		FunctionAnalysisManager &FAM) {
PrintInstructionComments = true;		PrintInstructionComments = true;
std::function<AssumptionCache &(Function &)> GetAssumptionCache =		std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {		[&](Function &F) -> AssumptionCache & {
		mtrofinUnsubmitted Not Done Reply Inline Actions formatting mtrofin: formatting
return FAM.getResult<AssumptionAnalysis>(F);		return FAM.getResult<AssumptionAnalysis>(F);
};		};
Module *M = F.getParent();		Module *M = F.getParent();
ProfileSummaryInfo PSI(*M);		ProfileSummaryInfo PSI(*M);
DataLayout DL(M);		DataLayout DL(M);
TargetTransformInfo TTI(DL);		TargetTransformInfo TTI(DL);
// FIXME: Redesign the usage of InlineParams to expand the scope of this pass.		// FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
// In the current implementation, the type of InlineParams doesn't matter as		// In the current implementation, the type of InlineParams doesn't matter as
Show All 22 Lines

llvm/lib/Analysis/MLInlineAdvisor.cpp

Show All 37 Lines
#define DEBUG_TYPE "inline-ml"		#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(		static cl::opt<float> SizeIncreaseThreshold(
"ml-advisor-size-increase-threshold", cl::Hidden,		"ml-advisor-size-increase-threshold", cl::Hidden,
cl::desc("Maximum factor by which expected native size may increase before "		cl::desc("Maximum factor by which expected native size may increase before "
"blocking any further inlining."),		"blocking any further inlining."),
cl::init(2.0));		cl::init(2.0));

		// clang-format off
		mtrofinUnsubmitted Not Done Reply Inline Actions why mtrofin: why
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions clang-format doesn't seem to understand these back-to-back macros and tries to add another level of indentation to each line, it makes it hard to read. jacobhegna: clang-format doesn't seem to understand these back-to-back macros and tries to add another…
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{		const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
		// InlineCost features - these must come first
		#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
		INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
		#undef POPULATE_NAMES

		// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,		#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
INLINE_FEATURE_ITERATOR(POPULATE_NAMES)		INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES		#undef POPULATE_NAMES
};		};
		// clang-format on

const char *const llvm::DecisionName = "inlining_decision";		const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";		const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";		const char *const llvm::RewardName = "delta_size";

CallBase *getInlinableCS(Instruction &I) {		CallBase *getInlinableCS(Instruction &I) {
if (auto *CS = dyn_cast<CallBase>(&I))		if (auto *CS = dyn_cast<CallBase>(&I))
if (Function *Callee = CS->getCalledFunction()) {		if (Function *Callee = CS->getCalledFunction()) {
▲ Show 20 Lines • Show All 153 Lines • ▼ Show 20 Lines	if (!IsCallSiteInlinable) {
// We can't inline this for correctness reasons, so return the base		// We can't inline this for correctness reasons, so return the base
// InlineAdvice, as we don't care about tracking any state changes (which		// InlineAdvice, as we don't care about tracking any state changes (which
// won't happen).		// won't happen).
return std::make_unique<InlineAdvice>(this, CB, ORE, false);		return std::make_unique<InlineAdvice>(this, CB, ORE, false);
}		}
CostEstimate = *IsCallSiteInlinable;		CostEstimate = *IsCallSiteInlinable;
}		}

		const auto CostFeatures =
		llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
		if (!CostFeatures) {
		return std::make_unique<InlineAdvice>(this, CB, ORE, false);
		}

if (Mandatory)		if (Mandatory)
return getMandatoryAdvice(CB, true);		return getMandatoryAdvice(CB, true);

auto NrCtantParams = 0;		auto NrCtantParams = 0;
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {		for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
NrCtantParams += (isa<Constant>(*I));		NrCtantParams += (isa<Constant>(*I));
}		}

auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);		auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);		auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount,		ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount,
CalleeBefore.BasicBlockCount);		CalleeBefore.BasicBlockCount);
ModelRunner->setFeature(FeatureIndex::CallSiteHeight,		ModelRunner->setFeature(FeatureIndex::CallSiteHeight,
FunctionLevels[&Caller]);		FunctionLevels[&Caller]);
ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount);		ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount);
ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams);		ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams);
ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate);
ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount);		ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount);
ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses);		ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses);
ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks,		ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks,
CallerBefore.BlocksReachedFromConditionalInstruction);		CallerBefore.BlocksReachedFromConditionalInstruction);
ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount,		ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount,
CallerBefore.BasicBlockCount);		CallerBefore.BasicBlockCount);
ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks,		ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks,
CalleeBefore.BlocksReachedFromConditionalInstruction);		CalleeBefore.BlocksReachedFromConditionalInstruction);
ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses);		ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses);
		ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate);
		mtrofinUnsubmitted Not Done Reply Inline Actions do we still want this here? mtrofin: do we still want this here?
		jacobhegnaAuthorUnsubmitted Done Reply Inline Actions Yes - it increases stability in training. Still looking into how to completely remove the heuristics from the ML without sacrificing training stability. jacobhegna: Yes - it increases stability in training. Still looking into how to completely remove the…

		// Add the cost features
		for (size_t I = 0;
		I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
		ModelRunner->setFeature(
		inlineCostFeatureToMlFeature(static_cast<InlineCostFeatureIndex>(I)),
		CostFeatures->at(I));
		}

return getAdviceFromModel(CB, ORE);		return getAdviceFromModel(CB, ORE);
}		}

std::unique_ptr<MLInlineAdvice>		std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,		MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
OptimizationRemarkEmitter &ORE) {		OptimizationRemarkEmitter &ORE) {
return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run());		return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run());
}		}
▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines

llvm/lib/Analysis/models/inlining/config.py

	Show All 20 Lines


	# pylint: disable=g-complex-comprehension			# pylint: disable=g-complex-comprehension
	def get_input_signature():			def get_input_signature():
	"""Returns the list of features for LLVM inlining."""			"""Returns the list of features for LLVM inlining."""
	# int64 features			# int64 features
	inputs = [			inputs = [
	tf.TensorSpec(dtype=tf.int64, shape=(), name=key) for key in [			tf.TensorSpec(dtype=tf.int64, shape=(), name=key) for key in [
	'caller_basic_block_count', 'caller_conditionally_executed_blocks',			'caller_basic_block_count',
	'caller_users', 'callee_basic_block_count',			'caller_conditionally_executed_blocks',
	'callee_conditionally_executed_blocks', 'callee_users',			'caller_users',
	'nr_ctant_params', 'node_count', 'edge_count', 'callsite_height',			'callee_basic_block_count',
	'cost_estimate', 'inlining_default'			'callee_conditionally_executed_blocks',
				'callee_users',
				'nr_ctant_params',
				'node_count',
				'edge_count',
				'callsite_height',
				'cost_estimate',
				'inlining_default',
				'sroa_savings',
				'sroa_losses',
				'load_elimination',
				'call_penalty',
				'call_argument_setup',
				'load_relative_intrinsic',
				'lowered_call_arg_setup',
				'indirect_call_penalty',
				'jump_table_penalty',
				'case_cluster_penalty',
				'switch_penalty',
				'unsimplified_common_instructions',
				'num_loops',
				'dead_blocks',
				'simplified_instructions',
				'constant_args',
				'constant_offset_ptr_args',
				'callsite_cost',
				'cold_cc_penalty',
				'last_call_to_static_bonus',
				'is_multiple_blocks',
				'nested_inlines',
				'nested_inline_bonus',
				'threshold',
	]			]
	]			]

	# float32 features			# float32 features
	inputs.extend([			inputs.extend([
	tf.TensorSpec(dtype=tf.float32, shape=(), name=key)			tf.TensorSpec(dtype=tf.float32, shape=(), name=key)
	for key in ['discount', 'reward']			for key in ['discount', 'reward']
	])			])
	Show All 15 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Unpack the CostEstimate feature in ML inlining models.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 355304

llvm/include/llvm/Analysis/InlineCost.h

llvm/include/llvm/Analysis/InlineModelFeatureMaps.h

llvm/lib/Analysis/CMakeLists.txt

llvm/lib/Analysis/InlineCost.cpp

llvm/lib/Analysis/MLInlineAdvisor.cpp

llvm/lib/Analysis/models/inlining/config.py

This is an archive of the discontinued LLVM Phabricator instance.

Unpack the CostEstimate feature in ML inlining models. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 355304

llvm/include/llvm/Analysis/InlineCost.h

llvm/include/llvm/Analysis/InlineModelFeatureMaps.h

llvm/lib/Analysis/CMakeLists.txt

llvm/lib/Analysis/InlineCost.cpp

llvm/lib/Analysis/MLInlineAdvisor.cpp

llvm/lib/Analysis/models/inlining/config.py

Unpack the CostEstimate feature in ML inlining models.
ClosedPublic