Diff 11929

include/llvm/Analysis/CodeMetrics.h

Show All 10 Lines
// the Inliner and other passes decide whether to duplicate its contents.		// the Inliner and other passes decide whether to duplicate its contents.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_CODEMETRICS_H		#ifndef LLVM_ANALYSIS_CODEMETRICS_H
#define LLVM_ANALYSIS_CODEMETRICS_H		#define LLVM_ANALYSIS_CODEMETRICS_H

#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallSite.h"		#include "llvm/IR/CallSite.h"

namespace llvm {		namespace llvm {
		class AssumptionTracker;
class BasicBlock;		class BasicBlock;
		class Loop;
class Function;		class Function;
class Instruction;		class Instruction;
class DataLayout;		class DataLayout;
class TargetTransformInfo;		class TargetTransformInfo;
class Value;		class Value;

/// \brief Check whether a call will lower to something small.		/// \brief Check whether a call will lower to something small.
///		///
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	struct CodeMetrics {
unsigned NumRets;		unsigned NumRets;

CodeMetrics()		CodeMetrics()
: exposesReturnsTwice(false), isRecursive(false), notDuplicatable(false),		: exposesReturnsTwice(false), isRecursive(false), notDuplicatable(false),
usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0),		usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0),
NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}		NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}

/// \brief Add information about a block to the current state.		/// \brief Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI);		void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
		SmallPtrSetImpl<const Value*> &EphValues);

		/// \brief Collect a loop's ephemeral values (those used only by an assume
		/// or similar intrinsics in the loop).
		static void collectEphemeralValues(const Loop L, AssumptionTracker AT,
		SmallPtrSetImpl<const Value*> &EphValues);
		chandlercUnsubmitted Not Done Reply Inline Actions Use a SmallPtrSetImpl<const Value*>? chandlerc: Use a SmallPtrSetImpl<const Value*>?

		/// \brief Collect a function's ephemeral values (those used only by an
		/// assume or similar intrinsics in the function).
		static void collectEphemeralValues(const Function L, AssumptionTracker AT,
		SmallPtrSetImpl<const Value*> &EphValues);
};		};

}		}

#endif		#endif

include/llvm/Analysis/InlineCost.h

Show All 13 Lines
#ifndef LLVM_ANALYSIS_INLINECOST_H		#ifndef LLVM_ANALYSIS_INLINECOST_H
#define LLVM_ANALYSIS_INLINECOST_H		#define LLVM_ANALYSIS_INLINECOST_H

#include "llvm/Analysis/CallGraphSCCPass.h"		#include "llvm/Analysis/CallGraphSCCPass.h"
#include <cassert>		#include <cassert>
#include <climits>		#include <climits>

namespace llvm {		namespace llvm {
		class AssumptionTracker;
class CallSite;		class CallSite;
class DataLayout;		class DataLayout;
class Function;		class Function;
class TargetTransformInfo;		class TargetTransformInfo;

namespace InlineConstants {		namespace InlineConstants {
// Various magic constants used to adjust heuristics.		// Various magic constants used to adjust heuristics.
const int InstrCost = 5;		const int InstrCost = 5;
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines	public:
/// Only valid if the cost is of the variable kind. Returns a negative		/// Only valid if the cost is of the variable kind. Returns a negative
/// value if the cost is too high to inline.		/// value if the cost is too high to inline.
int getCostDelta() const { return Threshold - getCost(); }		int getCostDelta() const { return Threshold - getCost(); }
};		};

/// \brief Cost analyzer used by inliner.		/// \brief Cost analyzer used by inliner.
class InlineCostAnalysis : public CallGraphSCCPass {		class InlineCostAnalysis : public CallGraphSCCPass {
const TargetTransformInfo *TTI;		const TargetTransformInfo *TTI;
		AssumptionTracker *AT;

public:		public:
static char ID;		static char ID;

InlineCostAnalysis();		InlineCostAnalysis();
~InlineCostAnalysis();		~InlineCostAnalysis();

// Pass interface implementation.		// Pass interface implementation.
Show All 31 Lines

lib/Analysis/CodeMetrics.cpp

	//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//			//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
	//			//
	// The LLVM Compiler Infrastructure			// The LLVM Compiler Infrastructure
	//			//
	// This file is distributed under the University of Illinois Open Source			// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.			// License. See LICENSE.TXT for details.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This file implements code cost measurement utilities.			// This file implements code cost measurement utilities.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				#include "llvm/Analysis/AssumptionTracker.h"
	#include "llvm/Analysis/CodeMetrics.h"			#include "llvm/Analysis/CodeMetrics.h"
				#include "llvm/Analysis/LoopInfo.h"
	#include "llvm/Analysis/TargetTransformInfo.h"			#include "llvm/Analysis/TargetTransformInfo.h"
				#include "llvm/Analysis/ValueTracking.h"
	#include "llvm/IR/CallSite.h"			#include "llvm/IR/CallSite.h"
	#include "llvm/IR/DataLayout.h"			#include "llvm/IR/DataLayout.h"
	#include "llvm/IR/Function.h"			#include "llvm/IR/Function.h"
	#include "llvm/IR/IntrinsicInst.h"			#include "llvm/IR/IntrinsicInst.h"
				#include "llvm/Support/Debug.h"

				#define DEBUG_TYPE "code-metrics"

	using namespace llvm;			using namespace llvm;

				static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
				SmallPtrSetImpl<const Value*> &EphValues) {
				SmallPtrSet<const Value *, 32> Visited;

				// Make sure that all of the items in WorkSet are in our EphValues set.
				EphValues.insert(WorkSet.begin(), WorkSet.end());

				// Note: We don't speculate PHIs here, so we'll miss instruction chains kept
				// alive only by ephemeral values.

				while (!WorkSet.empty()) {
				const Value *V = WorkSet.pop_back_val();
				if (!Visited.insert(V))
				continue;

				// If all uses of this value are ephemeral, then so is this value.
				bool FoundNEUse = false;
				for (const User *I : V->users())
				if (!EphValues.count(I)) {
				FoundNEUse = true;
				reamesUnsubmitted Not Done Reply Inline Actions Most of this change is driven by the assume intrinsic changes, but this actually changes behaviour for existing code as well. It might be good to separate this infrastructure into two distinct changes and get the addition of the other intrinsics more widely reviewed. (i.e. despite the title and description, this isn't specific to assume/invariant) I'm mostly suggesting this for risk management and blame purposes. reames: Most of this change is driven by the assume intrinsic changes, but this actually changes…
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions You're right, but the blame will be clear regardless. This needs to be committed before we actually start generating @llvm.assume in Clang for anything (and before anyone else should generate them either). As it turns out, most of the people interested in this feature are the people who best understand the impact on costing for the other intrinsics too. Thus, I'm not concerned. hfinkel: You're right, but the blame will be clear regardless. This needs to be committed before we…
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions That said, the next version of this patch will handle only @llvm.assume (because there'll be an easy way to find them, thus decreasing the cost). hfinkel: That said, the next version of this patch will handle only @llvm.assume (because there'll be an…
				break;
				}

				if (FoundNEUse)
				continue;

				EphValues.insert(V);
				DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
				chandlercUnsubmitted Not Done Reply Inline Actions SmallPtrSet is significantly more efficient than DenseSet, please use it everywhere. chandlerc: SmallPtrSet is significantly more efficient than DenseSet, please use it everywhere.

				reamesUnsubmitted Not Done Reply Inline Actions A useful optimization might be to seed the EphValues set with every single use value in WorkSet. This would help cases like: a = icmp assume(a) c = and a, b assume(c) With your current implementation, if you consider 'a' first, you'll discard it because 'c' is not known ephemeral. You'll eventually revisit it (through 'c''s operands), but this is wasted work. You could even go a step further and make the worklist include only items just found to be ephemeral. You'd essentially be growing the frontier of ephemeral values back through the value graph. This might be a more obvious algorithm. Worth noting is that neither algorithm handles phi cycles. Both are conservative, not optimistic. (In the classic data flow sense.) I think that's fine for now, but worth noting explicitly. reames: A useful optimization might be to seed the EphValues set with every single use value in WorkSet.
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions Ack; yep. I actually had it that way originally, but made it so that only one place in the code inserted things into EphValues while I was debugging something, and then forgot to change it back. You're correct, that should be noted. hfinkel: Ack; yep. I actually had it that way originally, but made it so that only one place in the code…
				if (const User *U = dyn_cast<User>(V))
				for (const Value *J : U->operands()) {
				if (isSafeToSpeculativelyExecute(J))
				WorkSet.push_back(J);
				}
				}
				}

				reamesUnsubmitted Not Done Reply Inline Actions This code still has issues where you'll miss eph values based on visit order. This isn't a correctness problem, just possible missed optimizations. reames: This code still has issues where you'll miss eph values based on visit order. This isn't a…
				// Find all ephemeral values.
				void CodeMetrics::collectEphemeralValues(const Loop L, AssumptionTracker AT,
				SmallPtrSetImpl<const Value*> &EphValues) {
				SmallVector<const Value *, 16> WorkSet;

				for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
				chandlercUnsubmitted Not Done Reply Inline Actions nit: invert and use 'continue' to reduce indentation chandlerc: nit: invert and use 'continue' to reduce indentation
				// Filter out call sites outside of the loop so we don't do a function's
				// worth of work for each of its loops (and, in the common case, ephemeral
				// values in the loop are likely due to @llvm.assume calls in the loop).
				reamesUnsubmitted Not Done Reply Inline Actions Arguably, we should stop looking at eph values outside the loop during the search, but that's minor. They're likely to have other uses as well. reames: Arguably, we should stop looking at eph values outside the loop during the search, but that's…
				if (!L->contains(I->getParent()))
				continue;

				chandlercUnsubmitted Not Done Reply Inline Actions I think we have an ->operands() that will let you write this as a range loop. Even if not, auto would help. chandlerc: I think we have an ->operands() that will let you write this as a range loop. Even if not, auto…
				WorkSet.push_back(I);
				chandlercUnsubmitted Not Done Reply Inline Actions Why being "ephemeral" is equivalent to being safe to speculate isn't immediately obvious to me. might be nice to add a comment at least ... chandlerc: Why being "ephemeral" is equivalent to being safe to speculate isn't immediately obvious to me.
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions The essential property of an "ephemeral" instruction is that it will go away (no actual code will be generated corresponding to the operation it represents). If something is not safe to speculate, then it will not really go away because it might have side effects. Does that make sense? hfinkel: The essential property of an "ephemeral" instruction is that it will go away (no actual code…
				}

				completeEphemeralValues(WorkSet, EphValues);
				}

				void CodeMetrics::collectEphemeralValues(const Function F, AssumptionTracker AT,
				SmallPtrSetImpl<const Value*> &EphValues) {
				SmallVector<const Value *, 16> WorkSet;

				for (auto &I : AT->assumptions(const_cast<Function*>(F)))
				WorkSet.push_back(I);

				completeEphemeralValues(WorkSet, EphValues);
				}

	/// analyzeBasicBlock - Fill in the current structure with information gleaned			/// analyzeBasicBlock - Fill in the current structure with information gleaned
	/// from the specified block.			/// from the specified block.
	void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,			void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
	const TargetTransformInfo &TTI) {			const TargetTransformInfo &TTI,
				SmallPtrSetImpl<const Value*> &EphValues) {
	++NumBlocks;			++NumBlocks;
	unsigned NumInstsBeforeThisBB = NumInsts;			unsigned NumInstsBeforeThisBB = NumInsts;
	for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();			for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
	II != E; ++II) {			II != E; ++II) {
				// Skip ephemeral values.
				if (EphValues.count(II))
				continue;

	// Special handling for calls.			// Special handling for calls.
	if (isa<CallInst>(II) \|\| isa<InvokeInst>(II)) {			if (isa<CallInst>(II) \|\| isa<InvokeInst>(II)) {
	ImmutableCallSite CS(cast<Instruction>(II));			ImmutableCallSite CS(cast<Instruction>(II));

	if (const Function *F = CS.getCalledFunction()) {			if (const Function *F = CS.getCalledFunction()) {
	// If a function is both internal and has a single use, then it is			// If a function is both internal and has a single use, then it is
	// extremely likely to get inlined in the future (it was probably			// extremely likely to get inlined in the future (it was probably
	// exposed by an interleaved devirtualization pass).			// exposed by an interleaved devirtualization pass).
	▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

lib/Analysis/IPA/InlineCost.cpp

Show All 11 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"		#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
		#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"		#include "llvm/Analysis/ConstantFolding.h"
		#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"		#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"		#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"		#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"		#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"		#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"		#include "llvm/IR/InstVisitor.h"
Show All 15 Lines	class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
friend class InstVisitor<CallAnalyzer, bool>;		friend class InstVisitor<CallAnalyzer, bool>;

// DataLayout if available, or null.		// DataLayout if available, or null.
const DataLayout *const DL;		const DataLayout *const DL;

/// The TargetTransformInfo available for this compilation.		/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;		const TargetTransformInfo &TTI;

		/// The cache of @llvm.assume intrinsics.
		AssumptionTracker *AT;

// The called function.		// The called function.
Function &F;		Function &F;

int Threshold;		int Threshold;
int Cost;		int Cost;

bool IsCallerRecursive;		bool IsCallerRecursive;
bool IsRecursiveCall;		bool IsRecursiveCall;
Show All 39 Lines	class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,		void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);		int InstructionCost);
bool isGEPOffsetConstant(GetElementPtrInst &GEP);		bool isGEPOffsetConstant(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);		bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);		bool simplifyCallSite(Function *F, CallSite CS);
ConstantInt stripAndComputeInBoundsConstantOffsets(Value &V);		ConstantInt stripAndComputeInBoundsConstantOffsets(Value &V);

// Custom analysis routines.		// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB);		bool analyzeBlock(BasicBlock BB, SmallPtrSetImpl<const Value > &EphValues);

// Disable several entry points to the visitor so we don't accidentally use		// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.		// them by declaring but not defining them here.
void visit(Module *); void visit(Module &);		void visit(Module *); void visit(Module &);
void visit(Function *); void visit(Function &);		void visit(Function *); void visit(Function &);
void visit(BasicBlock *); void visit(BasicBlock &);		void visit(BasicBlock *); void visit(BasicBlock &);

// Provide base case for our instruction visit.		// Provide base case for our instruction visit.
Show All 20 Lines	class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitBranchInst(BranchInst &BI);		bool visitBranchInst(BranchInst &BI);
bool visitSwitchInst(SwitchInst &SI);		bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);		bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);		bool visitResumeInst(ResumeInst &RI);
bool visitUnreachableInst(UnreachableInst &I);		bool visitUnreachableInst(UnreachableInst &I);

public:		public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,		CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
Function &Callee, int Threshold)		AssumptionTracker *AT, Function &Callee, int Threshold)
: DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),		: DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),		IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),		ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),		ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),		AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),		FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),		NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),		NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
NumInstructionsSimplified(0), SROACostSavings(0),		NumInstructionsSimplified(0), SROACostSavings(0),
▲ Show 20 Lines • Show All 619 Lines • ▼ Show 20 Lines	bool CallAnalyzer::visitCallSite(CallSite CS) {
if (!F)		if (!F)
return Base::visitCallSite(CS);		return Base::visitCallSite(CS);

// If we have a constant that we are calling as a function, we can peer		// If we have a constant that we are calling as a function, we can peer
// through it and see the function target. This happens not infrequently		// through it and see the function target. This happens not infrequently
// during devirtualization and so we want to give it a hefty bonus for		// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan		// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.		// out. Pretend to inline the function, with a custom threshold.
CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);		CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {		if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the		// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.		// bonus we want to apply, but don't go below zero.
Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());		Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
}		}

return Base::visitCallSite(CS);		return Base::visitCallSite(CS);
}		}
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines

/// \brief Analyze a basic block for its contribution to the inline cost.		/// \brief Analyze a basic block for its contribution to the inline cost.
///		///
/// This method walks the analyzer over every instruction in the given basic		/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It		/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline		/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer		/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.		/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {		bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
		SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {		for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of		// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,		// our ability to simplify them during inline to constants or dead code,
// are actually used by the vector bonus heuristic. As long as that's true,		// are actually used by the vector bonus heuristic. As long as that's true,
// we have to special case debug intrinsics here to prevent differences in		// we have to special case debug intrinsics here to prevent differences in
// inlining due to debug symbols. Eventually, the number of unsimplified		// inlining due to debug symbols. Eventually, the number of unsimplified
// instructions shouldn't factor into the cost computation, but until then,		// instructions shouldn't factor into the cost computation, but until then,
// hack around it here.		// hack around it here.
if (isa<DbgInfoIntrinsic>(I))		if (isa<DbgInfoIntrinsic>(I))
continue;		continue;

		// Skip ephemeral values.
		if (EphValues.count(I))
		reamesUnsubmitted Not Done Reply Inline Actions It looks like the previous check is now redundant with this one? It might be worth leaving it in place for clarity, but if so, an explicit comment might be warranted. reames: It looks like the previous check is now redundant with this one? It might be worth leaving it…
		hfinkelAuthorUnsubmitted Not Done Reply Inline Actions It is redundant, only if the EphValues set is fully populated. I don't want the rest of the infrastructure to depend on that (at least for now). I'll add a comment. hfinkel: It is redundant, only if the EphValues set is fully populated. I don't want the rest of the…
		continue;

++NumInstructions;		++NumInstructions;
if (isa<ExtractElementInst>(I) \|\| I->getType()->isVectorTy())		if (isa<ExtractElementInst>(I) \|\| I->getType()->isVectorTy())
++NumVectorInstructions;		++NumVectorInstructions;

// If the instruction simplified to a constant, there is no cost to this		// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for		// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we		// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base		// consumed the instruction in any way, and false if the instruction's base
▲ Show 20 Lines • Show All 187 Lines • ▼ Show 20 Lines	if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
SROAArgCosts[PtrArg] = 0;		SROAArgCosts[PtrArg] = 0;
}		}
}		}
}		}
NumConstantArgs = SimplifiedValues.size();		NumConstantArgs = SimplifiedValues.size();
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();		NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();		NumAllocaArgs = SROAArgValues.size();

		// FIXME: If a caller has multiple calls to a callee, we end up recomputing
		// the ephemeral values multiple times (and they're completely determined by
		chandlercUnsubmitted Not Done Reply Inline Actions Oof, so, we can't do it this way in the inline cost analysis for terrible reasons. The reason why inline cost analysis ends up duplicating so much code from code metrics is because an essential property of inline cost analysis is that the amount of work done should be bounded by the constant threshold, not by the size of the function (or basic block). This is important because we may end up doing a linear number of calls to compute the inline cost where the linear factor would go quadratic if we do a linear amount of work here. =/ This is particularly problematic for ephemeral values because their very nature is defined backwards. I'm happy with this as an initial implementation but it should have a "FIXME" to revisit this before we start emitting them really heavily. chandlerc: Oof, so, we can't do it this way in the inline cost analysis for terrible reasons. The reason…
		hfinkelAuthorUnsubmitted Not Done Reply Inline Actions Alright; we'll need to think of a better way. What if we did this only once per function, and cached the result. When a function is inlined, the inliner updates an ephemeral value map so the list of instructions in the caller can be updated. Now it is possible for inlining to introduce new ephemeral values in the caller, but only from things that used to be function arguments so we could then do a more-limited propagation. The problem is that the scan is already linear, so I'd be ruining your complexity guarantee as soon as the functionality is committed regardless of the number of invariants. hfinkel: Alright; we'll need to think of a better way. What if we did this only once per function, and…
		// the callee, so this is purely duplicate work).
		SmallPtrSet<const Value *, 32> EphValues;
		CodeMetrics::collectEphemeralValues(&F, AT, EphValues);

// The worklist of live basic blocks in the callee after inlining. We avoid		// The worklist of live basic blocks in the callee after inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this		// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This		// particular call site in order to get more accurate cost estimates. This
// requires a somewhat heavyweight iteration pattern: we need to walk the		// requires a somewhat heavyweight iteration pattern: we need to walk the
// basic blocks in a breadth-first order as we insert live successors. To		// basic blocks in a breadth-first order as we insert live successors. To
// accomplish this, prioritizing for small iterations because we exit after		// accomplish this, prioritizing for small iterations because we exit after
// crossing our threshold, we use a small-size optimized SetVector.		// crossing our threshold, we use a small-size optimized SetVector.
typedef SetVector<BasicBlock , SmallVector<BasicBlock , 16>,		typedef SetVector<BasicBlock , SmallVector<BasicBlock , 16>,
Show All 17 Lines	for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// see an indirect branch that ends up being dead code at a particular call		// see an indirect branch that ends up being dead code at a particular call
// site. If the blockaddress escapes the function, e.g., via a global		// site. If the blockaddress escapes the function, e.g., via a global
// variable, inlining may lead to an invalid cross-function reference.		// variable, inlining may lead to an invalid cross-function reference.
if (BB->hasAddressTaken())		if (BB->hasAddressTaken())
return false;		return false;

// Analyze the cost of this block. If we blow through the threshold, this		// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.		// returns false, and we can bail on out.
if (!analyzeBlock(BB)) {		if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall \|\| ExposesReturnsTwice \|\| HasDynamicAlloca \|\|		if (IsRecursiveCall \|\| ExposesReturnsTwice \|\| HasDynamicAlloca \|\|
HasIndirectBr)		HasIndirectBr)
return false;		return false;

// If the caller is a recursive function then we don't want to inline		// If the caller is a recursive function then we don't want to inline
// functions which allocate a lot of stack space because it would increase		// functions which allocate a lot of stack space because it would increase
// the caller stack usage dramatically.		// the caller stack usage dramatically.
if (IsCallerRecursive &&		if (IsCallerRecursive &&
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
DEBUG_PRINT_STAT(VectorBonus);		DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT		#undef DEBUG_PRINT_STAT
}		}
#endif		#endif

INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",		INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)		true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)		INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
		INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",		INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)		true, true)

char InlineCostAnalysis::ID = 0;		char InlineCostAnalysis::ID = 0;

InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}		InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}

InlineCostAnalysis::~InlineCostAnalysis() {}		InlineCostAnalysis::~InlineCostAnalysis() {}

void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {		void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();		AU.setPreservesAll();
		AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();		AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);		CallGraphSCCPass::getAnalysisUsage(AU);
}		}

bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {		bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();		TTI = &getAnalysis<TargetTransformInfo>();
		AT = &getAnalysis<AssumptionTracker>();
return false;		return false;
}		}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {		InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold);		return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}		}

/// \brief Test that two functions either have or have not the given attribute		/// \brief Test that two functions either have or have not the given attribute
Show All 40 Lines	InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// marked noinline.		// marked noinline.
if (Callee->mayBeOverridden() \|\|		if (Callee->mayBeOverridden() \|\|
Callee->hasFnAttribute(Attribute::NoInline) \|\| CS.isNoInline())		Callee->hasFnAttribute(Attribute::NoInline) \|\| CS.isNoInline())
return llvm::InlineCost::getNever();		return llvm::InlineCost::getNever();

DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()		DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");		<< "...\n");

CallAnalyzer CA(Callee->getDataLayout(), TTI, Callee, Threshold);		CallAnalyzer CA(Callee->getDataLayout(), TTI, AT, Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);		bool ShouldInline = CA.analyzeCall(CS);

DEBUG(CA.dump());		DEBUG(CA.dump());

// Check if there was a reason to force inlining or no inlining.		// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())		if (!ShouldInline && CA.getCost() < CA.getThreshold())
return InlineCost::getNever();		return InlineCost::getNever();
if (ShouldInline && CA.getCost() >= CA.getThreshold())		if (ShouldInline && CA.getCost() >= CA.getThreshold())
Show All 35 Lines

lib/Transforms/Scalar/LoopRotation.cpp

//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//		//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file implements Loop Rotation Pass.		// This file implements Loop Rotation Pass.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
		#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"		#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"		#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"		#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"		#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"		#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
Show All 24 Lines	LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
if (SpecifiedMaxHeaderSize == -1)		if (SpecifiedMaxHeaderSize == -1)
MaxHeaderSize = DefaultRotationThreshold;		MaxHeaderSize = DefaultRotationThreshold;
else		else
MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);		MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
}		}

// LCSSA form makes instruction renaming easier.		// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
		AU.addRequired<AssumptionTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();		AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();		AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();		AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);		AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);		AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);		AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);		AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();		AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();		AU.addRequired<TargetTransformInfo>();
}		}

bool runOnLoop(Loop *L, LPPassManager &LPM) override;		bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool simplifyLoopLatch(Loop *L);		bool simplifyLoopLatch(Loop *L);
bool rotateLoop(Loop *L, bool SimplifiedLatch);		bool rotateLoop(Loop *L, bool SimplifiedLatch);

private:		private:
unsigned MaxHeaderSize;		unsigned MaxHeaderSize;
LoopInfo *LI;		LoopInfo *LI;
const TargetTransformInfo *TTI;		const TargetTransformInfo *TTI;
		AssumptionTracker *AT;
};		};
}		}

char LoopRotate::ID = 0;		char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)		INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)		INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
		INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)		INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)		INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)		INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)		INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)

Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {		Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
return new LoopRotate(MaxHeaderSize);		return new LoopRotate(MaxHeaderSize);
}		}

/// Rotate Loop L as many times as possible. Return true if		/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.		/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {		bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))		if (skipOptnoneFunction(L))
return false;		return false;

// Save the loop metadata.		// Save the loop metadata.
MDNode *LoopMD = L->getLoopID();		MDNode *LoopMD = L->getLoopID();

LI = &getAnalysis<LoopInfo>();		LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();		TTI = &getAnalysis<TargetTransformInfo>();
		AT = &getAnalysis<AssumptionTracker>();

// Simplify the loop latch before attempting to rotate the header		// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the		// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.		// loop exit.
bool SimplifiedLatch = simplifyLoopLatch(L);		bool SimplifiedLatch = simplifyLoopLatch(L);

// One loop can be rotated multiple times.		// One loop can be rotated multiple times.
bool MadeChange = false;		bool MadeChange = false;
▲ Show 20 Lines • Show All 209 Lines • ▼ Show 20 Lines	bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Rotate if either the loop latch does not exit the loop, or if the loop		// Rotate if either the loop latch does not exit the loop, or if the loop
// latch was just simplified.		// latch was just simplified.
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)		if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;		return false;

// Check size of original header and reject loop if it is very big or we can't		// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.		// duplicate blocks inside it.
{		{
		SmallPtrSet<const Value *, 32> EphValues;
		CodeMetrics::collectEphemeralValues(L, AT, EphValues);

CodeMetrics Metrics;		CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader, *TTI);		Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {		if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"		DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());		<< " instructions: "; L->dump());
return false;		return false;
}		}
if (Metrics.NumInsts > MaxHeaderSize)		if (Metrics.NumInsts > MaxHeaderSize)
return false;		return false;
}		}
▲ Show 20 Lines • Show All 232 Lines • Show Last 20 Lines

lib/Transforms/Scalar/LoopUnrollPass.cpp

Show First 20 Lines • Show All 194 Lines • ▼ Show 20 Lines

Pass *llvm::createSimpleLoopUnrollPass() {		Pass *llvm::createSimpleLoopUnrollPass() {
return llvm::createLoopUnrollPass(-1, -1, 0, 0);		return llvm::createLoopUnrollPass(-1, -1, 0, 0);
}		}

/// ApproximateLoopSize - Approximate the size of the loop.		/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,		static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,		bool &NotDuplicatable,
const TargetTransformInfo &TTI) {		const TargetTransformInfo &TTI,
		AssumptionTracker *AT) {
		SmallPtrSet<const Value *, 32> EphValues;
		CodeMetrics::collectEphemeralValues(L, AT, EphValues);

CodeMetrics Metrics;		CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();		for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)		I != E; ++I)
Metrics.analyzeBasicBlock(*I, TTI);		Metrics.analyzeBasicBlock(*I, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;		NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;		NotDuplicatable = Metrics.notDuplicatable;

unsigned LoopSize = Metrics.NumInsts;		unsigned LoopSize = Metrics.NumInsts;

// Don't allow an estimate of size zero. This would allow unrolling of loops		// Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's		// with huge iteration counts, which is a compile time problem even if it's
// not a problem for code quality.		// not a problem for code quality.
▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines	bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// on size thresholds.		// on size thresholds.
bool CountSetExplicitly;		bool CountSetExplicitly;
unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,		unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
PragmaCount, UP, CountSetExplicitly);		PragmaCount, UP, CountSetExplicitly);

unsigned NumInlineCandidates;		unsigned NumInlineCandidates;
bool notDuplicatable;		bool notDuplicatable;
unsigned LoopSize =		unsigned LoopSize =
ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);		ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AT);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");		DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t UnrolledSize = (uint64_t)LoopSize * Count;		uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
if (notDuplicatable) {		if (notDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"		DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");		<< " instructions.\n");
return false;		return false;
}		}
if (NumInlineCandidates != 0) {		if (NumInlineCandidates != 0) {
▲ Show 20 Lines • Show All 102 Lines • Show Last 20 Lines

lib/Transforms/Scalar/LoopUnswitch.cpp

Show All 24 Lines
// of the loop, to make the unswitching opportunity obvious.		// of the loop, to make the unswitching opportunity obvious.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
		#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"		#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"		#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"		#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"		#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"		#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"		#include "llvm/IR/DerivedTypes.h"
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	public:

LUAnalysisCache() :		LUAnalysisCache() :
CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),		CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
MaxSize(Threshold)		MaxSize(Threshold)
{}		{}

// Analyze loop. Check its size and determine whether it is possible to		// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.		// unswitch it. Returns true if we can unswitch this loop.
bool countLoop(const Loop *L, const TargetTransformInfo &TTI);		bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
		AssumptionTracker *AT);

// Clean all data related to given loop.		// Clean all data related to given loop.
void forgetLoop(const Loop *L);		void forgetLoop(const Loop *L);

// Mark case value as unswitched.		// Mark case value as unswitched.
// Since SI instruction can be partly unswitched, in order to avoid		// Since SI instruction can be partly unswitched, in order to avoid
// extra unswitching in cloned loops keep track of all unswitched values.		// extra unswitching in cloned loops keep track of all unswitched values.
void setUnswitched(const SwitchInst SI, const Value V);		void setUnswitched(const SwitchInst SI, const Value V);

// Check whether this case value was unswitched before.		// Check whether this case value was unswitched before.
bool isUnswitched(const SwitchInst SI, const Value V);		bool isUnswitched(const SwitchInst SI, const Value V);

// Clone all loop-unswitch related loop properties.		// Clone all loop-unswitch related loop properties.
// Redistribute unswitching quotas.		// Redistribute unswitching quotas.
// Note, that new loop data is stored inside the VMap.		// Note, that new loop data is stored inside the VMap.
void cloneData(const Loop NewLoop, const Loop OldLoop,		void cloneData(const Loop NewLoop, const Loop OldLoop,
const ValueToValueMapTy &VMap);		const ValueToValueMapTy &VMap);
};		};

class LoopUnswitch : public LoopPass {		class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information		LoopInfo *LI; // Loop information
LPPassManager *LPM;		LPPassManager *LPM;
		AssumptionTracker *AT;

// LoopProcessWorklist - Used to check if second loop needs processing		// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.		// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;		std::vector<Loop*> LoopProcessWorklist;

LUAnalysisCache BranchesInfo;		LUAnalysisCache BranchesInfo;

bool OptimizeForSize;		bool OptimizeForSize;
Show All 22 Lines	public:

bool runOnLoop(Loop *L, LPPassManager &LPM) override;		bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool processCurrentLoop();		bool processCurrentLoop();

/// This transformation requires natural loop information & requires that		/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.		/// loop preheaders be inserted into the CFG.
///		///
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
		AU.addRequired<AssumptionTracker>();
AU.addRequiredID(LoopSimplifyID);		AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);		AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();		AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();		AU.addPreserved<LoopInfo>();
AU.addRequiredID(LCSSAID);		AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);		AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTreeWrapperPass>();		AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();		AU.addPreserved<ScalarEvolution>();
Show All 32 Lines	private:
bool IsTrivialUnswitchCondition(Value Cond, Constant *Val = nullptr,		bool IsTrivialUnswitchCondition(Value Cond, Constant *Val = nullptr,
BasicBlock **LoopExit = nullptr);		BasicBlock **LoopExit = nullptr);

};		};
}		}

// Analyze loop. Check its size and determine whether it is possible to		// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.		// unswitch it. Returns true if we can unswitch this loop.
bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {		bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
		AssumptionTracker *AT) {

LoopPropsMapIt PropsIt;		LoopPropsMapIt PropsIt;
bool Inserted;		bool Inserted;
std::tie(PropsIt, Inserted) =		std::tie(PropsIt, Inserted) =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));		LoopsProperties.insert(std::make_pair(L, LoopProperties()));

LoopProperties &Props = PropsIt->second;		LoopProperties &Props = PropsIt->second;

if (Inserted) {		if (Inserted) {
// New loop.		// New loop.

// Limit the number of instructions to avoid causing significant code		// Limit the number of instructions to avoid causing significant code
// expansion, and the number of basic blocks, to avoid loops with		// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.		// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.		// This is a very ad-hoc heuristic.

		SmallPtrSet<const Value *, 32> EphValues;
		CodeMetrics::collectEphemeralValues(L, AT, EphValues);

// FIXME: This is overly conservative because it does not take into		// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can		// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.		// be shared by the resultant unswitched loops.
CodeMetrics Metrics;		CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();		for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)		I != E; ++I)
Metrics.analyzeBasicBlock(*I, TTI);		Metrics.analyzeBasicBlock(*I, TTI, EphValues);

Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);		Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);		Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;		MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;

if (Metrics.notDuplicatable) {		if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "NOT unswitching loop %"		DEBUG(dbgs() << "NOT unswitching loop %"
<< L->getHeader()->getName() << ", contents cannot be "		<< L->getHeader()->getName() << ", contents cannot be "
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines	for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];		NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
}		}
}		}

char LoopUnswitch::ID = 0;		char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",		INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)		false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)		INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
		INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)		INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)		INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)		INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",		INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)		false, false)

Pass *llvm::createLoopUnswitchPass(bool Os) {		Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);		return new LoopUnswitch(Os);
Show All 34 Lines	static Value FindLIVLoopCondition(Value Cond, Loop *L, bool &Changed) {

return nullptr;		return nullptr;
}		}

bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {		bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (skipOptnoneFunction(L))		if (skipOptnoneFunction(L))
return false;		return false;

		AT = &getAnalysis<AssumptionTracker>();
LI = &getAnalysis<LoopInfo>();		LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;		LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =		DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();		getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;		DT = DTWP ? &DTWP->getDomTree() : nullptr;
currentLoop = L;		currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();		Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;		bool Changed = false;
Show All 29 Lines	bool LoopUnswitch::processCurrentLoop() {
// Without dedicated exits, splitting the exit edge may fail.		// Without dedicated exits, splitting the exit edge may fail.
if (!currentLoop->hasDedicatedExits())		if (!currentLoop->hasDedicatedExits())
return false;		return false;

LLVMContext &Context = loopHeader->getContext();		LLVMContext &Context = loopHeader->getContext();

// We may have reached the quota of branches for this loop. If so,		// We may have reached the quota of branches for this loop. If so,
// stop unswitching.		// stop unswitching.
if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))		if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
		AT))
return false;		return false;

// Loop over all of the basic blocks in the loop. If we find an interior		// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this		// block that is branching on a loop-invariant condition, we can unswitch this
// loop.		// loop.
for (Loop::block_iterator I = currentLoop->block_begin(),		for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {		E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst TI = (I)->getTerminator();		TerminatorInst TI = (I)->getTerminator();
▲ Show 20 Lines • Show All 712 Lines • Show Last 20 Lines

test/Transforms/Inline/ephemeral.ll

This file was added.

				; RUN: opt -S -Oz %s \| FileCheck %s

				@a = global i32 4

				define i1 @inner() {
				%a1 = load volatile i32* @a
				%x1 = add i32 %a1, %a1
				%c = icmp eq i32 %x1, 0

				; Here are enough instructions to prevent inlining, but because they are used
				; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
				; still happen).
				%a2 = mul i32 %a1, %a1
				%a3 = sub i32 %a1, 5
				%a4 = udiv i32 %a3, -13
				%a5 = mul i32 %a4, %a4
				%a6 = add i32 %a5, %x1
				%ca = icmp sgt i32 %a6, -7
				tail call void @llvm.assume(i1 %ca)

				ret i1 %c
				}

				; @inner() should be inlined for -Oz.
				reamesUnsubmitted Not Done Reply Inline Actions This test appears really fragile w.r.t. our current inlining heuristics. Is there a way you could restructure this to avoid that? If not, add some comments explaining how to adjust the test if the inlining heuristics change. You also need to add some form of negative example to show that the ephemeral logic actually contributes to the inlining. reames: This test appears really fragile w.r.t. our current inlining heuristics. Is there a way you…
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions I could hard code the current threshold into the RUN line (that having been said, I adapted this from a similar test that will also need to be changed should the heuristic be altered). I don't understand your second comment. If you don't account for the ephemeral values, then the extra instructions in this example prevent inlining. If you do account for them, then the inlining happens. hfinkel: I could hard code the current threshold into the RUN line (that having been said, I adapted…
				reamesUnsubmitted Not Done Reply Inline Actions For the second comment, you need a test case that shows the inliner not inlining unless the values are ephemeral. Your current tests could be made to pass by unconditional inlining. Arguably, this is an inliner test, not an ephemeral value test and might already exist. reames: For the second comment, you need a test case that shows the inliner not inlining unless the…
				; CHECK-NOT: call i1 @inner
				define i1 @outer() optsize {
				%r = call i1 @inner()
				ret i1 %r
				}

				declare void @llvm.assume(i1) nounwind

test/Transforms/LoopUnroll/ephemeral.ll

This file was added.

				; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 \| FileCheck %s

				; Make sure this loop is completely unrolled...
				; CHECK-LABEL: @test1
				; CHECK: for.body:
				; CHECK-NOT: for.end:

				define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
				reamesUnsubmitted Not Done Reply Inline Actions Same comments as above w.r.t. test structure. Also, a comment explaining the test approach would be helpful. It looks like you're relying on the return to be constant folded after unrolling? reames: Same comments as above w.r.t. test structure. Also, a comment explaining the test approach…
				hfinkelAuthorUnsubmitted Not Done Reply Inline Actions Hey, the threshold is even hard-coded in this one. ;) No, I'm adding enough extra instructions such that unrolling will be prevented unless you account for the fact that those extra instructions are ephemeral (and, thus, free). But you're right about one thing... some comments are needed. :-) hfinkel: Hey, the threshold is even hard-coded in this one. ;) No, I'm adding enough extra instructions…
				entry:
				br label %for.body

				for.body: ; preds = %for.body, %entry
				%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
				%sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
				%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
				%0 = load i32* %arrayidx, align 4

				; This loop will be completely unrolled, even with these extra instructions,
				; but only because they're ephemeral (and, thus, free).
				%1 = add nsw i32 %0, 2
				%2 = add nsw i32 %1, 4
				%3 = add nsw i32 %2, 4
				%4 = add nsw i32 %3, 4
				%5 = add nsw i32 %4, 4
				%6 = add nsw i32 %5, 4
				%7 = add nsw i32 %6, 4
				%8 = add nsw i32 %7, 4
				%9 = add nsw i32 %8, 4
				%10 = add nsw i32 %9, 4
				%ca = icmp sgt i32 %10, -7
				call void @llvm.assume(i1 %ca)

				%add = add nsw i32 %0, %sum.01
				%indvars.iv.next = add i64 %indvars.iv, 1
				%lftr.wideiv = trunc i64 %indvars.iv.next to i32
				%exitcond = icmp eq i32 %lftr.wideiv, 5
				br i1 %exitcond, label %for.end, label %for.body

				for.end: ; preds = %for.body
				ret i32 %add
				}

				declare void @llvm.assume(i1) nounwind

This is an archive of the discontinued LLVM Phabricator instance.

Ephemeral values for LLVM invariants
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 11929

include/llvm/Analysis/CodeMetrics.h

include/llvm/Analysis/InlineCost.h

lib/Analysis/CodeMetrics.cpp

lib/Analysis/IPA/InlineCost.cpp

lib/Transforms/Scalar/LoopRotation.cpp

lib/Transforms/Scalar/LoopUnrollPass.cpp

lib/Transforms/Scalar/LoopUnswitch.cpp

test/Transforms/Inline/ephemeral.ll

test/Transforms/LoopUnroll/ephemeral.ll

This is an archive of the discontinued LLVM Phabricator instance.

Ephemeral values for LLVM invariantsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 11929

include/llvm/Analysis/CodeMetrics.h

include/llvm/Analysis/InlineCost.h

lib/Analysis/CodeMetrics.cpp

lib/Analysis/IPA/InlineCost.cpp

lib/Transforms/Scalar/LoopRotation.cpp

lib/Transforms/Scalar/LoopUnrollPass.cpp

lib/Transforms/Scalar/LoopUnswitch.cpp

test/Transforms/Inline/ephemeral.ll

test/Transforms/LoopUnroll/ephemeral.ll

Ephemeral values for LLVM invariants
ClosedPublic