This is an archive of the discontinued LLVM Phabricator instance.

[TargetTransformInfo] Add a new public interface getInstructionCost
ClosedPublic

Authored by Carrot on Aug 25 2017, 4:33 PM.

Details

Summary

The current TargetTransformInfo supports a throughput cost model, but some optimizations also need instruction latency and code size cost models. Hal suggested a single public interface for querying the different costs of an instruction, so I propose the following interface:

enum TargetCostKind {
  TCK_Throughput, ///< The traditional cost model, it actually
                  ///< means reciprocal throughputs.
  TCK_Latency,    ///< The latency of instruction.
  TCK_CodeSize    ///< Instruction code size.
};

int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const;

Clients should mainly use this function to query the cost of an instruction; the parameter <kind> specifies the desired cost model.
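
For illustration, a client pass might use it roughly as follows (a minimal sketch; the helper name sumLatency and the surrounding setup are hypothetical, not part of the patch):

static int sumLatency(const BasicBlock &BB, const TargetTransformInfo &TTI) {
  int Total = 0;
  for (const Instruction &I : BB)
    // Query the latency model; TCK_Throughput or TCK_CodeSize could be
    // requested instead, depending on what the optimization cares about.
    Total += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency);
  return Total;
}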

This patch also provides simple default implementations of getInstructionSize and getInstructionLatency.

The default implementation of getInstructionSize simply returns 4, which is sufficient for many RISC processors, but targets with variable-length instruction encodings should provide their own implementations.
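
For concreteness, the default could be as trivial as this sketch (the exact signature in the patch may differ):

int getInstructionSize(const Instruction *I) {
  // Assume a fixed 4-byte encoding, which holds for many RISC ISAs.
  // Variable-length targets (e.g. x86) should override this.
  return 4;
}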

The default getInstructionLatency provides latency numbers for only a small number of instruction classes, and those numbers are only reasonable for modern out-of-order (OOO) processors. It can be extended in the following ways (a rough sketch of the per-class default follows the list):

  • Add more detail to this function.
  • Add a getXXXLatency function and call it from here.
  • Implement a target-specific getInstructionLatency function.
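
As mentioned above, a rough sketch of such a per-class default (the concrete classes and numbers in the patch may differ; these values are merely plausible for a modern OOO core):

int getInstructionLatency(const Instruction *I) {
  if (isa<LoadInst>(I))
    return 4;   // A typical L1 load-to-use latency.
  if (auto *CI = dyn_cast<CallInst>(I))
    if (!CI->getCalledFunction() || !CI->getCalledFunction()->isIntrinsic())
      return 10; // Treat real (non-intrinsic) calls as much slower.
  if (I->getType()->isVectorTy() || I->getType()->isFloatingPointTy())
    return 3;   // FP/vector ALU operations usually take a few cycles.
  return 1;     // Most simple integer operations.
}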

Diff Detail

Repository
rL LLVM

Event Timeline

Carrot created this revision. Aug 25 2017, 4:33 PM
hfinkel added inline comments. Aug 31 2017, 9:34 AM
include/llvm/Analysis/TargetTransformInfo.h
115 ↗(On Diff #112755)

throughputs -> throughput

127 ↗(On Diff #112755)

This is good, but this is the wrong function. getUserCost is what the inliner uses, and could be viewed as returning normalized code size (right now), not throughput.

If we want to have a single interface like this for throughput, which just takes an arbitrary instruction, then we can, but we need to pull the code out of CostModelAnalysis::getInstructionCost (in lib/Analysis/CostModel.cpp). I'd be highly supportive of doing this (currently that code lives in the analysis pass because that's how we test the cost-model interface, but it's not otherwise reusable, and that's not great).

Then we need to decide how to integrate everything. One option is to add this TargetCostKind kind parameter to the existing throughput cost-model interface. Another option is to only have this parameter on this generic function taking an Instruction* and leave everything else alone. Or we leave all of this alone and just add new interfaces for now providing latency and size (which we do by renaming the 'user cost' function).

Carrot updated this revision to Diff 113616. Sep 1 2017, 3:41 PM
Carrot marked 2 inline comments as done.
Carrot added inline comments.
include/llvm/Analysis/TargetTransformInfo.h
127 ↗(On Diff #112755)

Thank you for the good suggestion.

I moved the code from CostModelAnalysis::getInstructionCost to TargetTransformInfo::getInstructionThroughput.

I chose the following interface function:

int getInstructionCost(const Instruction *I, enum TargetCostKind kind)
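
The new public function then reduces to a dispatch over the cost kind, roughly like this sketch (the exact body in the diff may differ, and routing TCK_CodeSize through getUserCost is an assumption based on the discussion above):

int TargetTransformInfo::getInstructionCost(const Instruction *I,
                                            enum TargetCostKind kind) const {
  switch (kind) {
  case TCK_Throughput:
    return getInstructionThroughput(I);
  case TCK_Latency:
    return getInstructionLatency(I);
  case TCK_CodeSize:
    return getUserCost(I); // Treat the "user cost" as the size model for now.
  }
  llvm_unreachable("Unknown instruction cost kind");
}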

hfinkel edited edge metadata. Sep 2 2017, 11:51 AM

This is looking good. A few more comments.

You can add an implementation of getInstructionLatency to include/llvm/CodeGen/BasicTTIImpl.h, something like:

int getInstructionLatency(const Instruction *I) {
  if (isa<LoadInst>(I))
    return getST()->getSchedModel().DefaultLoadLatency;

  return BaseT::getInstructionLatency(I);
}

The default value of DefaultLoadLatency is also 4, but I'd like this to happen before we end up with any actual users of this interface, plus...

The other thing that needs to happen is that we need some way of testing this. Can you please add a command-line option to the CostModel analysis that allows us to switch which cost model is being queried (the default can still be the reciprocal-throughput model)? Then you can add a couple of simple tests for the latency model (and the code-size model) as well. This will give us an independent way of testing the model (and that's important).
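
For reference, such a switch could be a cl::opt in lib/Analysis/CostModel.cpp, roughly like the sketch below (the -cost-kind flag name and value spellings are illustrative assumptions; TCK_RecipThroughput is the renamed enumerator suggested in the inline comments below):

// Requires "llvm/Support/CommandLine.h".
static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
    "cost-kind", cl::desc("Target cost kind"),
    cl::init(TargetTransformInfo::TCK_RecipThroughput),
    cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput,
                          "throughput", "Reciprocal throughput"),
               clEnumValN(TargetTransformInfo::TCK_Latency,
                          "latency", "Instruction latency"),
               clEnumValN(TargetTransformInfo::TCK_CodeSize,
                          "code-size", "Code size")));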

include/llvm/Analysis/TargetTransformInfo.h
112 ↗(On Diff #113616)

Please replace with something like:

/// There are several different cost models that can be customized by the target. The normalization of each cost model may be target specific.

114 ↗(On Diff #113616)

I'd remove the "The traditional cost model, it actually means", because both this model and the "user cost" model could be considered traditional (arguably, the latter is actually older).

114 ↗(On Diff #113616)

I'd prefer this be named TCK_RecipThroughput, or maybe just TCK_RThroughput, to make it clear what it means.

123 ↗(On Diff #113616)
/// Clients should use this interface to query the cost of an existing instruction. The instruction must have a valid parent (basic block).
136 ↗(On Diff #113616)

llvm_unreachable("Unknown instruction cost kind");

882 ↗(On Diff #113616)

Move the note about expense to the comment on getInstructionCost. This is true for the two existing cost models, and may become true for the latency estimate as well.

Carrot updated this revision to Diff 114096. Sep 6 2017, 4:39 PM
Carrot marked 6 inline comments as done.
Carrot added a subscriber: echristo.

Add a simple test case to make sure the interface works correctly.

hfinkel accepted this revision. Sep 6 2017, 6:10 PM

LGTM

See below, however, because we might want to document that the "code size" model could mean number of uops. On Xeon x86, for example, that's what the unroller wants to measure (at least for small loops). We may at some point want to split this into a uops and an actual code size, but we don't have an in-tree use case for that right now.

I should mention, however, that there will be some issues with the "code size" model, because right now it's not strictly measuring code size. It's mostly set up to return values based on TCC_Free/TCC_Basic/TCC_Expensive. For now this is essentially fine, because TCC_Free is 0 and TCC_Basic is 1. TCC_Expensive, however, is 4, which does not make sense as a code-size value. Some auditing reveals:

  1. The default code in include/llvm/Analysis/TargetTransformInfoImpl.h has:
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::UDiv:
case Instruction::URem:
  return TTI::TCC_Expensive;

and that does not make sense for code size. Moreover, as an "expense value", it's too low for division nearly everywhere (the reciprocal throughput and latency for division are generally closer to 10-20, not 4). However, we can't decrease this value until we stop using these values in the loop unroller to decide on unrolling factors (on x86, we're unrolling in many cases to fill the LSD's uop cache, so we don't want to unroll too much there); unless, that is, we say that code size might be measured in number of uops, not just the size of the encoding. The other places we return TCC_Expensive are for function calls (or things we think might turn into function calls). For those, I suppose 4 is an okay code-size estimate, if we had to pick a number.

  2. There is code in lib/Analysis/InlineCost.cpp that does this:
// If the instruction is floating point, and the target says this operation
// is expensive or the function has the "use-soft-float" attribute, this may
// eventually become a library call. Treat the cost as such.
if (I->getType()->isFloatingPointTy()) {
  // If the function has the "use-soft-float" attribute, mark it as
  // expensive.
  if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
      (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
    Cost += InlineConstants::CallPenalty;
}

The code that does the equality check against TCC_Expensive should probably use >= instead.
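
That is, something like this (a sketch of the suggested change, not part of this patch):

  if (TTI.getFPOpCost(I->getType()) >= TargetTransformInfo::TCC_Expensive ||
      (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
    Cost += InlineConstants::CallPenalty;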

And there are a number of other minor things (e.g., the speculation thresholds in SimplifyCFG) that we'll run into as we clean this up.

However, just adding this interface won't change the current behavior of anything, so I'm fine with moving ahead with this for now.

include/llvm/Analysis/TargetTransformInfo.h
881 ↗(On Diff #114096)

What does this return if the result is unknown? This should also be documented.

This revision is now accepted and ready to land. Sep 6 2017, 6:10 PM
Carrot updated this revision to Diff 114211. Sep 7 2017, 11:10 AM
Carrot marked an inline comment as done.

Thanks for the detailed explanation of the current "code size" model.
I will commit this patch.

This revision was automatically updated to reflect the committed changes.