Diff 118707

include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 190 Lines • ▼ Show 20 Lines	public:
/// that it supports an interface that provides extra information specific to		/// that it supports an interface that provides extra information specific to
/// the GEP operation.		/// the GEP operation.
int getGEPCost(Type *PointeeType, const Value *Ptr,		int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const;		ArrayRef<const Value *> Operands) const;

/// \brief Estimate the cost of a GEP operation when lowered.		/// \brief Estimate the cost of a GEP operation when lowered.
///		///
/// This user-based overload adds the ability to check if the GEP can be		/// This user-based overload adds the ability to check if the GEP can be
/// folded into its users.		/// folded into all of its users.
int getGEPCost(const GEPOperator *GEP,		int getGEPCost(const GEPOperator *GEP,
ArrayRef<const Value *> Operands) const;		ArrayRef<const Value *> Operands) const;

		/// \brief Estimate the cost of a GEP operation when lowered.
		///
		/// This user-based overload adds the ability to check if the GEP can be
		/// folded into its users in \p Users.
		int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) const;

/// \brief Estimate the cost of a EXT operation when lowered.		/// \brief Estimate the cost of a EXT operation when lowered.
///		///
/// The contract for this function is the same as \c getOperationCost except		/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to		/// that it supports an interface that provides extra information specific to
/// the EXT operation.		/// the EXT operation.
int getExtCost(const Instruction *I, const Value *Src) const;		int getExtCost(const Instruction *I, const Value *Src) const;

/// \brief Estimate the cost of a function call when lowered.		/// \brief Estimate the cost of a function call when lowered.
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	public:
/// cases.		/// cases.
///		///
/// \p Operands is a list of operands which can be a result of transformations		/// \p Operands is a list of operands which can be a result of transformations
/// of the current operands. The number of the operands on the list must equal		/// of the current operands. The number of the operands on the list must equal
/// to the number of the current operands the IR user has. Their order on the		/// to the number of the current operands the IR user has. Their order on the
/// list must be the same as the order of the current operands the IR user		/// list must be the same as the order of the current operands the IR user
/// has.		/// has.
///		///
		/// \p Users is a list of Users which use \p U in the IR. Currently, only GEPs
		/// consider the list of Users in the cost calculation.
		///
/// The returned cost is defined in terms of \c TargetCostConstants, see its		/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.		/// comments for a detailed explanation of the cost values.
int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;		int getUserCost(const User *U, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) const;

		/// \brief This is a helper function which passes \p Operands to the
		/// three-argument getUserCost with the list of all Users which use \p U.
		int getUserCost(const User *U, ArrayRef<const Value *> Operands) const {
		SmallVector<const User *, 8> Users(U->user_begin(), U->user_end());
		return getUserCost(U, Operands, Users);
		}

		/// \brief This is a helper function which passes \p Users to the
		/// three-argument getUserCost with the operands \p U has.
		int getUserCost(const User *U, ArrayRef<const User *> Users) const {
		SmallVector<const Value *, 4> Operands(U->value_op_begin(),
		U->value_op_end());
		return getUserCost(U, Operands, Users);
		}

/// \brief This is a helper function which calls the two-argument getUserCost		/// \brief This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.		/// with \p Operands which are the current operands U has.
int getUserCost(const User *U) const {		int getUserCost(const User *U) const {
SmallVector<const Value *, 4> Operands(U->value_op_begin(),		SmallVector<const Value *, 4> Operands(U->value_op_begin(),
U->value_op_end());		U->value_op_end());
return getUserCost(U, Operands);		return getUserCost(U, Operands);
}		}
▲ Show 20 Lines • Show All 648 Lines • ▼ Show 20 Lines
public:		public:
virtual ~Concept() = 0;		virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;		virtual const DataLayout &getDataLayout() const = 0;
virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;		virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,		virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;		ArrayRef<const Value *> Operands) = 0;
virtual int getGEPCost(const GEPOperator *GEP,		virtual int getGEPCost(const GEPOperator *GEP,
ArrayRef<const Value *> Operands) = 0;		ArrayRef<const Value *> Operands) = 0;
		virtual int getGEPCost(const GEPOperator *GEP,
		ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) = 0;
virtual int getExtCost(const Instruction *I, const Value *Src) = 0;		virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;		virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
virtual int getCallCost(const Function *F, int NumArgs) = 0;		virtual int getCallCost(const Function *F, int NumArgs) = 0;
virtual int getCallCost(const Function *F,		virtual int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments) = 0;		ArrayRef<const Value *> Arguments) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;		virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,		virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) = 0;		ArrayRef<Type *> ParamTys) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,		virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments) = 0;		ArrayRef<const Value *> Arguments) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,		virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) = 0;		unsigned &JTSize) = 0;
virtual int		virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;		ArrayRef<const User *> Users) = 0;
virtual bool hasBranchDivergence() = 0;		virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;		virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;		virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;		virtual unsigned getFlatAddressSpace() = 0;
virtual bool isLoweredToCall(const Function *F) = 0;		virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,		virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;		UnrollingPreferences &UP) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;		virtual bool isLegalAddImmediate(int64_t Imm) = 0;
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines	public:
int getGEPCost(Type *PointeeType, const Value *Ptr,		int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) override {		ArrayRef<const Value *> Operands) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);		return Impl.getGEPCost(PointeeType, Ptr, Operands);
}		}
int getGEPCost(const GEPOperator *GEP,		int getGEPCost(const GEPOperator *GEP,
ArrayRef<const Value *> Operands) override {		ArrayRef<const Value *> Operands) override {
return Impl.getGEPCost(GEP, Operands);		return Impl.getGEPCost(GEP, Operands);
}		}
		int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) override {
		return Impl.getGEPCost(GEP, Operands, Users);
		}
int getExtCost(const Instruction *I, const Value *Src) override {		int getExtCost(const Instruction *I, const Value *Src) override {
return Impl.getExtCost(I, Src);		return Impl.getExtCost(I, Src);
}		}
int getCallCost(FunctionType *FTy, int NumArgs) override {		int getCallCost(FunctionType *FTy, int NumArgs) override {
return Impl.getCallCost(FTy, NumArgs);		return Impl.getCallCost(FTy, NumArgs);
}		}
int getCallCost(const Function *F, int NumArgs) override {		int getCallCost(const Function *F, int NumArgs) override {
return Impl.getCallCost(F, NumArgs);		return Impl.getCallCost(F, NumArgs);
}		}
int getCallCost(const Function *F,		int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments) override {		ArrayRef<const Value *> Arguments) override {
return Impl.getCallCost(F, Arguments);		return Impl.getCallCost(F, Arguments);
}		}
unsigned getInliningThresholdMultiplier() override {		unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();		return Impl.getInliningThresholdMultiplier();
}		}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,		int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) override {		ArrayRef<Type *> ParamTys) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys);		return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
}		}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,		int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments) override {		ArrayRef<const Value *> Arguments) override {
return Impl.getIntrinsicCost(IID, RetTy, Arguments);		return Impl.getIntrinsicCost(IID, RetTy, Arguments);
}		}
int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {		int getUserCost(const User *U, ArrayRef<const Value *> Operands,
return Impl.getUserCost(U, Operands);		ArrayRef<const User *> Users) override {
		return Impl.getUserCost(U, Operands, Users);
}		}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }		bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool isSourceOfDivergence(const Value *V) override {		bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);		return Impl.isSourceOfDivergence(V);
}		}

bool isAlwaysUniform(const Value *V) override {		bool isAlwaysUniform(const Value *V) override {
return Impl.isAlwaysUniform(V);		return Impl.isAlwaysUniform(V);
▲ Show 20 Lines • Show All 415 Lines • Show Last 20 Lines

include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 721 Lines • ▼ Show 20 Lines	if (static_cast<T *>(this)->isLegalAddressingMode(
TargetType, const_cast<GlobalValue *>(BaseGV),		TargetType, const_cast<GlobalValue *>(BaseGV),
static_cast<int64_t>(BaseOffset.getLimitedValue()), HasBaseReg,		static_cast<int64_t>(BaseOffset.getLimitedValue()), HasBaseReg,
Scale, AS))		Scale, AS))
return TTI::TCC_Free;		return TTI::TCC_Free;
return TTI::TCC_Basic;		return TTI::TCC_Basic;
}		}

int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands) {		int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands) {
		SmallVector<const User *, 8> Users(GEP->user_begin(), GEP->user_end());
		return getGEPCost(GEP, Operands, Users);
		}

		int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) {
if (!isa<Instruction>(GEP))		if (!isa<Instruction>(GEP))
return TTI::TCC_Basic;		return TTI::TCC_Basic;

Type *PointeeType = GEP->getSourceElementType();		Type *PointeeType = GEP->getSourceElementType();
const Value *Ptr = GEP->getPointerOperand();		const Value *Ptr = GEP->getPointerOperand();

if (getGEPCost(PointeeType, Ptr, Operands) == TTI::TCC_Free) {		if (getGEPCost(PointeeType, Ptr, Operands) == TTI::TCC_Free) {
// Should check if the GEP is actually used in load / store instructions.		// Should check if the GEP is actually used in load / store instructions.
// For simplicity, we check only direct users of the GEP.		// For simplicity, we check only direct users of the GEP.
//		//
// FIXME: GEPs could also be folded away as a part of addressing mode in		// FIXME: GEPs could also be folded away as a part of addressing mode in
// load/store instructions together with other instructions (e.g., other		// load/store instructions together with other instructions (e.g., other
// GEPs). Handling all such cases must be expensive to be performed		// GEPs). Handling all such cases must be expensive to be performed
// in this function, so we stay conservative for now.		// in this function, so we stay conservative for now.
for (const User *U : GEP->users()) {		for (const User *U : Users) {
const Operator *UOP = cast<Operator>(U);		const Operator *UOP = cast<Operator>(U);
const Value *PointerOperand = nullptr;		const Value *PointerOperand = nullptr;
if (auto *LI = dyn_cast<LoadInst>(UOP))		if (auto *LI = dyn_cast<LoadInst>(UOP))
PointerOperand = LI->getPointerOperand();		PointerOperand = LI->getPointerOperand();
else if (auto *SI = dyn_cast<StoreInst>(UOP))		else if (auto *SI = dyn_cast<StoreInst>(UOP))
PointerOperand = SI->getPointerOperand();		PointerOperand = SI->getPointerOperand();

if ((!PointerOperand || PointerOperand != GEP) &&		if ((!PointerOperand || PointerOperand != GEP) &&
Show All 14 Lines	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
// certain intrinsics based on constants used as arguments.		// certain intrinsics based on constants used as arguments.
SmallVector<Type *, 8> ParamTys;		SmallVector<Type *, 8> ParamTys;
ParamTys.reserve(Arguments.size());		ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)		for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());		ParamTys.push_back(Arguments[Idx]->getType());
return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);		return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
}		}

unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {		unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) {
		hfinkel (inline comment, unsubmitted, not done): I see no reason to bifurcate the API by adding the Users parameter only to getGEPCost. getGEPCost may be the only case where we currently check users, but there's no reason that might not change in the future. Regardless, there's no reason to expose this distinction to users of the API. Please add the Users parameter to getUserCost here, and then pass it through to getGEPCost below. Then, in LICM, you can just call getUserCost for everything.
if (isa<PHINode>(U))		if (isa<PHINode>(U))
return TTI::TCC_Free; // Model all PHI nodes as free.		return TTI::TCC_Free; // Model all PHI nodes as free.

// Static alloca doesn't generate target instructions.		// Static alloca doesn't generate target instructions.
if (auto *A = dyn_cast<AllocaInst>(U))		if (auto *A = dyn_cast<AllocaInst>(U))
if (A->isStaticAlloca())		if (A->isStaticAlloca())
return TTI::TCC_Free;		return TTI::TCC_Free;

if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))		if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
return static_cast<T *>(this)->getGEPCost(GEP,		return static_cast<T *>(this)->getGEPCost(GEP, Operands.drop_front(),
Operands.drop_front());		Users);

if (auto CS = ImmutableCallSite(U)) {		if (auto CS = ImmutableCallSite(U)) {
const Function *F = CS.getCalledFunction();		const Function *F = CS.getCalledFunction();
if (!F) {		if (!F) {
// Just use the called value type.		// Just use the called value type.
Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();		Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
return static_cast<T *>(this)		return static_cast<T *>(this)
->getCallCost(cast<FunctionType>(FTy), CS.arg_size());		->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
Show All 16 Lines	unsigned getUserCost(const User U, ArrayRef<const Value > Operands,
return static_cast<T *>(this)->getOperationCost(		return static_cast<T *>(this)->getOperationCost(
Operator::getOpcode(U), U->getType(),		Operator::getOpcode(U), U->getType(),
U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);		U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
}		}

int getInstructionLatency(const Instruction *I) {		int getInstructionLatency(const Instruction *I) {
SmallVector<const Value *, 4> Operands(I->value_op_begin(),		SmallVector<const Value *, 4> Operands(I->value_op_begin(),
I->value_op_end());		I->value_op_end());
if (getUserCost(I, Operands) == TTI::TCC_Free)		SmallVector<const User *, 4> Users(I->user_begin(),
		hfinkel (inline comment, unsubmitted, not done): Can you add an overload of getUserCost that does not take a users array and composes the list itself, so that you don't need to change this?
		junbuml (author, inline comment, unsubmitted, not done): Before this change, we had the implementation of getUserCost(User *, ArrayRef<const Value *>) in TargetTransformInfoImpl.h, so it was okay to call getUserCost(User, Operands) here. In this change I moved the implementation of getUserCost(User *, ArrayRef<const Value *>) to TargetTransformInfo.h as a helper function. Now we have an implementation of getUserCost(User *, ArrayRef<const Value *>, ArrayRef<const User *>), so I made this change. Do you think we need to move the implementation of getUserCost(User *, ArrayRef<const Value *>) into TargetTransformInfoImpl.h? Then, we might need to override each getUserCost() overload for the targets overriding this function (e.g., X86TargetTransformInfo and HexagonTargetTransformInfo).
		I->user_end());
		if (getUserCost(I, Operands, Users) == TTI::TCC_Free)
return 0;		return 0;

if (isa<LoadInst>(I))		if (isa<LoadInst>(I))
return 4;		return 4;

Type *DstTy = I->getType();		Type *DstTy = I->getType();

// Usually an intrinsic is a simple instruction.		// Usually an intrinsic is a simple instruction.
Show All 23 Lines

include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 188 Lines • ▼ Show 20 Lines	int getGEPCost(Type PointeeType, const Value Ptr,
return BaseT::getGEPCost(PointeeType, Ptr, Operands);		return BaseT::getGEPCost(PointeeType, Ptr, Operands);
}		}

int getGEPCost(const GEPOperator *GEP,		int getGEPCost(const GEPOperator *GEP,
ArrayRef<const Value *> Operands) {		ArrayRef<const Value *> Operands) {
return BaseT::getGEPCost(GEP, Operands);		return BaseT::getGEPCost(GEP, Operands);
}		}

		int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands,
		ArrayRef<const User *>Users) {
		return BaseT::getGEPCost(GEP, Operands, Users);
		}

int getExtCost(const Instruction *I, const Value *Src) {		int getExtCost(const Instruction *I, const Value *Src) {
if (getTLI()->isExtFree(I))		if (getTLI()->isExtFree(I))
return TargetTransformInfo::TCC_Free;		return TargetTransformInfo::TCC_Free;

if (isa<ZExtInst>(I) || isa<SExtInst>(I))		if (isa<ZExtInst>(I) || isa<SExtInst>(I))
if (const LoadInst *LI = dyn_cast<LoadInst>(Src))		if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
if (getTLI()->isExtLoad(LI, I, DL))		if (getTLI()->isExtLoad(LI, I, DL))
return TargetTransformInfo::TCC_Free;		return TargetTransformInfo::TCC_Free;
▲ Show 20 Lines • Show All 1,098 Lines • Show Last 20 Lines

include/llvm/Transforms/Utils/LoopUtils.h

	Show First 20 Lines • Show All 418 Lines • ▼ Show 20 Lines
	/// dominated by the specified block, and that are in the current loop) in			/// dominated by the specified block, and that are in the current loop) in
	/// reverse depth first order w.r.t the DominatorTree. This allows us to visit			/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
	/// uses before definitions, allowing us to sink a loop body in one pass without			/// uses before definitions, allowing us to sink a loop body in one pass without
	/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,			/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
	/// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all			/// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
	/// instructions of the loop and loop safety information as			/// instructions of the loop and loop safety information as
	/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.			/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
	bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,			bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
	TargetLibraryInfo *, Loop *, AliasSetTracker *,			TargetLibraryInfo *, TargetTransformInfo *, Loop *,
	LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);			AliasSetTracker *, LoopSafetyInfo *,
				OptimizationRemarkEmitter *ORE);

	/// \brief Walk the specified region of the CFG (defined by all blocks			/// \brief Walk the specified region of the CFG (defined by all blocks
	/// dominated by the specified block, and that are in the current loop) in depth			/// dominated by the specified block, and that are in the current loop) in depth
	/// first order w.r.t the DominatorTree. This allows us to visit definitions			/// first order w.r.t the DominatorTree. This allows us to visit definitions
	/// before uses, allowing us to hoist a loop body in one pass without iteration.			/// before uses, allowing us to hoist a loop body in one pass without iteration.
	/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,			/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
	/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the			/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
	/// loop and loop safety information as arguments. Diagnostics is emitted via \p			/// loop and loop safety information as arguments. Diagnostics is emitted via \p
	▲ Show 20 Lines • Show All 112 Lines • Show Last 20 Lines

lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 87 Lines • ▼ Show 20 Lines	int TargetTransformInfo::getGEPCost(Type PointeeType, const Value Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);		return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}		}

int TargetTransformInfo::getGEPCost(const GEPOperator *GEP,		int TargetTransformInfo::getGEPCost(const GEPOperator *GEP,
ArrayRef<const Value *> Operands) const {		ArrayRef<const Value *> Operands) const {
return TTIImpl->getGEPCost(GEP, Operands);		return TTIImpl->getGEPCost(GEP, Operands);
}		}

		int TargetTransformInfo::getGEPCost(const GEPOperator *GEP,
		ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) const {
		return TTIImpl->getGEPCost(GEP, Operands, Users);
		}

int TargetTransformInfo::getExtCost(const Instruction *I,		int TargetTransformInfo::getExtCost(const Instruction *I,
const Value *Src) const {		const Value *Src) const {
return TTIImpl->getExtCost(I, Src);		return TTIImpl->getExtCost(I, Src);
}		}

int TargetTransformInfo::getIntrinsicCost(		int TargetTransformInfo::getIntrinsicCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {		Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);		int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
assert(Cost >= 0 && "TTI should not produce negative costs!");		assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;		return Cost;
}		}

unsigned		unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,		TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) const {		unsigned &JTSize) const {
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);		return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}		}

int TargetTransformInfo::getUserCost(const User *U,		int TargetTransformInfo::getUserCost(const User *U,
ArrayRef<const Value *> Operands) const {		ArrayRef<const Value *> Operands,
int Cost = TTIImpl->getUserCost(U, Operands);		ArrayRef<const User *> Users) const {
		int Cost = TTIImpl->getUserCost(U, Operands, Users);
assert(Cost >= 0 && "TTI should not produce negative costs!");		assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;		return Cost;
}		}

bool TargetTransformInfo::hasBranchDivergence() const {		bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();		return TTIImpl->hasBranchDivergence();
}		}

▲ Show 20 Lines • Show All 1,076 Lines • Show Last 20 Lines

lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 605 Lines • ▼ Show 20 Lines	if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
if (const Function *F = CS.getCalledFunction()) {		if (const Function *F = CS.getCalledFunction()) {
if (!isLoweredToCall(F))		if (!isLoweredToCall(F))
continue;		continue;
}		}
return;		return;
}		}
SmallVector<const Value*, 4> Operands(I.value_op_begin(),		SmallVector<const Value*, 4> Operands(I.value_op_begin(),
I.value_op_end());		I.value_op_end());
Cost += getUserCost(&I, Operands);		SmallVector<const User*, 4> Users(I.user_begin(),
		hfinkel (inline comment, unsubmitted, not done): And then you also don't need this modification?
		I.user_end());
		Cost += getUserCost(&I, Operands, Users);
}		}

UP.Partial = true;		UP.Partial = true;
UP.Runtime = true;		UP.Runtime = true;
UP.UnrollRemainder = true;		UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = 4;		UP.DefaultUnrollRuntimeCount = 4;

// Force unrolling small loops can be very useful because of the branch		// Force unrolling small loops can be very useful because of the branch
// taken cost of the backedge.		// taken cost of the backedge.
if (Cost < 12)		if (Cost < 12)
UP.Force = true;		UP.Force = true;
}		}

lib/Target/Hexagon/HexagonTargetTransformInfo.h

Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	public:

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{

unsigned getNumberOfRegisters(bool vector) const;		unsigned getNumberOfRegisters(bool vector) const;

/// @}		/// @}

int getUserCost(const User *U, ArrayRef<const Value *> Operands);		int getUserCost(const User *U, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users);

// Hexagon specific decision to generate a lookup table.		// Hexagon specific decision to generate a lookup table.
bool shouldBuildLookupTables() const;		bool shouldBuildLookupTables() const;
};		};

} // end namespace llvm		} // end namespace llvm

#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H		#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H

lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
	unsigned HexagonTTIImpl::getPrefetchDistance() const {			unsigned HexagonTTIImpl::getPrefetchDistance() const {
	return getST()->getL1PrefetchDistance();			return getST()->getL1PrefetchDistance();
	}			}

	unsigned HexagonTTIImpl::getCacheLineSize() const {			unsigned HexagonTTIImpl::getCacheLineSize() const {
	return getST()->getL1CacheLineSize();			return getST()->getL1CacheLineSize();
	}			}

	int HexagonTTIImpl::getUserCost(const User *U,			int HexagonTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
	ArrayRef<const Value *> Operands) {			ArrayRef<const User *> Users) {
	auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {			auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
	if (!CI->isIntegerCast())			if (!CI->isIntegerCast())
	return false;			return false;
	const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));			const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
	// Technically, this code could allow multiple uses of the load, and			// Technically, this code could allow multiple uses of the load, and
	// check if all the uses are the same extension operation, but this			// check if all the uses are the same extension operation, but this
	// should be sufficient for most cases.			// should be sufficient for most cases.
	if (!LI || !LI->hasOneUse())			if (!LI || !LI->hasOneUse())
	return false;			return false;

	// Only extensions from an integer type shorter than 32-bit to i32			// Only extensions from an integer type shorter than 32-bit to i32
	// can be folded into the load.			// can be folded into the load.
	unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();			unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
	unsigned DBW = CI->getDestTy()->getIntegerBitWidth();			unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
	return DBW == 32 && (SBW < DBW);			return DBW == 32 && (SBW < DBW);
	};			};

	if (const CastInst *CI = dyn_cast<const CastInst>(U))			if (const CastInst *CI = dyn_cast<const CastInst>(U))
	if (isCastFoldedIntoLoad(CI))			if (isCastFoldedIntoLoad(CI))
	return TargetTransformInfo::TCC_Free;			return TargetTransformInfo::TCC_Free;
	return BaseT::getUserCost(U, Operands);			return BaseT::getUserCost(U, Operands, Users);
	}			}

	bool HexagonTTIImpl::shouldBuildLookupTables() const {			bool HexagonTTIImpl::shouldBuildLookupTables() const {
	return EmitLookupTables;			return EmitLookupTables;
	}			}

lib/Target/X86/X86TargetTransformInfo.h

Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines	public:
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,		int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,		unsigned Factor, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace);		unsigned Alignment, unsigned AddressSpace);

int getIntImmCost(int64_t);		int getIntImmCost(int64_t);

int getIntImmCost(const APInt &Imm, Type *Ty);		int getIntImmCost(const APInt &Imm, Type *Ty);

unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);		unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users);

int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);		int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,		int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);		Type *Ty);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,		bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);		TargetTransformInfo::LSRCost &C2);
bool isLegalMaskedLoad(Type *DataType);		bool isLegalMaskedLoad(Type *DataType);
bool isLegalMaskedStore(Type *DataType);		bool isLegalMaskedStore(Type *DataType);
Show All 19 Lines

lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 2,309 Lines • ▼ Show 20 Lines	case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))		if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TTI::TCC_Free;		return TTI::TCC_Free;
break;		break;
}		}
return X86TTIImpl::getIntImmCost(Imm, Ty);		return X86TTIImpl::getIntImmCost(Imm, Ty);
}		}

unsigned X86TTIImpl::getUserCost(const User *U,		unsigned X86TTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands) {		ArrayRef<const Value *> Operands,
		ArrayRef<const User *> Users) {
if (isa<StoreInst>(U)) {		if (isa<StoreInst>(U)) {
Value *Ptr = U->getOperand(1);		Value *Ptr = U->getOperand(1);
// Store instruction with index and scale costs 2 Uops.		// Store instruction with index and scale costs 2 Uops.
// Check the preceding GEP to identify non-const indices.		// Check the preceding GEP to identify non-const indices.
if (auto GEP = dyn_cast<GetElementPtrInst>(Ptr)) {		if (auto GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))		if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
return TTI::TCC_Basic * 2;		return TTI::TCC_Basic * 2;
}		}
return TTI::TCC_Basic;		return TTI::TCC_Basic;
}		}
return BaseT::getUserCost(U, Operands);		return BaseT::getUserCost(U, Operands, Users);
}		}

// Return an average cost of Gather / Scatter instruction, maybe improved later		// Return an average cost of Gather / Scatter instruction, maybe improved later
int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type SrcVTy, Value Ptr,		int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type SrcVTy, Value Ptr,
unsigned Alignment, unsigned AddressSpace) {		unsigned Alignment, unsigned AddressSpace) {

assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");		assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
unsigned VF = SrcVTy->getVectorNumElements();		unsigned VF = SrcVTy->getVectorNumElements();
▲ Show 20 Lines • Show All 437 Lines • Show Last 20 Lines

lib/Transforms/Scalar/LICM.cpp

Show First 20 Lines • Show All 82 Lines • ▼ Show 20 Lines	DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));		cl::desc("Disable memory promotion in LICM pass"));

static cl::opt<uint32_t> MaxNumUsesTraversed(		static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),		"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "		cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));		"invariance in loop using invariant start (default = 8)"));

static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);		static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,		static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);		const LoopSafetyInfo *SafetyInfo,
		TargetTransformInfo *TTI,
		bool &FreeInLoop);
static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,		static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,		static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,
const Loop CurLoop, AliasSetTracker CurAST,		const Loop CurLoop, AliasSetTracker CurAST,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE,
		bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,		static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,		const DominatorTree *DT,
const Loop *CurLoop,		const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,		OptimizationRemarkEmitter *ORE,
const Instruction *CtxI = nullptr);		const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,		static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
const AAMDNodes &AAInfo,		const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);		AliasSetTracker *CurAST);
static Instruction *		static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,		CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,		const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo);		const LoopSafetyInfo *SafetyInfo);

namespace {		namespace {
struct LoopInvariantCodeMotion {		struct LoopInvariantCodeMotion {
bool runOnLoop(Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,		bool runOnLoop(Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,
TargetLibraryInfo TLI, ScalarEvolution SE,		TargetLibraryInfo TLI, TargetTransformInfo TTI,
OptimizationRemarkEmitter *ORE, bool DeleteAST);		ScalarEvolution SE, OptimizationRemarkEmitter ORE,
		bool DeleteAST);

DenseMap<Loop , AliasSetTracker > &getLoopToAliasSetMap() {		DenseMap<Loop , AliasSetTracker > &getLoopToAliasSetMap() {
return LoopToAliasSetMap;		return LoopToAliasSetMap;
}		}

private:		private:
DenseMap<Loop , AliasSetTracker > LoopToAliasSetMap;		DenseMap<Loop , AliasSetTracker > LoopToAliasSetMap;

Show All 23 Lines	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
// pass. Function analyses need to be preserved across loop transformations		// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).		// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());		OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
return LICM.runOnLoop(L,		return LICM.runOnLoop(L,
&getAnalysis<AAResultsWrapperPass>().getAAResults(),		&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),		&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),		&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),		&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
		&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
		*L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, &ORE, false);		SE ? &SE->getSE() : nullptr, &ORE, false);
}		}

/// This transformation requires natural loop information & requires that		/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...		/// loop preheaders be inserted into the CFG...
///		///
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();		AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();		AU.addRequired<TargetLibraryInfoWrapperPass>();
		AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);		getLoopAnalysisUsage(AU);
}		}

using llvm::Pass::doFinalization;		using llvm::Pass::doFinalization;

bool doFinalization() override {		bool doFinalization() override {
assert(LICM.getLoopToAliasSetMap().empty() &&		assert(LICM.getLoopToAliasSetMap().empty() &&
"Didn't free loop alias sets");		"Didn't free loop alias sets");
Show All 24 Lines	PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,

auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);		auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);
// FIXME: This should probably be optional rather than required.		// FIXME: This should probably be optional rather than required.
if (!ORE)		if (!ORE)
report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "		report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");		"cached at a higher level");

LoopInvariantCodeMotion LICM;		LoopInvariantCodeMotion LICM;
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))		if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, ORE,
		true))
return PreservedAnalyses::all();		return PreservedAnalyses::all();

auto PA = getLoopPassPreservedAnalyses();		auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();		PA.preserveSet<CFGAnalyses>();
return PA;		return PA;
}		}

char LegacyLICMPass::ID = 0;		char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",		INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)		false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)		INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
		INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,		INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)		false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }		Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

/// Hoist expressions out of the specified loop. Note, alias info for inner		/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple		/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.		/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid		/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.		/// memory leak.
///		///
bool LoopInvariantCodeMotion::runOnLoop(Loop L, AliasAnalysis AA,		bool LoopInvariantCodeMotion::runOnLoop(
LoopInfo LI, DominatorTree DT,		Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,
TargetLibraryInfo *TLI,		TargetLibraryInfo TLI, TargetTransformInfo TTI, ScalarEvolution *SE,
ScalarEvolution *SE,		OptimizationRemarkEmitter *ORE, bool DeleteAST) {
OptimizationRemarkEmitter *ORE,
bool DeleteAST) {
bool Changed = false;		bool Changed = false;

assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");		assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);		AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);

// Get the preheader block to move instructions into...		// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();		BasicBlock *Preheader = L->getLoopPreheader();

// Compute loop safety information.		// Compute loop safety information.
LoopSafetyInfo SafetyInfo;		LoopSafetyInfo SafetyInfo;
computeLoopSafetyInfo(&SafetyInfo, L);		computeLoopSafetyInfo(&SafetyInfo, L);

// We want to visit all of the instructions in this loop... that are not parts		// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of		// of our subloops (they have already had their invariants hoisted out of
// their loop, into this loop, so there is no need to process the BODIES of		// their loop, into this loop, so there is no need to process the BODIES of
// the subloops).		// the subloops).
//		//
// Traverse the body of the loop in depth first order on the dominator tree so		// Traverse the body of the loop in depth first order on the dominator tree so
// that we are guaranteed to see definitions before we see uses. This allows		// that we are guaranteed to see definitions before we see uses. This allows
// us to sink instructions in one pass, without iteration. After sinking		// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.		// instructions, we perform another pass to hoist them out of the loop.
//		//
if (L->hasDedicatedExits())		if (L->hasDedicatedExits())
Changed \|= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,		Changed \|= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST, &SafetyInfo, ORE);		CurAST, &SafetyInfo, ORE);
if (Preheader)		if (Preheader)
Changed \|= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,		Changed \|= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST, &SafetyInfo, ORE);		CurAST, &SafetyInfo, ORE);

// Now that all loop invariants have been removed from the loop, promote any		// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.		// memory references to scalars that we can.
// Don't sink stores from loops without dedicated block exits. Exits		// Don't sink stores from loops without dedicated block exits. Exits
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
}		}

/// Walk the specified region of the CFG (defined by all blocks dominated by		/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth		/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before		/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.		/// definitions, allowing us to sink a loop body in one pass without iteration.
///		///
bool llvm::sinkRegion(DomTreeNode N, AliasAnalysis AA, LoopInfo *LI,		bool llvm::sinkRegion(DomTreeNode N, AliasAnalysis AA, LoopInfo *LI,
DominatorTree DT, TargetLibraryInfo TLI, Loop *CurLoop,		DominatorTree DT, TargetLibraryInfo TLI,
		TargetTransformInfo TTI, Loop CurLoop,
AliasSetTracker CurAST, LoopSafetyInfo SafetyInfo,		AliasSetTracker CurAST, LoopSafetyInfo SafetyInfo,
OptimizationRemarkEmitter *ORE) {		OptimizationRemarkEmitter *ORE) {

// Verify inputs.		// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&		assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&		CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion");		"Unexpected input to sinkRegion");

Show All 24 Lines	for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
continue;		continue;
}		}

// Check to see if we can sink this instruction to the exit blocks		// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if the all users of the instruction are		// of the loop. We can do this if the all users of the instruction are
// outside of the loop. In this case, it doesn't even matter if the		// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.		// operands of the instruction are loop invariant.
//		//
if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&		bool FreeInLoop = false;
		if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI,
		FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {		canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
		if (!FreeInLoop)
++II;		++II;
Changed \|= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);		Changed \|= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE,
		FreeInLoop);
}		}
}		}
}		}
return Changed;		return Changed;
}		}

/// Walk the specified region of the CFG (defined by all blocks dominated by		/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first		/// the specified block, and that are in the current loop) in depth first
▲ Show 20 Lines • Show All 276 Lines • ▼ Show 20 Lines
static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {		static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
for (const Value *IncValue : PN.incoming_values())		for (const Value *IncValue : PN.incoming_values())
if (IncValue != &I)		if (IncValue != &I)
return false;		return false;

return true;		return true;
}		}

		/// Return true if the instruction is free in the loop.
		static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
		const TargetTransformInfo *TTI) {
		hfinkelUnsubmitted Done Reply Inline Actions This isn't a very good FIXME because it doesn't explain what you might fix about it. Are there other things for which we might check? hfinkel: This isn't a very good FIXME because it doesn't explain what you might fix about it. Are there…

		SmallVector<const User *, 8> UsersInLoop;
		for (const User *U : I.users()) {
		bmakamUnsubmitted Not Done Reply Inline Actions Do we need to check I->getParent() == UserI->getParent()? We already check if CurLoop->contains(UserI) right? bmakam: Do we need to check I->getParent() == UserI->getParent()? We already check if CurLoop->contains…
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions Even in the same loop, if they are in different blocks, the current ISel may not fold the GEP into the load. junbuml: Even in the same loop, if they are in different blocks, the current ISel may not fold the GEP…
		const Instruction *UI = cast<Instruction>(U);
		hfinkelUnsubmitted Not Done Reply Inline Actions This can just be: return TTI->getUserCost(GEP) == TargetTransformInfo::TCC_Free; (this code here to call getGEPCost seems to duplicate the implementation logic of getUserCost) On that note, you might not even have to restrict this to GEPs used by Loads, but rather, you could allow all zero-cost instructions. hfinkel: This can just be: return TTI->getUserCost(GEP) == TargetTransformInfo::TCC_Free; (this code…
		hfinkelUnsubmitted Not Done Reply Inline Actions (Also, I think that using all 'Free' instructions has the benefit of being correlated with the unrolling cost of the loop - these are the instructions that won't increase the unrolling cost of the loop). hfinkel: (Also, I think that using all 'Free' instructions has the benefit of being correlated with the…
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions I don't think a Free result from getUserCost guarantees that the instruction is always folded away. So, I specifically check for a GEP that could be a legal addressing mode and is used by a load / store in the same block, expecting isel to fold it into its users. junbuml: I don't think a Free result from getUserCost guarantees that the instruction is always folded away. So…
		hfinkelUnsubmitted Not Done Reply Inline Actions > "I don't think Free from getUserCost guarantee that the instruction is folded away always." Yes, it should be. Free means free. If not, then there's something wrong with the cost model that we should fix. > "It might be possible to allow this for all other zero-cost instructions. However, I'm not perfectly sure if this is good or needed for all other free instructions. For example, I'm not clear if sinking a free trunc is needed? However, in GEP case, by sinking a GEP, we can decouple the users of the GEP: one in loop and one in outside of the loop so that the one in loop will be folded in isel if they are in the same block." I don't understand how the advantages, or disadvantages, of doing this for a free truncate are different from a free GEP. In both cases, we decouple things inside the loop from outside the loop, allowing the folding to take place later. > "I think we need extensive tests before opening up this for all other free instructions, and isolating this for GEP as a first step would make review process easy." Then please run tests (I assume you ran tests for this, as proposed, too). This makes the review harder. LICM is part of our canonicalization process, and we need to have an understandable canonical form. The more that this turns into a patchwork of heuristics, the harder it is to figure out what our canonical form is. "We always decouple free instructions" is easy to explain. "We sometimes decouple GEPs if they happen to be used in certain ways" is harder. hfinkel: > I don't think Free from getUserCost guarantee that the instruction is folded away always.
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions Use getUserCost() directly, instead of getGEPCost(). It might be possible to allow this for all other zero-cost instructions. However, I'm not perfectly sure if this is good or needed for all other free instructions. For example, I'm not clear if sinking a free trunc is needed. However, in the GEP case, by sinking a GEP, we can decouple the users of the GEP (one in the loop and one outside of the loop) so that the one in the loop will be folded in isel if they are in the same block. I think we need extensive tests before opening this up for all other free instructions, and isolating this for GEP as a first step would make the review process easier. junbuml: Use getUserCost() directly, instead of getGEPCost(). It might be possible to allow this for all…
		if (CurLoop->contains(UI))
		UsersInLoop.push_back(U);
		}
		return TTI->getUserCost(&I, UsersInLoop) ==
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions As r314923 was reverted, we cannot simply use getUserCost for all instructions because it optimistically assumes that a GEP will fold into an addressing mode regardless of its users. I don't think we can rely on this optimistic assumption here. To handle GEPs properly in this change, we can check GEP's users here directly, or we can add a function in TTI to see if an instruction is really foldable. junbuml: As r314923 was reverted, we cannot simply use getUserCost for all instructions because it…
		TargetTransformInfo::TCC_Free;
		}

/// Return true if the only users of this instruction are outside of		/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit		/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.		/// blocks of the loop.
///		///
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,		/// We also return true if the instruction could be folded away in lowering.
const LoopSafetyInfo *SafetyInfo) {		/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
		static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
		const LoopSafetyInfo *SafetyInfo,
		TargetTransformInfo *TTI,
		bool &FreeInLoop) {
const auto &BlockColors = SafetyInfo->BlockColors;		const auto &BlockColors = SafetyInfo->BlockColors;
		bool IsFree = isFreeInLoop(I, CurLoop, TTI);

for (const User *U : I.users()) {		for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);		const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {		if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
const BasicBlock *BB = PN->getParent();		const BasicBlock *BB = PN->getParent();
// We cannot sink uses in catchswitches.		// We cannot sink uses in catchswitches.
if (isa<CatchSwitchInst>(BB->getTerminator()))		if (isa<CatchSwitchInst>(BB->getTerminator()))
return false;		return false;

Show All 20 Lines	if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)		for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I)		if (PN->getIncomingValue(i) == &I)
if (CurLoop->contains(PN->getIncomingBlock(i)))		if (CurLoop->contains(PN->getIncomingBlock(i)))
return false;		return false;

continue;		continue;
}		}

if (CurLoop->contains(UI))		if (CurLoop->contains(UI)) {
		if (IsFree) {
		FreeInLoop = true;
		hfinkelUnsubmitted Done Reply Inline Actions The test for `!ContainFoldableUsersInLoop` limits us to looking for only one foldable user within the loop. Why? hfinkel: The test for `!ContainFoldableUsersInLoop` limits us to looking for only one foldable user…
		continue;
		}
return false;		return false;
}		}
		}
return true;		return true;
}		}

static Instruction *		static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,		CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,		const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo) {		const LoopSafetyInfo *SafetyInfo) {
Instruction *New;		Instruction *New;
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
/// When an instruction is found to only be used outside of the loop, this		/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.		/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its		/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.		/// position, and may either delete it or move it to outside of the loop.
///		///
static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,		static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,
const Loop CurLoop, AliasSetTracker CurAST,		const Loop CurLoop, AliasSetTracker CurAST,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {		OptimizationRemarkEmitter *ORE,
		bool FreeInLoop) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");		DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)		ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
<< "sinking " << ore::NV("Inst", &I));		<< "sinking " << ore::NV("Inst", &I));
bool Changed = false;		bool Changed = false;
if (isa<LoadInst>(I))		if (isa<LoadInst>(I))
++NumMovedLoads;		++NumMovedLoads;
else if (isa<CallInst>(I))		else if (isa<CallInst>(I))
++NumMovedCalls;		++NumMovedCalls;
++NumSunk;		++NumSunk;
Changed = true;		Changed = true;

#ifndef NDEBUG		#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;		SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);		CurLoop->getUniqueExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),		SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());		ExitBlocks.end());
#endif		#endif

// Clones of this instruction. Don't create more than one per exit block!		// Clones of this instruction. Don't create more than one per exit block!
SmallDenseMap<BasicBlock , Instruction , 32> SunkCopies;		SmallDenseMap<BasicBlock , Instruction , 32> SunkCopies;
		SmallPtrSet<Instruction *, 2> UsersToBeRemoved;

// If this instruction is only used outside of the loop, then all users are		// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of		// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.		// the instruction.
while (!I.use_empty()) {		for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
Value::user_iterator UI = I.user_begin();
auto User = cast<Instruction>(UI);		auto User = cast<Instruction>(UI);
		Use &U = UI.getUse();
		++UI;

		if (CurLoop->contains(User) \|\| UsersToBeRemoved.count(User))
		continue;

if (!DT->isReachableFromEntry(User->getParent())) {		if (!DT->isReachableFromEntry(User->getParent())) {
User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));		User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
continue;		continue;
}		}
// The user must be a PHI node.		// The user must be a PHI node.
PHINode *PN = cast<PHINode>(User);		PHINode *PN = cast<PHINode>(User);

// Surprisingly, instructions can be used outside of loops without any		// Surprisingly, instructions can be used outside of loops without any
// exits. This can only happen in PHI nodes if the incoming block is		// exits. This can only happen in PHI nodes if the incoming block is
// unreachable.		// unreachable.
Use &U = UI.getUse();
BasicBlock *BB = PN->getIncomingBlock(U);		BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {		if (!DT->isReachableFromEntry(BB)) {
U = UndefValue::get(I.getType());		U = UndefValue::get(I.getType());
continue;		continue;
}		}

BasicBlock *ExitBlock = PN->getParent();		BasicBlock *ExitBlock = PN->getParent();
assert(ExitBlockSet.count(ExitBlock) &&		assert(ExitBlockSet.count(ExitBlock) &&
"The LCSSA PHI is not in an exit block!");		"The LCSSA PHI is not in an exit block!");

Instruction *New;		Instruction *New;
auto It = SunkCopies.find(ExitBlock);		auto It = SunkCopies.find(ExitBlock);
if (It != SunkCopies.end())		if (It != SunkCopies.end())
New = It->second;		New = It->second;
else		else
New = SunkCopies[ExitBlock] =		New = SunkCopies[ExitBlock] =
CloneInstructionInExitBlock(I, ExitBlock, PN, LI, SafetyInfo);		CloneInstructionInExitBlock(I, ExitBlock, PN, LI, SafetyInfo);

		UsersToBeRemoved.insert(PN);
PN->replaceAllUsesWith(New);		PN->replaceAllUsesWith(New);
PN->eraseFromParent();
}		}

		for (auto *User : UsersToBeRemoved)
		User->eraseFromParent();

		if (!FreeInLoop) {
CurAST->deleteValue(&I);		CurAST->deleteValue(&I);
I.eraseFromParent();		I.eraseFromParent();
		}
return Changed;		return Changed;
}		}

/// When an instruction is found to only use loop invariant operands that		/// When an instruction is found to only use loop invariant operands that
/// is safe to hoist, this instruction is called to do the dirty work.		/// is safe to hoist, this instruction is called to do the dirty work.
///		///
static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,		static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
▲ Show 20 Lines • Show All 517 Lines • Show Last 20 Lines

test/Transforms/LICM/sink-foldable.ll

This file was added.

				; RUN: opt < %s -licm -S | FileCheck %s
				target triple = "aarch64--linux-gnueabi"

				; test1: %arrayidx1 is used by the load inside the loop and by the phi in
				; loopexit1. The FileCheck patterns below expect a getelementptr in loopexit1
				; whose base is a phi of %arrayidx0, i.e. a copy of the address computation
				; placed in the exit block.
				; CHECK-LABEL:@test1
				; CHECK-LABEL:loopexit1:
				; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
				; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
				define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
				entry:
				  %cmp0 = icmp slt i32 0, %j
				  br i1 %cmp0, label %for.body.lr.ph, label %return

				for.body.lr.ph:
				  br label %for.body

				for.body:
				  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
				  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]

				  %i0.ext = sext i32 %i0 to i64
				  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
				  %l0 = load i8*, i8** %arrayidx0, align 8
				  %cmp1 = icmp ugt i8* %l0, %Q
				  br i1 %cmp1, label %loopexit0, label %if.end

				if.end: ; preds = %for.body
				  ; This address is consumed by the load here and by %p2 outside the loop.
				  %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1
				  %l1 = load i8*, i8** %arrayidx1, align 8
				  %cmp4 = icmp ugt i8* %l1, %Q
				  %i.add = add nsw i32 %i0, 2
				  br i1 %cmp4, label %loopexit1, label %for.body

				loopexit0:
				  %p1 = phi i8** [%arrayidx0, %for.body]
				  br label %return

				loopexit1:
				  %p2 = phi i8** [%arrayidx1, %if.end]
				  br label %return

				return:
				  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
				  ret i8** %retval.0
				}

				; test2: the loop has a separate latch block (for.cond) and three exits.
				; %arrayidx2 is used by the load in if.end and by the phi in loopexit2. The
				; FileCheck patterns below expect a getelementptr in loopexit2 whose base is
				; a phi of %add.ptr.
				; CHECK-LABEL: @test2
				; CHECK-LABEL: loopexit2:
				; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ]
				; CHECK: getelementptr inbounds i8*, i8** %[[PHI]]
				define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) {

				entry:
				  br label %for.body

				for.cond:
				  %i.addr.0 = phi i32 [ %add, %if.end ]
				  %P.addr.0 = phi i8** [ %add.ptr, %if.end ]
				  %cmp = icmp slt i32 %i.addr.0, %j
				  br i1 %cmp, label %for.body, label %loopexit0

				for.body:
				  %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
				  %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]

				  %idx.ext = sext i32 %i.addr to i64
				  %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext
				  %l0 = load i8*, i8** %add.ptr, align 8

				  %cmp1 = icmp ugt i8* %l0, %Q
				  br i1 %cmp1, label %loopexit1, label %if.end

				if.end:
				  %add.i = add i32 %i.addr, 1
				  %idx2.ext = sext i32 %add.i to i64
				  ; This address is consumed by the load here and by %p2 outside the loop.
				  %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext
				  %l1 = load i8*, i8** %arrayidx2, align 8
				  %cmp2 = icmp ugt i8* %l1, %Q
				  %add = add nsw i32 %add.i, 1
				  br i1 %cmp2, label %loopexit2, label %for.cond

				loopexit0:
				  %p0 = phi i8** [ null, %for.cond ]
				  br label %return

				loopexit1:
				  %p1 = phi i8** [ %add.ptr, %for.body ]
				  br label %return

				loopexit2:
				  %p2 = phi i8** [ %arrayidx2, %if.end ]
				  br label %return

				return:
				  %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
				  ret i8** %retval.0
				}


				; test3: a chain of two values with users outside the loop. %trunc feeds the
				; getelementptr %arrayidx1 inside the loop and the call to @dummy in
				; loopexit1; %arrayidx1 feeds the load inside the loop and the phi in
				; loopexit1. The FileCheck patterns below expect both the trunc and the
				; getelementptr to appear in loopexit1, built from phis of %add and %P.addr,
				; with the call using the trunc found there.
				; CHECK-LABEL: @test3
				; CHECK-LABEL: loopexit1:
				; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ]
				; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ]
				; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32
				; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]]
				; CHECK: call void @dummy(i32 %[[TRUNC]])
				define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) {
				entry:
				  %cmp0 = icmp slt i64 0, %j
				  br i1 %cmp0, label %for.body.lr.ph, label %return

				for.body.lr.ph:
				  br label %for.body

				for.body:
				  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
				  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]

				  %i0.ext = sext i32 %i0 to i64
				  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
				  %l0 = load i8*, i8** %arrayidx0, align 8
				  %cmp1 = icmp ugt i8* %l0, %Q
				  br i1 %cmp1, label %loopexit0, label %if.end

				if.end: ; preds = %for.body
				  %add = add i64 %i0.ext, 1
				  ; %trunc is an index for the address below and an argument to @dummy
				  ; in loopexit1.
				  %trunc = trunc i64 %add to i32
				  %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc
				  %l1 = load i8*, i8** %arrayidx1, align 8
				  %cmp4 = icmp ugt i8* %l1, %Q
				  %i.add = add nsw i32 %i0, 2
				  br i1 %cmp4, label %loopexit1, label %for.body

				loopexit0:
				  %p1 = phi i8** [%arrayidx0, %for.body]
				  br label %return

				loopexit1:
				  %p2 = phi i8** [%arrayidx1, %if.end]
				  call void @dummy(i32 %trunc)
				  br label %return

				return:
				  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
				  ret i8** %retval.0
				}

				; External function with no body in this file; @test3 calls it from loopexit1 so that %trunc has a user outside the loop.
				declare void @dummy(i32)

This is an archive of the discontinued LLVM Phabricator instance.

[LICM] Allow sinking when foldable in loop
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 118707

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

include/llvm/Transforms/Utils/LoopUtils.h

lib/Analysis/TargetTransformInfo.cpp

lib/Target/ARM/ARMTargetTransformInfo.cpp

lib/Target/Hexagon/HexagonTargetTransformInfo.h

lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

lib/Target/X86/X86TargetTransformInfo.h

lib/Target/X86/X86TargetTransformInfo.cpp

lib/Transforms/Scalar/LICM.cpp

test/Transforms/LICM/sink-foldable.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LICM] Allow sinking when foldable in loop
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 118707

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

include/llvm/Transforms/Utils/LoopUtils.h

lib/Analysis/TargetTransformInfo.cpp

lib/Target/ARM/ARMTargetTransformInfo.cpp

lib/Target/Hexagon/HexagonTargetTransformInfo.h

lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

lib/Target/X86/X86TargetTransformInfo.h

lib/Target/X86/X86TargetTransformInfo.cpp

lib/Transforms/Scalar/LICM.cpp

test/Transforms/LICM/sink-foldable.ll

[LICM] Allow sinking when foldable in loop
ClosedPublic