Diff 300268

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 607 Lines • ▼ Show 20 Lines	bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,		bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0,		unsigned AddrSpace = 0,
Instruction *I = nullptr) const;		Instruction *I = nullptr) const;

/// Return true if LSR cost of C1 is lower than C2.		/// Return true if LSR cost of C1 is lower than C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,		bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;		TargetTransformInfo::LSRCost &C2) const;

		/// Return true if LSR major cost is register number. Targets which implement
		/// their own isLSRCostLess and unset register number as major cost should
		/// return false, otherwise return true.
		bool isRegNumMajorCostOfLSR() const;
		samparkerUnsubmitted Done Reply Inline Actions Probably a good idea to include LSR in the name, like the other queries. samparker: Probably a good idea to include LSR in the name, like the other queries.

/// \returns true if LSR should not optimize a chain that includes \p I.		/// \returns true if LSR should not optimize a chain that includes \p I.
bool isProfitableLSRChainElement(Instruction *I) const;		bool isProfitableLSRChainElement(Instruction *I) const;

/// Return true if the target can fuse a compare and branch.		/// Return true if the target can fuse a compare and branch.
/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost		/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
/// calculation for the instructions in a loop.		/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;		bool canMacroFuseCmp() const;

▲ Show 20 Lines • Show All 781 Lines • ▼ Show 20 Lines	public:
virtual bool isLegalAddImmediate(int64_t Imm) = 0;		virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;		virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV,		virtual bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV,
int64_t BaseOffset, bool HasBaseReg,		int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,		int64_t Scale, unsigned AddrSpace,
Instruction *I) = 0;		Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,		virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;		TargetTransformInfo::LSRCost &C2) = 0;
		virtual bool isRegNumMajorCostOfLSR() = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;		virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;		virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE,		virtual bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE,
LoopInfo LI, DominatorTree DT, AssumptionCache *AC,		LoopInfo LI, DominatorTree DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;		TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;		virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;		virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
▲ Show 20 Lines • Show All 305 Lines • ▼ Show 20 Lines	bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
Instruction *I) override {		Instruction *I) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,		return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace, I);		AddrSpace, I);
}		}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,		bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {		TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);		return Impl.isLSRCostLess(C1, C2);
}		}
		bool isRegNumMajorCostOfLSR() override {
		return Impl.isRegNumMajorCostOfLSR();
		}
bool isProfitableLSRChainElement(Instruction *I) override {		bool isProfitableLSRChainElement(Instruction *I) override {
return Impl.isProfitableLSRChainElement(I);		return Impl.isProfitableLSRChainElement(I);
}		}
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }		bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE, LoopInfo *LI,		bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE, LoopInfo *LI,
DominatorTree DT, AssumptionCache AC,		DominatorTree DT, AssumptionCache AC,
TargetLibraryInfo *LibInfo) override {		TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);		return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
▲ Show 20 Lines • Show All 493 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 186 Lines • ▼ Show 20 Lines	public:

bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {		bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,		return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <		C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,		std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);		C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}		}

		bool isRegNumMajorCostOfLSR() { return true; }

bool isProfitableLSRChainElement(Instruction *I) { return false; }		bool isProfitableLSRChainElement(Instruction *I) { return false; }

bool canMacroFuseCmp() { return false; }		bool canMacroFuseCmp() { return false; }

bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE, LoopInfo *LI,		bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE, LoopInfo *LI,
DominatorTree DT, AssumptionCache AC,		DominatorTree DT, AssumptionCache AC,
TargetLibraryInfo *LibInfo) {		TargetLibraryInfo *LibInfo) {
return false;		return false;
▲ Show 20 Lines • Show All 860 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines	bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
EVT VT = getTLI()->getValueType(DL, Ty);		EVT VT = getTLI()->getValueType(DL, Ty);
return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);		return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
}		}

bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {		bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);		return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}		}

		bool isRegNumMajorCostOfLSR() {
		return TargetTransformInfoImplBase::isRegNumMajorCostOfLSR();
		}

bool isProfitableLSRChainElement(Instruction *I) {		bool isProfitableLSRChainElement(Instruction *I) {
return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);		return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
}		}

int getScalingFactorCost(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,		int getScalingFactorCost(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {		bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
TargetLoweringBase::AddrMode AM;		TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;		AM.BaseGV = BaseGV;
▲ Show 20 Lines • Show All 1,638 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 371 Lines • ▼ Show 20 Lines	bool TargetTransformInfo::isLegalAddressingMode(Type Ty, GlobalValue BaseGV,
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,		return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace, I);		Scale, AddrSpace, I);
}		}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {		bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);		return TTIImpl->isLSRCostLess(C1, C2);
}		}

		bool TargetTransformInfo::isRegNumMajorCostOfLSR() const {
		return TTIImpl->isRegNumMajorCostOfLSR();
		}

bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {		bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
return TTIImpl->isProfitableLSRChainElement(I);		return TTIImpl->isProfitableLSRChainElement(I);
}		}

bool TargetTransformInfo::canMacroFuseCmp() const {		bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();		return TTIImpl->canMacroFuseCmp();
}		}

▲ Show 20 Lines • Show All 1,042 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Show First 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE, LoopInfo *LI,
TargetLibraryInfo *LibInfo);		TargetLibraryInfo *LibInfo);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);		bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,		void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);		TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,		void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);		TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,		bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);		TargetTransformInfo::LSRCost &C2);
		bool isRegNumMajorCostOfLSR();

/// @}		/// @}

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{
bool useColdCCForColdCall(Function &F);		bool useColdCCForColdCall(Function &F);
bool enableAggressiveInterleaving(bool LoopHasReductions);		bool enableAggressiveInterleaving(bool LoopHasReductions);
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,		TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Show First 20 Lines • Show All 1,198 Lines • ▼ Show 20 Lines	if (!LsrNoInsnsCost)
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,		return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <		C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,		std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);		C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
else		else
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);		return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}		}

		bool PPCTTIImpl::isRegNumMajorCostOfLSR() {
		return false;
		}

bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,		bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) {		MemIntrinsicInfo &Info) {
switch (Inst->getIntrinsicID()) {		switch (Inst->getIntrinsicID()) {
case Intrinsic::ppc_altivec_lvx:		case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:		case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:		case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_altivec_lvehx:		case Intrinsic::ppc_altivec_lvehx:
case Intrinsic::ppc_altivec_lvewx:		case Intrinsic::ppc_altivec_lvewx:
Show All 33 Lines

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Show First 20 Lines • Show All 2,835 Lines • ▼ Show 20 Lines	static bool isProfitableChain(IVChain &Chain,

if (!Chain.hasIncs())		if (!Chain.hasIncs())
return false;		return false;

if (!Users.empty()) {		if (!Users.empty()) {
LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";		LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
for (Instruction *Inst		for (Instruction *Inst
: Users) { dbgs() << " " << *Inst << "\n"; });		: Users) { dbgs() << " " << *Inst << "\n"; });
return false;		return false;
		samparkerUnsubmitted Not Done Reply Inline Actions This can potentially interfere with the calls to isProfitableLSRChainElement, which also isn't specifically interested in register uses. It will require more cycles to process but how about moving this to after the for loop below? samparker: This can potentially interfere with the calls to isProfitableLSRChainElement, which also isn't…
		shchenzAuthorUnsubmitted Done Reply Inline Actions good catch. Updated shchenz: good catch. Updated
}		}
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");		assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

// The chain itself may require a register, so initialize cost to 1.		// The chain itself may require a register, so initialize cost to 1.
int cost = 1;		int cost = 1;

// A complete chain likely eliminates the need for keeping the original IV in		// A complete chain likely eliminates the need for keeping the original IV in
// a register. LSR does not currently know how to form a complete chain unless		// a register. LSR does not currently know how to form a complete chain unless
// the header phi already exists.		// the header phi already exists.
if (isa<PHINode>(Chain.tailUserInst())		if (isa<PHINode>(Chain.tailUserInst())
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {		&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;		--cost;
}		}
const SCEV *LastIncExpr = nullptr;		const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;		unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;		unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;		unsigned NumReusedIncrements = 0;

if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))		// If any LSRUse in the chain is marked as profitable by target, mark this
		samparkerUnsubmitted Not Done Reply Inline Actions You don't need this now :) samparker: You don't need this now :)
		shchenzAuthorUnsubmitted Done Reply Inline Actions seems we still need this? The following loop does not contain the first element in the chain. // Return the first increment in the chain. const_iterator begin() const { assert(!Incs.empty()); return std::next(Incs.begin()); } const_iterator end() const { return Incs.end(); } shchenz: seems we still need this? The following loop does not contain the first element in the chain.
		shchenzAuthorUnsubmitted Done Reply Inline Actions Ah, I know what you mean, we should change the loop in line 2865 to use `Chain.Incs` and thus we can delete the first one shchenz: Ah, I know what you mean, we should change the loop in line 2865 to use `Chain.Incs` and thus…
return true;		// chain as profitable.
		for (const IVInc &Inc : Chain.Incs)
for (const IVInc &Inc : Chain) {
if (TTI.isProfitableLSRChainElement(Inc.UserInst))		if (TTI.isProfitableLSRChainElement(Inc.UserInst))
return true;		return true;

		// If the number of registers is not the major cost, we cannot benefit from
		// this profitable chain optimization, which is based on the number of registers.
		jonpaUnsubmitted Not Done Reply Inline Actions This seems unclear to me - should it be "If register number is not the...", or perhaps "Only if register..." ? It would also be a bit more readable to me if the new hook was called "isNumRegsMajorCostOfLSR", since NumRegs is the name for that counter. (And the comment "number of registers").. but maybe that's just me... jonpa: This seems unclear to me - should it be "If register number is not the...", or perhaps…
		shchenzAuthorUnsubmitted Done Reply Inline Actions Appreciate the comments. Addressed in NFC patch https://reviews.llvm.org/rG00e573cadb2791804fd0859d0ee05b27b702e11e shchenz: Appreciate the comments. Addressed in NFC patch https://reviews.llvm.
		// FIXME: add profitable chain optimization for other kinds of major cost,
		// for example the number of instructions.
		if (!TTI.isRegNumMajorCostOfLSR())
		samparkerUnsubmitted Not Done Reply Inline Actions This will still interfere because if any element in the chain is a ProfitableLSRChainElement, we should return true. So the major cost check should only be executing just before we start doing final cost calculation outside of the loop. samparker: This will still interfere because if any element in the chain is a ProfitableLSRChainElement…
		return false;

		for (const IVInc &Inc : Chain) {
if (Inc.IncExpr->isZero())		if (Inc.IncExpr->isZero())
continue;		continue;

// Incrementing by zero or some constant is neutral. We assume constants can		// Incrementing by zero or some constant is neutral. We assume constants can
// be folded into an addressing mode or an add's immediate operand.		// be folded into an addressing mode or an add's immediate operand.
if (isa<SCEVConstant>(Inc.IncExpr)) {		if (isa<SCEVConstant>(Inc.IncExpr)) {
++NumConstIncrements;		++NumConstIncrements;
continue;		continue;
▲ Show 20 Lines • Show All 209 Lines • ▼ Show 20 Lines	for (PHINode &PN : L->getHeader()->phis()) {
if (!SE.isSCEVable(PN.getType()))		if (!SE.isSCEVable(PN.getType()))
continue;		continue;

Instruction *IncV =		Instruction *IncV =
dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));		dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)		if (IncV)
ChainInstruction(&PN, IncV, ChainUsersVec);		ChainInstruction(&PN, IncV, ChainUsersVec);
}		}
// Remove any unprofitable chains.		// Remove any unprofitable chains.
		steven.zhangUnsubmitted Not Done Reply Inline Actions I am not familiar with the code logic. But it makes more sense to bail out here when the instruction is the major cost. steven.zhang: I am not familiar with the code logic. But it makes more sense to bail out here when the…
		shchenzAuthorUnsubmitted Done Reply Inline Actions agree with your concern here. return early always good for compiling time. But the logic here should be specific for `isProfitableChain`, it would make more sense to keep it inside `isProfitableChain`. For example what if we have more than one caller of `isProfitableChain` function? shchenz: agree with your concern here. return early always good for compiling time. But the logic here…
unsigned ChainIdx = 0;		unsigned ChainIdx = 0;
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();		for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {		UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],		if (!isProfitableChain(IVChainVec[UsersIdx],
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))		ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;		continue;
// Preserve the chain at UsesIdx.		// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)		if (ChainIdx != UsersIdx)
▲ Show 20 Lines • Show All 2,807 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

	Show All 10 Lines
	; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill			; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
	; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill			; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
	; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill			; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
	; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill			; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
	; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill			; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
	; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill			; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
	; CHECK-NEXT: bge 0, .LBB0_6			; CHECK-NEXT: bge 0, .LBB0_6
	; CHECK-NEXT: # %bb.1: # %.preheader			; CHECK-NEXT: # %bb.1: # %.preheader
	; CHECK-NEXT: mulld 11, 8, 5			; CHECK-NEXT: addi 30, 5, 1
	; CHECK-NEXT: mulld 12, 9, 8			; CHECK-NEXT: addi 29, 5, 3
	; CHECK-NEXT: sldi 0, 10, 3			; CHECK-NEXT: addi 28, 5, 2
	; CHECK-NEXT: sldi 11, 11, 3			; CHECK-NEXT: mulld 12, 8, 5
	; CHECK-NEXT: sldi 12, 12, 3			; CHECK-NEXT: addi 3, 3, 16
	; CHECK-NEXT: add 11, 3, 11			; CHECK-NEXT: mulld 0, 9, 8
				; CHECK-NEXT: sldi 11, 10, 3
				; CHECK-NEXT: mulld 30, 8, 30
				; CHECK-NEXT: mulld 29, 8, 29
				; CHECK-NEXT: mulld 8, 8, 28
	; CHECK-NEXT: b .LBB0_3			; CHECK-NEXT: b .LBB0_3
	; CHECK-NEXT: .p2align 4			; CHECK-NEXT: .p2align 4
	; CHECK-NEXT: .LBB0_2:			; CHECK-NEXT: .LBB0_2:
	; CHECK-NEXT: add 5, 5, 9			; CHECK-NEXT: add 5, 5, 9
	; CHECK-NEXT: add 11, 11, 12			; CHECK-NEXT: add 12, 12, 0
				; CHECK-NEXT: add 30, 30, 0
				; CHECK-NEXT: add 29, 29, 0
				; CHECK-NEXT: add 8, 8, 0
	; CHECK-NEXT: cmpd 5, 7			; CHECK-NEXT: cmpd 5, 7
	; CHECK-NEXT: bge 0, .LBB0_6			; CHECK-NEXT: bge 0, .LBB0_6
	; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1			; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1
	; CHECK-NEXT: # Child Loop BB0_5 Depth 2			; CHECK-NEXT: # Child Loop BB0_5 Depth 2
	; CHECK-NEXT: sub 30, 5, 10			; CHECK-NEXT: sub 28, 5, 10
	; CHECK-NEXT: cmpd 6, 30			; CHECK-NEXT: cmpd 6, 28
	; CHECK-NEXT: bge 0, .LBB0_2			; CHECK-NEXT: bge 0, .LBB0_2
	; CHECK-NEXT: # %bb.4:			; CHECK-NEXT: # %bb.4:
	; CHECK-NEXT: addi 28, 5, 1			; CHECK-NEXT: add 26, 6, 12
	; CHECK-NEXT: addi 27, 5, 2			; CHECK-NEXT: add 25, 6, 30
	; CHECK-NEXT: addi 26, 5, 3
	; CHECK-NEXT: mulld 29, 5, 8
	; CHECK-NEXT: sldi 25, 6, 3
	; CHECK-NEXT: mulld 28, 28, 8
	; CHECK-NEXT: mulld 27, 27, 8
	; CHECK-NEXT: mulld 26, 26, 8
	; CHECK-NEXT: .p2align 5
	; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
	; CHECK-NEXT: # => This Inner Loop Header: Depth=2
	; CHECK-NEXT: lfdx 0, 4, 25
	; CHECK-NEXT: lfdx 1, 11, 25
	; CHECK-NEXT: add 24, 6, 29			; CHECK-NEXT: add 24, 6, 29
	; CHECK-NEXT: mr 23, 3			; CHECK-NEXT: add 23, 6, 8
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: sldi 27, 6, 3
				; CHECK-NEXT: sldi 26, 26, 3
				; CHECK-NEXT: sldi 25, 25, 3
	; CHECK-NEXT: sldi 24, 24, 3			; CHECK-NEXT: sldi 24, 24, 3
				; CHECK-NEXT: sldi 23, 23, 3
				; CHECK-NEXT: add 27, 4, 27
				; CHECK-NEXT: add 26, 3, 26
				; CHECK-NEXT: add 25, 3, 25
	; CHECK-NEXT: add 24, 3, 24			; CHECK-NEXT: add 24, 3, 24
	; CHECK-NEXT: lfd 1, 8(24)			; CHECK-NEXT: add 23, 3, 23
				; CHECK-NEXT: .p2align 5
				; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
				; CHECK-NEXT: # => This Inner Loop Header: Depth=2
				; CHECK-NEXT: lfd 0, 0(27)
				; CHECK-NEXT: lfd 1, -16(26)
				; CHECK-NEXT: add 6, 6, 10
				; CHECK-NEXT: cmpd 6, 28
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 16(24)			; CHECK-NEXT: lfd 1, -8(26)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 24(24)			; CHECK-NEXT: lfd 1, 0(26)
	; CHECK-NEXT: add 24, 6, 28
	; CHECK-NEXT: sldi 24, 24, 3
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfdux 1, 23, 24			; CHECK-NEXT: lfd 1, 8(26)
	; CHECK-NEXT: add 24, 6, 27			; CHECK-NEXT: add 26, 26, 11
	; CHECK-NEXT: sldi 24, 24, 3
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 8(23)			; CHECK-NEXT: lfd 1, -16(25)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 16(23)			; CHECK-NEXT: lfd 1, -8(25)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 24(23)			; CHECK-NEXT: lfd 1, 0(25)
	; CHECK-NEXT: mr 23, 3
	; CHECK-NEXT: lfdux 2, 23, 24
	; CHECK-NEXT: add 24, 6, 26
	; CHECK-NEXT: add 6, 6, 10
	; CHECK-NEXT: sldi 24, 24, 3
	; CHECK-NEXT: cmpd 6, 30
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 8(23)			; CHECK-NEXT: lfd 1, 8(25)
	; CHECK-NEXT: xsadddp 0, 0, 2			; CHECK-NEXT: add 25, 25, 11
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 16(23)			; CHECK-NEXT: lfd 1, -16(23)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 24(23)			; CHECK-NEXT: lfd 1, -8(23)
	; CHECK-NEXT: mr 23, 3
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfdux 1, 23, 24			; CHECK-NEXT: lfd 1, 0(23)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 8(23)			; CHECK-NEXT: lfd 1, 8(23)
				; CHECK-NEXT: add 23, 23, 11
				; CHECK-NEXT: xsadddp 0, 0, 1
				; CHECK-NEXT: lfd 1, -16(24)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 16(23)			; CHECK-NEXT: lfd 1, -8(24)
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: lfd 1, 24(23)			; CHECK-NEXT: lfd 1, 0(24)
				; CHECK-NEXT: xsadddp 0, 0, 1
				; CHECK-NEXT: lfd 1, 8(24)
				; CHECK-NEXT: add 24, 24, 11
	; CHECK-NEXT: xsadddp 0, 0, 1			; CHECK-NEXT: xsadddp 0, 0, 1
	; CHECK-NEXT: stfdx 0, 4, 25			; CHECK-NEXT: stfd 0, 0(27)
	; CHECK-NEXT: add 25, 25, 0			; CHECK-NEXT: add 27, 27, 11
	; CHECK-NEXT: blt 0, .LBB0_5			; CHECK-NEXT: blt 0, .LBB0_5
	; CHECK-NEXT: b .LBB0_2			; CHECK-NEXT: b .LBB0_2
	; CHECK-NEXT: .LBB0_6:			; CHECK-NEXT: .LBB0_6:
	; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload			; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
	; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload			; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
	; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload			; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
	; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload			; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
	; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload			; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload
	▲ Show 20 Lines • Show All 106 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[LSR] ignore profitable chain optimization when instruction number is the major cost
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300268

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LSR] ignore profitable chain optimization when instruction number is the major costClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300268

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

[LSR] ignore profitable chain optimization when instruction number is the major cost
ClosedPublic