Diff 265752

llvm/lib/Analysis/ScalarEvolution.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,925 Lines • ▼ Show 20 Lines	assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");		"SCEVMulExpr operand types don't match!");
#endif		#endif

// Sort by complexity, this groups all similar expression types together.		// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);		GroupByComplexity(Ops, &LI, DT);

Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);		Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);

// Limit recursion calls depth.		// Limit recursion calls depth, but fold all-constant expressions.
if (Depth > MaxArithDepth \|\| hasHugeExpression(Ops))		// `Ops` is sorted, so it's enough to check just last one.
		if ((Depth > MaxArithDepth \|\| hasHugeExpression(Ops)) &&
		!isa<SCEVConstant>(Ops.back()))
return getOrCreateMulExpr(Ops, Flags);		return getOrCreateMulExpr(Ops, Flags);

if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {		if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
		mkazantsevUnsubmitted Not Done Reply Inline Actions This is too expensive. Provided that ops are sorted by type, it's enough to check the last one. mkazantsev: This is too expensive. Provided that ops are sorted by type, it's enough to check the last one.
static_cast<SCEVMulExpr *>(S)->setNoWrapFlags(Flags);		static_cast<SCEVMulExpr *>(S)->setNoWrapFlags(Flags);
return S;		return S;
}		}

// If there are any constants, fold them together.		// If there are any constants, fold them together.
unsigned Idx = 0;		unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {		if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {

Show All 14 Lines	if (Ops.size() == 2)
SCEV::FlagAnyWrap, Depth + 1);		SCEV::FlagAnyWrap, Depth + 1);

++Idx;		++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {		while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!		// We found two constants, fold them together!
ConstantInt *Fold =		ConstantInt *Fold =
ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());		ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
Ops[0] = getConstant(Fold);		Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element		Ops.erase(Ops.begin()+1); // Erase the folded element
		mkazantsevUnsubmitted Done Reply Inline Actions Is this change really necessary? Looks unrelated, please commit it separately unless there is a reason to have it here. I'm also sure that a similar change can be done in Add and some other SCEVs. mkazantsev: Is this change really necessary? Looks unrelated, please commit it separately unless there is a…
		dantrushinAuthorUnsubmitted Done Reply Inline Actions This is copied exactly from getAddExpr (the only other place where depth limits are applied) dantrushin: This is copied exactly from getAddExpr (the only other place where depth limits are applied)
if (Ops.size() == 1) return Ops[0];		if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);		LHSC = cast<SCEVConstant>(Ops[0]);
}		}

// If we are left with a constant one being multiplied, strip it off.		// If we are left with a constant one being multiplied, strip it off.
if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {		if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {
Ops.erase(Ops.begin());		Ops.erase(Ops.begin());
--Idx;		--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {		} else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
// If we have a multiply of zero, it will always be zero.		// If we have a multiply of zero, it will always be zero.
return Ops[0];		return Ops[0];
} else if (Ops[0]->isAllOnesValue()) {		} else if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the		// If we have a mul by -1 of an add, try distributing the -1 among the
// add operands.		// add operands.
if (Ops.size() == 2) {		if (Ops.size() == 2) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {		if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;		SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;		bool AnyFolded = false;
for (const SCEV *AddOp : Add->operands()) {		for (const SCEV *AddOp : Add->operands()) {
const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,		const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
Depth + 1);		Depth + 1);
		mkazantsevUnsubmitted Done Reply Inline Actions We can potentially recurse here with unlimited depth, which is undesirable. Can you please avoid this? mkazantsev: We can potentially recurse here with unlimited depth, which is undesirable. Can you please…
		dantrushinAuthorUnsubmitted Done Reply Inline Actions Again, the idea was that just constant folding should not take too much time even if going deep. This also keeps the code clean and simple. Ok, I'll change it to check for all-constant ops dantrushin: Again, the idea was that just constant folding should not take too much time even if going deep.
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;		if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);		NewOps.push_back(Mul);
}		}
if (AnyFolded)		if (AnyFolded)
return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);		return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {		} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.		// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;		SmallVector<const SCEV *, 4> Operands;
for (const SCEV *AddRecOp : AddRec->operands())		for (const SCEV *AddRecOp : AddRec->operands())
Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,		Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
Depth + 1));		Depth + 1));

return getAddRecExpr(Operands, AddRec->getLoop(),		return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));		AddRec->getNoWrapFlags(SCEV::FlagNW));
}		}
}		}
}		}

if (Ops.size() == 1)		if (Ops.size() == 1)
return Ops[0];		return Ops[0];
}		}

// Skip over the add expression until we get to a multiply.		// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)		while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;		++Idx;
		efriedmaUnsubmitted Done Reply Inline Actions I think you need to rearrange the code a bit more: with the current version of the patch, we increase the depth before checking the current depth. The general idea makes sense: we should perform optimizations that aren't recursive before the depth check. efriedma: I think you need to rearrange the code a bit more: with the current version of the patch, we…
		dantrushinAuthorUnsubmitted Done Reply Inline Actions Eli, could you explain? Do you mean the `Depth + 1` used in the calls above on the constant-folding path? My reasoning was as follows: if we're already at the depth limit (`Depth == MaxArithDepth`), then we don't want to recurse into possibly expensive folding. On the other hand, constant folding will be attempted anyway, which is what we want here. Does that make sense? dantrushin: Eli, could you explain? Do you mean the `Depth + 1` used in the calls above on the constant-folding path?

// If there are mul operands inline them all into this expression.		// If there are mul operands inline them all into this expression.
if (Idx < Ops.size()) {		if (Idx < Ops.size()) {
bool DeletedMul = false;		bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {		while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
if (Ops.size() > MulOpsInlineThreshold)		if (Ops.size() > MulOpsInlineThreshold)
break;		break;
// If we have an mul, expand the mul operands onto the end of the		// If we have an mul, expand the mul operands onto the end of the
▲ Show 20 Lines • Show All 9,680 Lines • Show Last 20 Lines

llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll

This file was added.

				; RUN: opt -passes 'strength-reduce' -scalar-evolution-max-arith-depth=2 -S < %s \| FileCheck %s
				; RUN: opt -loop-reduce -scalar-evolution-max-arith-depth=2 -S < %s \| FileCheck %s

				; This test should just compile cleanly without assertions.

				target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"

				define void @test(i32 %A, i32 %B, i32 %C) {
				; CHECK-LABEL: @test(
				; CHECK: inner_loop:
				; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32
				; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32
				; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32
				; CHECK: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 3
				; CHECK-NEXT: [[LSR_IV_NEXT2:%.*]] = add i32 [[LSR_IV1]], 3
				; CHECK-NEXT: [[LSR_IV_NEXT4:%.*]] = add i32 [[LSR_IV3]], -3
				;
				; The CHECK lines above expect three LSR-generated induction phis at the
				; top of inner_loop, stepped by 3, 3 and -3 respectively.
				;
				; Shape of the function: a two-level loop nest (outer_loop -> guard ->
				; preheader -> inner_loop -> outer_tail). The inner loop feeds a chain of
				; sub/mul/shl/add instructions back into %phi5 through %i30.
				entry:
				br label %outer_loop

				; Outer loop header: the phis take %A / %B on entry and constants (204, 243)
				; or %i35 when re-entered from outer_tail.
				outer_loop:
				%phi2 = phi i32 [ %A, %entry ], [ 204, %outer_tail ]
				%phi3 = phi i32 [ %A, %entry ], [ 243, %outer_tail ]
				%phi4 = phi i32 [ %B, %entry ], [ %i35, %outer_tail ]
				br label %guard

				; Skips the inner loop entirely when %C is zero.
				guard:
				%lcmp.mod = icmp eq i32 %C, 0
				br i1 %lcmp.mod, label %outer_tail, label %preheader

				; NOTE(review): %i15 has no visible use in this function.
				preheader:
				%i15 = shl i32 %B, 1
				br label %inner_loop

				; Inner loop: %iter counts down from %C to 0, %phi6 steps by -3 (%i33), and
				; %phi5 is recomputed each iteration from the arithmetic chain ending in %i30.
				inner_loop:
				%phi5 = phi i32 [ %phi3, %preheader ], [ %i30, %inner_loop ]
				%phi6 = phi i32 [ %phi2, %preheader ], [ %i33, %inner_loop ]
				%iter = phi i32 [ %C, %preheader ], [ %iter.sub, %inner_loop ]
				%i17 = sub i32 %phi4, %phi6
				%i18 = sub i32 14, %phi5
				%i19 = mul i32 %i18, %C
				%factor.prol = shl i32 %phi5, 1
				%i20 = add i32 %i17, %factor.prol
				%i21 = add i32 %i20, %B
				%i22 = add i32 %i21, %i19
				%i23 = sub i32 14, %i22
				%i24 = mul i32 %i23, %C
				%factor.1.prol = shl i32 %i22, 1
				%i25 = add i32 %i17, %factor.1.prol
				%i27 = add i32 %i25, %i24
				; NOTE(review): %i29 has no visible use in this function.
				%i29 = mul i32 %i25, %C
				%factor.2.prol = shl i32 %i27, 1
				%i30 = add i32 %i17, %factor.2.prol
				%i33 = add nsw i32 %phi6, -3
				%iter.sub = add i32 %iter, -1
				%iter.cmp = icmp eq i32 %iter.sub, 0
				br i1 %iter.cmp, label %outer_tail, label %inner_loop

				; Outer loop latch: exits once %A - %phi7 is greater than 9876 (signed).
				outer_tail:
				%phi7 = phi i32 [ %phi2, %guard ], [ %i33, %inner_loop ]
				%i35 = sub i32 %A, %phi7
				%cmp = icmp sgt i32 %i35, 9876
				br i1 %cmp, label %exit, label %outer_loop

				exit:
				ret void

				}

llvm/test/Analysis/ScalarEvolution/limit-depth.ll

Show First 20 Lines • Show All 120 Lines • ▼ Show 20 Lines	loop2:
%iv2.inc = add nuw i64 %iv2, 1		%iv2.inc = add nuw i64 %iv2, 1
%cond2 = icmp sle i64 %iv2.inc, 50		%cond2 = icmp sle i64 %iv2.inc, 50
br i1 %cond2, label %loop2, label %exit		br i1 %cond2, label %loop2, label %exit

exit:		exit:
%trunc2 = trunc i64 %iv2.inc to i32		%trunc2 = trunc i64 %iv2.inc to i32
ret void		ret void
}		}

		; Check that all-constant SCEVs are folded regardless of the depth limit.
		define void @test_mul_const(i32 %a) {
		; CHECK-LABEL: @test_mul_const
		; CHECK: %test3 = mul i32 %test2, 3
		; CHECK-NEXT: --> (9 + (3 * (3 * %a)))
		; CHECK: %test4 = mul i32 3, 3
		; CHECK-NEXT: --> 9 U: [9,10) S: [9,10)
		;
		; %test3 computes 3 * (3 + 3 * %a). The expected SCEV has the outer multiply
		; distributed over the add, with the constant product 3 * 3 folded to 9 and
		; the nested (3 * (3 * %a)) left as is.
		; %test4 multiplies two constants and is expected to fold to the constant 9.
		%test = mul i32 3, %a
		%test2 = add i32 3, %test
		%test3 = mul i32 %test2, 3
		%test4 = mul i32 3, 3
		ret void
		}

This is an archive of the discontinued LLVM Phabricator instance.

[SCEV] Constant fold MultExpr before applying depth limit.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 265752

llvm/lib/Analysis/ScalarEvolution.cpp

llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll

llvm/test/Analysis/ScalarEvolution/limit-depth.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SCEV] Constant fold MultExpr before applying depth limit.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 265752

llvm/lib/Analysis/ScalarEvolution.cpp

llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll

llvm/test/Analysis/ScalarEvolution/limit-depth.ll

[SCEV] Constant fold MultExpr before applying depth limit.
ClosedPublic