This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/trunk/
-
trunk/
-
lib/Analysis/
-
Analysis/
-
ScalarEvolution.cpp
-
test/Analysis/ScalarEvolution/
-
Analysis/
-
ScalarEvolution/
-
binomial-explision.ll

Differential D53189

[SCEV] Avoid redundant computations when doing AddRec merge
ClosedPublic

Authored by mkazantsev on Oct 12 2018, 4:38 AM.

Download Raw Diff

Details

Reviewers

sanjoy
rtereshin
kparzysz

Commits

rGe0a2613aeaae: [SCEV] Avoid redundant computations when doing AddRec merge
rL345813: [SCEV] Avoid redundant computations when doing AddRec merge

Summary

When we calculate the product of two AddRecs, we end up doing quite massive
computations to deduce the operands of the resulting AddRec. This process can
be optimized by computing all arguments of the intermediate sum and then calling
getAddExpr once, rather than calling getAddExpr with the intermediate
result every time a new argument is computed.

Diff Detail

Repository: rL LLVM

Event Timeline

mkazantsev created this revision. Oct 12 2018, 4:38 AM

Rebased: the patch slightly alters simplification, but in this test the expression is still within reasonable bounds.

Do you think it's better to remove the NFC tag from the patch? It doesn't look like it's completely NFC, though I've tested this out for a major (though out-of-tree) GPU target on a very large suite of shaders and found no difference.

What's the compile time impact roughly?

I also think that giving the wrapping flags some thought here (in the sense of preserving / re-deriving them a bit better) is valuable, but unfortunately I don't have any concrete suggestions.

Regardless, LGTM, thanks for doing this.

This revision is now accepted and ready to land. Oct 31 2018, 12:52 PM

I expect the compile-time impact to be zero for the majority of cases. It should only affect corner cases in which we reach the depth limit during simplification. For them, depending on the complexity of the simplifications, we save O(N) time simplifying. I don't think it will really be observable on anything other than those corner cases.

Closed by commit rL345813: [SCEV] Avoid redundant computations when doing AddRec merge (authored by mkazantsev). · Explain Why · Oct 31 2018, 11:21 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Analysis/

ScalarEvolution.cpp

11 lines

test/

Analysis/

ScalarEvolution/

binomial-explision.ll

2 lines

Diff 172088

llvm/trunk/lib/Analysis/ScalarEvolution.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,054 Lines • ▼ Show 20 Lines	for (unsigned OtherIdx = Idx+1;
continue;		continue;

bool Overflow = false;		bool Overflow = false;
Type *Ty = AddRec->getType();		Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;		bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
SmallVector<const SCEV*, 7> AddRecOps;		SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +		for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {		OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
const SCEV *Term = getZero(Ty);		SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {		for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);		uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),		for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
ze = std::min(x+1, (int)OtherAddRec->getNumOperands());		ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
z < ze && !Overflow; ++z) {		z < ze && !Overflow; ++z) {
uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);		uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
uint64_t Coeff;		uint64_t Coeff;
if (LargerThan64Bits)		if (LargerThan64Bits)
Coeff = umul_ov(Coeff1, Coeff2, Overflow);		Coeff = umul_ov(Coeff1, Coeff2, Overflow);
else		else
Coeff = Coeff1*Coeff2;		Coeff = Coeff1*Coeff2;
const SCEV *CoeffTerm = getConstant(Ty, Coeff);		const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);		const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);		const SCEV *Term2 = OtherAddRec->getOperand(z);
Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,		SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
SCEV::FlagAnyWrap, Depth + 1),		SCEV::FlagAnyWrap, Depth + 1));
SCEV::FlagAnyWrap, Depth + 1);
}		}
}		}
AddRecOps.push_back(Term);		if (SumOps.empty())
		SumOps.push_back(getZero(Ty));
		AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}		}
if (!Overflow) {		if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),		const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
SCEV::FlagAnyWrap);		SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;		if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;		Ops[Idx] = NewAddRec;
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;		Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
OpsModified = true;		OpsModified = true;
▲ Show 20 Lines • Show All 9,346 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/ScalarEvolution/binomial-explision.ll

	; RUN: opt -analyze -scalar-evolution < %s \| FileCheck %s			; RUN: opt -analyze -scalar-evolution < %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

	; Check that we don't have unreasonably huge SCEVs and in particular only a			; Check that we don't have unreasonably huge SCEVs and in particular only a
	; reasonable amount of AddRecs in the notation of %tmp19. If we "simplify" SCEVs			; reasonable amount of AddRecs in the notation of %tmp19. If we "simplify" SCEVs
	; too aggressively, we may end up with huge nested expressions.			; too aggressively, we may end up with huge nested expressions.
	define void @test(i32 %x, i64 %y, i1 %cond) {			define void @test(i32 %x, i64 %y, i1 %cond) {

	; CHECK: %tmp19 = mul i32 %tmp17, %tmp18			; CHECK: %tmp19 = mul i32 %tmp17, %tmp18
	; CHECK: ((((			; CHECK: ((((((
	; CHECK-NOT: (((((			; CHECK-NOT: (((((
	; CHECK: %tmp20 = add i32 %tmp19, %x			; CHECK: %tmp20 = add i32 %tmp19, %x

	bb:			bb:
	br label %bb1			br label %bb1

	bb1: ; preds = %bb3, %bb			bb1: ; preds = %bb3, %bb
	%tmp = phi i64 [ %y, %bb ], [ %tmp22, %bb3 ]			%tmp = phi i64 [ %y, %bb ], [ %tmp22, %bb3 ]
	Show All 28 Lines