This is an archive of the discontinued LLVM Phabricator instance.

[SCEV] Simplify trunc-of-add/mul to add/mul-of-trunc under more circumstances.
ClosedPublic

Authored by jlebar on Jun 13 2018, 8:37 PM.

Download Raw Diff

Details

Reviewers

Commits

rGb326904dba98: [SCEV] Simplify trunc-of-add/mul to add/mul-of-trunc under more circumstances.
rL334736: [SCEV] Simplify trunc-of-add/mul to add/mul-of-trunc under more circumstances.

Summary

Previously we would do this simplification only if it did not introduce
any new truncs (excepting new truncs which replace other cast ops).

This change weakens this condition: If the number of truncs stays the
same, but we're able to transform trunc(X + Y) to X + trunc(Y), that's
still simpler, and it may open up additional transformations.

While we're here, also clean up some duplicated code.

Diff Detail

Repository: rL LLVM

Event Timeline

jlebar created this revision. · Jun 13 2018, 8:37 PM

Herald added a subscriber: hiraditya. · View Herald Transcript · Jun 13 2018, 8:37 PM

Harbormaster completed remote builds in B19322: Diff 151302. · Jun 13 2018, 8:38 PM

jlebar added a child revision: D48158: [SCEV] Simplify zext/trunc idiom that appears when handling bitmasks. · Jun 13 2018, 8:42 PM

lgtm

This revision is now accepted and ready to land. · Jun 14 2018, 9:41 AM

Closed by commit rL334736: [SCEV] Simplify trunc-of-add/mul to add/mul-of-trunc under more circumstances. (authored by jlebar). · Explain Why · Jun 14 2018, 10:19 AM

This revision was automatically updated to reflect the committed changes.

Hi @jlebar

This patch seems to cause bug https://bugs.llvm.org/show_bug.cgi?id=39160 during the vectorization stage. See the reproducer in the bug report.

Note that I have looked through your patch and don't see any obvious problems in it; it most likely exposes an underlying bug rather than introducing a new one, but I can't be sure yet.

I'm going to investigate it tomorrow. I'd appreciate it if you could take a look as well, or revert the patch until the cause is clear.

Thanks,
Max

Revision Contents

Path

Size

llvm/

trunk/

lib/

Analysis/

ScalarEvolution.cpp

54 lines

test/

Analysis/

ScalarEvolution/

different-loops-recs.ll

9 lines

max-trip-count-address-space.ll

2 lines

sext-inreg.ll

4 lines

strip-injective-zext.ll

2 lines

trunc-simplify.ll

25 lines

Diff 151376

llvm/trunk/lib/Analysis/ScalarEvolution.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,250 Lines • ▼ Show 20 Lines	const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing		// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))		if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getTruncateOrSignExtend(SS->getOperand(), Ty);		return getTruncateOrSignExtend(SS->getOperand(), Ty);

// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing		// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))		if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);		return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

// trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can		// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
// eliminate all the truncates, or we replace other casts with truncates.		// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {		// if after transforming we have at most one truncate, not counting truncates
		// that replace other casts.
		if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
		auto *CommOp = cast<SCEVCommutativeExpr>(Op);
SmallVector<const SCEV *, 4> Operands;		SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;		unsigned numTruncs = 0;
for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {		for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);		++i) {
if (!isa<SCEVCastExpr>(SA->getOperand(i)))		const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty);
hasTrunc = isa<SCEVTruncateExpr>(S);		if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
		numTruncs++;
Operands.push_back(S);		Operands.push_back(S);
}		}
if (!hasTrunc)		if (numTruncs < 2) {
		if (isa<SCEVAddExpr>(Op))
return getAddExpr(Operands);		return getAddExpr(Operands);
// In spite we checked in the beginning that ID is not in the cache,		else if (isa<SCEVMulExpr>(Op))
// it is possible that during recursion and different modification
// ID came to cache, so if we found it, just return it.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}

// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
// eliminate all the truncates, or we replace other casts with truncates.
if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
if (!isa<SCEVCastExpr>(SM->getOperand(i)))
hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
return getMulExpr(Operands);		return getMulExpr(Operands);
// In spite we checked in the beginning that ID is not in the cache,		else
// it is possible that during recursion and different modification		llvm_unreachable("Unexpected SCEV type for Op.");
// ID came to cache, so if we found it, just return it.		}
		// Although we checked in the beginning that ID is not in the cache, it is
		// possible that during recursion and different modification ID was inserted
		// into the cache. So if we find it, just return it.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))		if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;		return S;
}		}

// If the input value is a chrec scev, truncate the chrec's operands.		// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {		if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;		SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())		for (const SCEV *Op : AddRec->operands())
▲ Show 20 Lines • Show All 10,850 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/ScalarEvolution/different-loops-recs.ll

	Show First 20 Lines • Show All 271 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: --> {3,+,1}<nuw><%loop1>			; CHECK-NEXT: --> {3,+,1}<nuw><%loop1>
	; CHECK: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]			; CHECK: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]
	; CHECK-NEXT: --> {2,+,1}<nuw><nsw><%loop2>			; CHECK-NEXT: --> {2,+,1}<nuw><nsw><%loop2>
	; CHECK: %tmp10 = sub i64 %tmp9, %tmp7			; CHECK: %tmp10 = sub i64 %tmp9, %tmp7
	; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<nw><%loop2>)			; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<nw><%loop2>)
	; CHECK: %tmp11 = add i64 %tmp10, undef			; CHECK: %tmp11 = add i64 %tmp10, undef
	; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<nw><%loop2>)			; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<nw><%loop2>)
	; CHECK: %tmp13 = trunc i64 %tmp11 to i32			; CHECK: %tmp13 = trunc i64 %tmp11 to i32
	; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(trunc i64 (-2 + undef) to i32),+,-1}<%loop2>)			; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(-2 + (trunc i64 undef to i32)),+,-1}<%loop2>)
	; CHECK: %tmp14 = sub i32 %tmp13, %tmp2			; CHECK: %tmp14 = sub i32 %tmp13, %tmp2
	; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{{{}}(-2 + (trunc i64 (-2 + undef) to i32)),+,-1}<%loop1>,+,-1}<%loop2>)			; `{{[{][{]}}` is the ugliness needed to match `{{`
				; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{[{][{]}}(-4 + (trunc i64 undef to i32)),+,-1}<%loop1>,+,-1}<%loop2>)
	; CHECK: %tmp15 = add nuw nsw i64 %tmp7, 1			; CHECK: %tmp15 = add nuw nsw i64 %tmp7, 1
	; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%loop2>			; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%loop2>

	bb:			bb:
	br label %loop1			br label %loop1

	loop1:			loop1:
	%tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]			%tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]
	▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines
	; Make sure that a complicated Phi does not get folded with rec's start value			; Make sure that a complicated Phi does not get folded with rec's start value
	; of a loop which is above.			; of a loop which is above.
	define void @test_08() {			define void @test_08() {

	; CHECK-LABEL: Classifying expressions for: @test_08			; CHECK-LABEL: Classifying expressions for: @test_08
	; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1			; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1
	; CHECK-NEXT: --> ({0,+,-1}<nsw><%loop_2> + %iv.2.1)			; CHECK-NEXT: --> ({0,+,-1}<nsw><%loop_2> + %iv.2.1)
	; CHECK: %tmp12 = trunc i64 %tmp11 to i32			; CHECK: %tmp12 = trunc i64 %tmp11 to i32
	; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32)			; CHECK-NEXT: --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)
	; CHECK: %tmp14 = mul i32 %tmp12, %tmp7			; CHECK: %tmp14 = mul i32 %tmp12, %tmp7
	; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>)			; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>)
	; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1			; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1
	; CHECK-NEXT: --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1)			; CHECK-NEXT: --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1)

	entry:			entry:
	br label %loop_1			br label %loop_1

	loop_1:			loop_1:
	%iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]			%iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]
	▲ Show 20 Lines • Show All 156 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll

	; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s			; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s

	; ScalarEvolution should be able to understand the loop and eliminate the casts.			; ScalarEvolution should be able to understand the loop and eliminate the casts.

	target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-p4:64:64:64-n16:32:64"			target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-p4:64:64:64-n16:32:64"

	; CHECK: {%d,+,4}<%bb>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: ((4 * (trunc i32 (-1 + %n) to i16)) + %d)			; CHECK: {%d,+,4}<%bb>{{ U: [^ ]+ S: [^ ]+}}{{ *}} Exits: (-4 + (4 * (trunc i32 %n to i16)) + %d)


	define void @foo(i32 addrspace(1)* nocapture %d, i32 %n) nounwind {			define void @foo(i32 addrspace(1)* nocapture %d, i32 %n) nounwind {
	; CHECK: @foo			; CHECK: @foo
	entry:			entry:
	%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]			%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
	br i1 %0, label %bb.nph, label %return			br i1 %0, label %bb.nph, label %return

	▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/ScalarEvolution/sext-inreg.ll

Show All 9 Lines	entry:
br i1 %t0, label %bb, label %return		br i1 %t0, label %bb, label %return

bb:		bb:
%i.01 = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]		%i.01 = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]
%t1 = shl i64 %i.01, 7		%t1 = shl i64 %i.01, 7
%t2 = ashr i64 %t1, 7		%t2 = ashr i64 %t1, 7
; CHECK: %t2 = ashr i64 %t1, 7		; CHECK: %t2 = ashr i64 %t1, 7
; CHECK-NEXT: sext i57 {0,+,199}<%bb> to i64		; CHECK-NEXT: sext i57 {0,+,199}<%bb> to i64
; CHECK-SAME: Exits: (sext i57 (199 * (trunc i64 (-1 + (2780916192016515319 * %n)) to i57)) to i64)		; CHECK-SAME: Exits: (sext i57 (-199 + (trunc i64 %n to i57)) to i64)
; CHECK: %s2 = ashr i64 %s1, 5		; CHECK: %s2 = ashr i64 %s1, 5
; CHECK-NEXT: sext i59 {0,+,199}<%bb> to i64		; CHECK-NEXT: sext i59 {0,+,199}<%bb> to i64
; CHECK-SAME: Exits: (sext i59 (199 * (trunc i64 (-1 + (2780916192016515319 * %n)) to i59)) to i64)		; CHECK-SAME: Exits: (sext i59 (-199 + (trunc i64 %n to i59)) to i64)
%s1 = shl i64 %i.01, 5		%s1 = shl i64 %i.01, 5
%s2 = ashr i64 %s1, 5		%s2 = ashr i64 %s1, 5
%t3 = getelementptr i64, i64* %x, i64 %i.01		%t3 = getelementptr i64, i64* %x, i64 %i.01
store i64 0, i64* %t3, align 1		store i64 0, i64* %t3, align 1
%indvar.next = add i64 %i.01, 199		%indvar.next = add i64 %i.01, 199
%exitcond = icmp eq i64 %indvar.next, %n		%exitcond = icmp eq i64 %indvar.next, %n
br i1 %exitcond, label %return, label %bb		br i1 %exitcond, label %return, label %bb

return:		return:
%p = phi i64 [ 0, %entry ], [ %t2, %bb ]		%p = phi i64 [ 0, %entry ], [ %t2, %bb ]
%q = phi i64 [ 0, %entry ], [ %s2, %bb ]		%q = phi i64 [ 0, %entry ], [ %s2, %bb ]
%v = xor i64 %p, %q		%v = xor i64 %p, %q
ret i64 %v		ret i64 %v
}		}

llvm/trunk/test/Analysis/ScalarEvolution/strip-injective-zext.ll

	; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s			; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s

	; The initial SCEV for the backedge count is			; The initial SCEV for the backedge count is
	; (zext i2 {(trunc i32 (1 + %a1) to i2),+,1}<%b2> to i32).			; (zext i2 {(trunc i32 (1 + %a1) to i2),+,1}<%b2> to i32).
	; In howFarToZero, this was further converted to an add-rec, the complexity			; In howFarToZero, this was further converted to an add-rec, the complexity
	; of which defeated the calculation of the backedge taken count.			; of which defeated the calculation of the backedge taken count.
	; Since such zero-extensions preserve the values being extended, strip			; Since such zero-extensions preserve the values being extended, strip
	; them in howFarToZero to simplify the input SCEV.			; them in howFarToZero to simplify the input SCEV.

	; Check that the backedge taken count was actually computed:			; Check that the backedge taken count was actually computed:
	; CHECK: Determining loop execution counts for: @f0			; CHECK: Determining loop execution counts for: @f0
	; CHECK-NEXT: Loop %b2: backedge-taken count is (-1 * (trunc i32 (1 + %a1) to i2))			; CHECK-NEXT: Loop %b2: backedge-taken count is (-1 + (-1 * (trunc i32 %a1 to i2)))

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"

	define i32 @f0(i32 %a0, i32 %a1, i32* nocapture %a2) #0 {			define i32 @f0(i32 %a0, i32 %a1, i32* nocapture %a2) #0 {
	b0:			b0:
	%v0 = and i32 %a1, 3			%v0 = and i32 %a1, 3
	%v1 = icmp eq i32 %v0, 0			%v1 = icmp eq i32 %v0, 0
	br i1 %v1, label %b4, label %b1			br i1 %v1, label %b4, label %b1
	Show All 25 Lines

llvm/trunk/test/Analysis/ScalarEvolution/trunc-simplify.ll

				; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s

				; Check that we convert
				; trunc(C * a) -> trunc(C) * trunc(a)
				; if C is a constant.
				; CHECK-LABEL: @trunc_of_mul
				define i8 @trunc_of_mul(i32 %a) {
				%b = mul i32 %a, 100
				; CHECK: %c
				; CHECK-NEXT: --> (100 * (trunc i32 %a to i8))
				%c = trunc i32 %b to i8
				ret i8 %c
				}

				; Check that we convert
				; trunc(C + a) -> trunc(C) + trunc(a)
				; if C is a constant.
				; CHECK-LABEL: @trunc_of_add
				define i8 @trunc_of_add(i32 %a) {
				%b = add i32 %a, 100
				; CHECK: %c
				; CHECK-NEXT: --> (100 + (trunc i32 %a to i8))
				%c = trunc i32 %b to i8
				ret i8 %c
				}