Skip to content

Commit ad7d0de

Browse files
committedMar 18, 2019
[SCEV] Guard movement of insertion point for loop-invariants
This reinstates r347934, along with a tweak to address a problem with PHI node ordering that that commit created (or exposed). (That commit was reverted at r348426, due to the PHI node issue.) Original commit message: r320789 suppressed moving the insertion point of SCEV expressions with dev/rem operations to the loop header in non-loop-invariant situations. This, and similar, hoisting is also unsafe in the loop-invariant case, since there may be a guard against a zero denominator. This is an adjustment to the fix of r320789 to suppress the movement even in the loop-invariant case. This fixes PR30806. Differential Revision: https://reviews.llvm.org/D57428 llvm-svn: 356392
1 parent 270249d commit ad7d0de

File tree

3 files changed

+178
-41
lines changed

3 files changed

+178
-41
lines changed
 

‎llvm/lib/Analysis/ScalarEvolutionExpander.cpp

+47-41
Original file line numberDiff line numberDiff line change
@@ -1731,49 +1731,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
17311731
// Compute an insertion point for this SCEV object. Hoist the instructions
17321732
// as far out in the loop nest as possible.
17331733
Instruction *InsertPt = &*Builder.GetInsertPoint();
1734-
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1735-
L = L->getParentLoop())
1736-
if (SE.isLoopInvariant(S, L)) {
1737-
if (!L) break;
1738-
if (BasicBlock *Preheader = L->getLoopPreheader())
1739-
InsertPt = Preheader->getTerminator();
1740-
else {
1741-
// LSR sets the insertion point for AddRec start/step values to the
1742-
// block start to simplify value reuse, even though it's an invalid
1743-
// position. SCEVExpander must correct for this in all cases.
1744-
InsertPt = &*L->getHeader()->getFirstInsertionPt();
1745-
}
1746-
} else {
1747-
// We can move insertion point only if there is no div or rem operations
1748-
// otherwise we are risky to move it over the check for zero denominator.
1749-
auto SafeToHoist = [](const SCEV *S) {
1750-
return !SCEVExprContains(S, [](const SCEV *S) {
1751-
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1752-
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1753-
// Division by non-zero constants can be hoisted.
1754-
return SC->getValue()->isZero();
1755-
// All other divisions should not be moved as they may be
1756-
// divisions by zero and should be kept within the
1757-
// conditions of the surrounding loops that guard their
1758-
// execution (see PR35406).
1759-
return true;
1760-
}
1761-
return false;
1762-
});
1763-
};
1764-
// If the SCEV is computable at this level, insert it into the header
1765-
// after the PHIs (and after any other instructions that we've inserted
1766-
// there) so that it is guaranteed to dominate any user inside the loop.
1767-
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
1768-
SafeToHoist(S))
1769-
InsertPt = &*L->getHeader()->getFirstInsertionPt();
1770-
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
1771-
(isInsertedInstruction(InsertPt) ||
1772-
isa<DbgInfoIntrinsic>(InsertPt))) {
1773-
InsertPt = &*std::next(InsertPt->getIterator());
1734+
1735+
// We can move insertion point only if there is no div or rem operations
1736+
// otherwise we are risky to move it over the check for zero denominator.
1737+
auto SafeToHoist = [](const SCEV *S) {
1738+
return !SCEVExprContains(S, [](const SCEV *S) {
1739+
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1740+
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1741+
// Division by non-zero constants can be hoisted.
1742+
return SC->getValue()->isZero();
1743+
// All other divisions should not be moved as they may be
1744+
// divisions by zero and should be kept within the
1745+
// conditions of the surrounding loops that guard their
1746+
// execution (see PR35406).
1747+
return true;
1748+
}
1749+
return false;
1750+
});
1751+
};
1752+
if (SafeToHoist(S)) {
1753+
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1754+
L = L->getParentLoop()) {
1755+
if (SE.isLoopInvariant(S, L)) {
1756+
if (!L) break;
1757+
if (BasicBlock *Preheader = L->getLoopPreheader())
1758+
InsertPt = Preheader->getTerminator();
1759+
else
1760+
// LSR sets the insertion point for AddRec start/step values to the
1761+
// block start to simplify value reuse, even though it's an invalid
1762+
// position. SCEVExpander must correct for this in all cases.
1763+
InsertPt = &*L->getHeader()->getFirstInsertionPt();
1764+
} else {
1765+
// If the SCEV is computable at this level, insert it into the header
1766+
// after the PHIs (and after any other instructions that we've inserted
1767+
// there) so that it is guaranteed to dominate any user inside the loop.
1768+
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1769+
InsertPt = &*L->getHeader()->getFirstInsertionPt();
1770+
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
1771+
(isInsertedInstruction(InsertPt) ||
1772+
isa<DbgInfoIntrinsic>(InsertPt)))
1773+
InsertPt = &*std::next(InsertPt->getIterator());
1774+
break;
17741775
}
1775-
break;
17761776
}
1777+
}
1778+
1779+
// IndVarSimplify sometimes sets the insertion point at the block start, even
1780+
// when there are PHIs at that point. We must correct for this.
1781+
if (isa<PHINode>(*InsertPt))
1782+
InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
17771783

17781784
// Check to see if we already expanded this here.
17791785
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; RUN: opt -S -indvars < %s | FileCheck %s
2+
3+
; Produced from the test-case:
4+
;
5+
; extern void foo(char *, unsigned , unsigned *);
6+
; extern void bar(int *, long);
7+
; extern char *processBuf(char *);
8+
;
9+
; extern unsigned theSize;
10+
;
11+
; void foo(char *buf, unsigned denominator, unsigned *flag) {
12+
; int incr = (int) (theSize / denominator);
13+
; int inx = 0;
14+
; while (*flag) {
15+
; int itmp = inx + incr;
16+
; int i = (int) theSize;
17+
; bar(&i, (long) itmp);
18+
; buf = processBuf(buf);
19+
; inx = itmp;
20+
; }
21+
; }
22+
23+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
24+
25+
@theSize = external local_unnamed_addr global i32, align 4
26+
27+
define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
28+
entry:
29+
%i = alloca i32, align 4
30+
%0 = load i32, i32* @theSize, align 4
31+
%div = udiv i32 %0, %denominator
32+
%1 = load i32, i32* %flag, align 4
33+
%tobool5 = icmp eq i32 %1, 0
34+
br i1 %tobool5, label %while.end, label %while.body.lr.ph
35+
36+
while.body.lr.ph: ; preds = %entry
37+
%2 = bitcast i32* %i to i8*
38+
br label %while.body
39+
40+
while.body: ; preds = %while.body.lr.ph, %while.body
41+
; Check that there are two PHIs followed by a 'sext' in the same block, and that
42+
; the test does not crash.
43+
; CHECK: phi
44+
; CHECK-NEXT: phi
45+
; CHECK-NEXT: sext
46+
%buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
47+
%inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
48+
%add = add nsw i32 %inx.06, %div
49+
%3 = load i32, i32* @theSize, align 4
50+
store i32 %3, i32* %i, align 4
51+
%conv = sext i32 %add to i64
52+
call void @bar(i32* nonnull %i, i64 %conv)
53+
%call = call i8* @processBuf(i8* %buf.addr.07)
54+
%4 = load i32, i32* %flag, align 4
55+
%tobool = icmp eq i32 %4, 0
56+
br i1 %tobool, label %while.end.loopexit, label %while.body
57+
58+
while.end.loopexit: ; preds = %while.body
59+
br label %while.end
60+
61+
while.end: ; preds = %while.end.loopexit, %entry
62+
ret void
63+
}
64+
65+
declare void @bar(i32*, i64) local_unnamed_addr
66+
declare i8* @processBuf(i8*) local_unnamed_addr
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
2+
3+
; Produced from test-case:
4+
;
5+
; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
6+
; {
7+
; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
8+
; if (denom > 0) {
9+
; const uint32_t lim = numer / denom;
10+
;
11+
; for (uint32_t i = 0; i < lim; ++i)
12+
; ptr[i] = 1;
13+
; }
14+
; }
15+
; }
16+
17+
18+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
19+
target triple = "x86_64-unknown-linux-gnu"
20+
21+
define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
22+
entry:
23+
%cmp1 = icmp eq i32 %outer_lim, 0
24+
br i1 %cmp1, label %exit, label %loop1.preheader
25+
26+
; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
27+
; whatever label comes next.
28+
loop1.preheader:
29+
; CHECK-LABEL: loop1.preheader:
30+
; CHECK-NOT: udiv
31+
; CHECK-LABEL: :
32+
br label %loop1
33+
34+
loop1:
35+
%outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
36+
%0 = add i32 %denom, -1
37+
%1 = icmp ult i32 %0, %numer
38+
br i1 %1, label %loop2.preheader, label %loop2.exit
39+
40+
; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
41+
; whatever label comes next.
42+
loop2.preheader:
43+
; CHECK-LABEL: loop2.preheader:
44+
; CHECK: udiv
45+
; CHECK-LABEL: :
46+
%lim = udiv i32 %numer, %denom
47+
%2 = zext i32 %lim to i64
48+
br label %loop2
49+
50+
loop2:
51+
%indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
52+
%arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
53+
store i32 1, i32* %arrayidx, align 4
54+
%indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
55+
%cmp2 = icmp ult i64 %indvar.loop2.next, %2
56+
br i1 %cmp2, label %loop2, label %loop2.exit
57+
58+
loop2.exit:
59+
%inc1 = add nuw i32 %outer_i, 1
60+
%exitcond = icmp eq i32 %inc1, %outer_lim
61+
br i1 %exitcond, label %exit, label %loop1
62+
63+
exit:
64+
ret void
65+
}

0 commit comments

Comments
 (0)
Please sign in to comment.