Skip to content

Commit 53a423a

Browse files
committedMar 26, 2018
[IRCE] Enable increasing loops of variable bounds
CanBeMin is currently used which will report true for any unknown values, but often a check is performed outside the loop which covers this situation: for (int i = 0; i < N; ++i) ... if (N > 0) for (int i = 0; i < N; ++i) ... So I've add 'LoopGuardedAgainstMin' which reports whether N is greater than the minimum value which then allows loop with a variable loop count to be optimised. I've also moved the increasing bound checking into its own function and replaced SumCanReachMax is another isLoopEntryGuardedByCond function. llvm-svn: 328480
1 parent a6ce78e commit 53a423a

File tree

3 files changed

+254
-64
lines changed

3 files changed

+254
-64
lines changed
 

‎llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

+78-58
Original file line numberDiff line numberDiff line change
@@ -702,27 +702,59 @@ static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) {
702702
SE.getUnsignedRange(S).contains(Max);
703703
}
704704

705-
static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
706-
bool Signed) {
707-
// S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX.
708-
assert(SE.isKnownNonNegative(S2) &&
709-
"We expected the 2nd arg to be non-negative!");
710-
const SCEV *Max = SE.getConstant(
711-
Signed ? APInt::getSignedMaxValue(
712-
cast<IntegerType>(S1->getType())->getBitWidth())
713-
: APInt::getMaxValue(
714-
cast<IntegerType>(S1->getType())->getBitWidth()));
715-
const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2);
716-
return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
717-
S1, CapForS1);
705+
/// Given a loop with an increasing induction variable, is it possible to
706+
/// safely calculate the bounds of a new loop using the given Predicate.
707+
static bool isSafeIncreasingBound(const SCEV *Start,
708+
const SCEV *BoundSCEV, const SCEV *Step,
709+
ICmpInst::Predicate Pred,
710+
unsigned LatchBrExitIdx,
711+
Loop *L, ScalarEvolution &SE) {
712+
if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
713+
Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
714+
return false;
715+
716+
if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
717+
return false;
718+
719+
DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n");
720+
DEBUG(dbgs() << "irce: Start: " << *Start);
721+
DEBUG(dbgs() << "irce: Step: " << *Step);
722+
DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV);
723+
DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred) << "\n");
724+
DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
725+
726+
bool IsSigned = ICmpInst::isSigned(Pred);
727+
// The predicate that we need to check that the induction variable lies
728+
// within bounds.
729+
ICmpInst::Predicate BoundPred =
730+
IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
731+
732+
if (LatchBrExitIdx == 1)
733+
return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
734+
735+
assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
736+
737+
const SCEV *StepMinusOne =
738+
SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
739+
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
740+
APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
741+
APInt::getMaxValue(BitWidth);
742+
const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
743+
744+
return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
745+
SE.getAddExpr(BoundSCEV, Step)) &&
746+
SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
718747
}
719748

720-
static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) {
721-
APInt Min = Signed ?
722-
APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth()) :
723-
APInt::getMinValue(cast<IntegerType>(S->getType())->getBitWidth());
724-
return SE.getSignedRange(S).contains(Min) &&
725-
SE.getUnsignedRange(S).contains(Min);
749+
static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
750+
ScalarEvolution &SE, bool Signed) {
751+
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
752+
APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
753+
APInt::getMinValue(BitWidth);
754+
auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
755+
return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
756+
SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV,
757+
SE.getConstant(Min));
726758
}
727759

728760
static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
@@ -904,17 +936,24 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
904936
Pred = ICmpInst::ICMP_ULT;
905937
else
906938
Pred = ICmpInst::ICMP_SLT;
907-
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
908-
!CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) {
939+
else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
909940
// while (true) { while (true) {
910941
// if (++i == len) ---> if (++i > len - 1)
911942
// break; break;
912943
// ... ...
913944
// } }
914-
// TODO: Insert ICMP_UGT if both are non-negative?
915-
Pred = ICmpInst::ICMP_SGT;
916-
RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
917-
DecreasedRightValueByOne = true;
945+
if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
946+
CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
947+
Pred = ICmpInst::ICMP_UGT;
948+
RightSCEV = SE.getMinusSCEV(RightSCEV,
949+
SE.getOne(RightSCEV->getType()));
950+
DecreasedRightValueByOne = true;
951+
} else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
952+
Pred = ICmpInst::ICMP_SGT;
953+
RightSCEV = SE.getMinusSCEV(RightSCEV,
954+
SE.getOne(RightSCEV->getType()));
955+
DecreasedRightValueByOne = true;
956+
}
918957
}
919958
}
920959

@@ -928,48 +967,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
928967
return None;
929968
}
930969

931-
IsSignedPredicate =
932-
Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
933-
970+
IsSignedPredicate = ICmpInst::isSigned(Pred);
934971
if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
935972
FailureReason = "unsigned latch conditions are explicitly prohibited";
936973
return None;
937974
}
938975

939-
// The predicate that we need to check that the induction variable lies
940-
// within bounds.
941-
ICmpInst::Predicate BoundPred =
942-
IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
943-
976+
if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
977+
LatchBrExitIdx, &L, SE)) {
978+
FailureReason = "Unsafe loop bounds";
979+
return None;
980+
}
944981
if (LatchBrExitIdx == 0) {
945-
const SCEV *StepMinusOne = SE.getMinusSCEV(Step,
946-
SE.getOne(Step->getType()));
947-
if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) {
948-
// TODO: this restriction is easily removable -- we just have to
949-
// remember that the icmp was an slt and not an sle.
950-
FailureReason = "limit may overflow when coercing le to lt";
951-
return None;
952-
}
953-
954-
if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) ||
955-
!SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart,
956-
SE.getAddExpr(RightSCEV, Step))) {
957-
FailureReason = "Induction variable start not bounded by upper limit";
958-
return None;
959-
}
960-
961982
// We need to increase the right value unless we have already decreased
962983
// it virtually when we replaced EQ with SGT.
963984
if (!DecreasedRightValueByOne) {
964985
IRBuilder<> B(Preheader->getTerminator());
965986
RightValue = B.CreateAdd(RightValue, One);
966987
}
967988
} else {
968-
if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) ||
969-
!SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, RightSCEV)) {
970-
FailureReason = "Induction variable start not bounded by upper limit";
971-
return None;
972-
}
973989
assert(!DecreasedRightValueByOne &&
974990
"Right value can be decreased only for LatchBrExitIdx == 0!");
975991
}
@@ -1479,13 +1495,15 @@ bool LoopConstrainer::run() {
14791495
if (Increasing)
14801496
ExitPreLoopAtSCEV = *SR.LowLimit;
14811497
else {
1482-
if (CanBeMin(SE, *SR.HighLimit, IsSignedPredicate)) {
1498+
if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
1499+
IsSignedPredicate))
1500+
ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
1501+
else {
14831502
DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
14841503
<< "preloop exit limit. HighLimit = " << *(*SR.HighLimit)
14851504
<< "\n");
14861505
return false;
14871506
}
1488-
ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
14891507
}
14901508

14911509
if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
@@ -1505,13 +1523,15 @@ bool LoopConstrainer::run() {
15051523
if (Increasing)
15061524
ExitMainLoopAtSCEV = *SR.HighLimit;
15071525
else {
1508-
if (CanBeMin(SE, *SR.LowLimit, IsSignedPredicate)) {
1526+
if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
1527+
IsSignedPredicate))
1528+
ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
1529+
else {
15091530
DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
15101531
<< "mainloop exit limit. LowLimit = " << *(*SR.LowLimit)
15111532
<< "\n");
15121533
return false;
15131534
}
1514-
ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
15151535
}
15161536

15171537
if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {

‎llvm/test/Transforms/IRCE/eq_ne.ll

+2-6
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; CHECK: irce: in function test_01u: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
66
; CHECK-NOT: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
77
; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
8-
; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
8+
; CHECK: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
99
; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
1010
; CHECK-NOT: irce: in function test_06: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
1111
; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
@@ -112,7 +112,7 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
112112
; CHECK: test_03(
113113
; CHECK: main.exit.selector:
114114
; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ]
115-
; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100
115+
; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100
116116
; CHECK-NEXT: br i1 [[COND]]
117117

118118
entry:
@@ -138,12 +138,8 @@ exit:
138138
ret void
139139
}
140140

141-
; Show that if n is not known to be greater than the starting value, IRCE
142-
; doesn't apply.
143141
define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
144142

145-
; CHECK: test_04(
146-
147143
entry:
148144
%len = load i32, i32* %a_len_ptr, !range !0
149145
br label %loop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks < %s 2>&1 | FileCheck %s
2+
3+
; CHECK: irce: in function test_inc_eq: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting>
4+
; CHECK: irce: in function test_inc_ne: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting>
5+
; CHECK: irce: in function test_inc_slt: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting>
6+
; CHECK: irce: in function test_inc_ult: constrained Loop at depth 1 containing: %for.body<header>,%if.else,%if.then,%for.inc<latch><exiting>
7+
8+
; CHECK-LABEL: test_inc_eq(
9+
; CHECK: main.exit.selector:
10+
; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ]
11+
; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N
12+
; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit
13+
define void @test_inc_eq(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
14+
entry:
15+
%cmp16 = icmp sgt i32 %N, 0
16+
br i1 %cmp16, label %for.body, label %for.cond.cleanup
17+
18+
for.cond.cleanup:
19+
ret void
20+
21+
for.body:
22+
%i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
23+
%cmp1 = icmp ult i32 %i.017, 512
24+
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017
25+
%0 = load i32, i32* %arrayidx, align 4
26+
%arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017
27+
%1 = load i32, i32* %arrayidx2, align 4
28+
br i1 %cmp1, label %if.then, label %if.else
29+
30+
if.then:
31+
%sub = sub i32 %0, %1
32+
%arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017
33+
%2 = load i32, i32* %arrayidx3, align 4
34+
%add = add nsw i32 %sub, %2
35+
store i32 %add, i32* %arrayidx3, align 4
36+
br label %for.inc
37+
38+
if.else:
39+
%add6 = add nsw i32 %1, %0
40+
%arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017
41+
store i32 %add6, i32* %arrayidx7, align 4
42+
br label %for.inc
43+
44+
for.inc:
45+
%inc = add nuw nsw i32 %i.017, 1
46+
%exitcond = icmp eq i32 %inc, %N
47+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
48+
}
49+
50+
; CHECK-LABEL: test_inc_ne
51+
; CHECK: main.exit.selector:
52+
; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ]
53+
; CHECK: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], %N
54+
; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit
55+
define void @test_inc_ne(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
56+
entry:
57+
%cmp16 = icmp sgt i32 %N, 0
58+
br i1 %cmp16, label %for.body, label %for.cond.cleanup
59+
60+
for.cond.cleanup:
61+
ret void
62+
63+
for.body:
64+
%i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
65+
%cmp1 = icmp ult i32 %i.017, 512
66+
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017
67+
%0 = load i32, i32* %arrayidx, align 4
68+
%arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017
69+
%1 = load i32, i32* %arrayidx2, align 4
70+
br i1 %cmp1, label %if.then, label %if.else
71+
72+
if.then:
73+
%sub = sub i32 %0, %1
74+
%arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017
75+
%2 = load i32, i32* %arrayidx3, align 4
76+
%add = add nsw i32 %sub, %2
77+
store i32 %add, i32* %arrayidx3, align 4
78+
br label %for.inc
79+
80+
if.else:
81+
%add6 = add nsw i32 %1, %0
82+
%arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017
83+
store i32 %add6, i32* %arrayidx7, align 4
84+
br label %for.inc
85+
86+
for.inc:
87+
%inc = add nuw nsw i32 %i.017, 1
88+
%exitcond = icmp ne i32 %inc, %N
89+
br i1 %exitcond, label %for.body, label %for.cond.cleanup
90+
}
91+
92+
; CHECK-LABEL: test_inc_slt(
93+
; CHECK: main.exit.selector:
94+
; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ]
95+
; CHECK: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], %N
96+
; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit
97+
define void @test_inc_slt(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
98+
entry:
99+
%cmp16 = icmp sgt i32 %N, 0
100+
br i1 %cmp16, label %for.body, label %for.cond.cleanup
101+
102+
for.cond.cleanup:
103+
ret void
104+
105+
for.body:
106+
%i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
107+
%cmp1 = icmp ult i32 %i.017, 512
108+
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017
109+
%0 = load i32, i32* %arrayidx, align 4
110+
%arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017
111+
%1 = load i32, i32* %arrayidx2, align 4
112+
br i1 %cmp1, label %if.then, label %if.else
113+
114+
if.then:
115+
%sub = sub i32 %0, %1
116+
%arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017
117+
%2 = load i32, i32* %arrayidx3, align 4
118+
%add = add nsw i32 %sub, %2
119+
store i32 %add, i32* %arrayidx3, align 4
120+
br label %for.inc
121+
122+
if.else:
123+
%add6 = add nsw i32 %1, %0
124+
%arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017
125+
store i32 %add6, i32* %arrayidx7, align 4
126+
br label %for.inc
127+
128+
for.inc:
129+
%inc = add nuw nsw i32 %i.017, 1
130+
%exitcond = icmp slt i32 %inc, %N
131+
br i1 %exitcond, label %for.body, label %for.cond.cleanup
132+
}
133+
134+
; CHECK-LABEL: test_inc_ult
135+
; CHECK: main.exit.selector:
136+
; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ]
137+
; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N
138+
; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit
139+
define void @test_inc_ult(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
140+
entry:
141+
%cmp16 = icmp ugt i32 %N, 0
142+
br i1 %cmp16, label %for.body, label %for.cond.cleanup
143+
144+
for.cond.cleanup:
145+
ret void
146+
147+
for.body:
148+
%i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
149+
%cmp1 = icmp ult i32 %i.017, 512
150+
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017
151+
%0 = load i32, i32* %arrayidx, align 4
152+
%arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017
153+
%1 = load i32, i32* %arrayidx2, align 4
154+
br i1 %cmp1, label %if.then, label %if.else
155+
156+
if.then:
157+
%sub = sub i32 %0, %1
158+
%arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017
159+
%2 = load i32, i32* %arrayidx3, align 4
160+
%add = add nsw i32 %sub, %2
161+
store i32 %add, i32* %arrayidx3, align 4
162+
br label %for.inc
163+
164+
if.else:
165+
%add6 = add nsw i32 %1, %0
166+
%arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017
167+
store i32 %add6, i32* %arrayidx7, align 4
168+
br label %for.inc
169+
170+
for.inc:
171+
%inc = add nuw nsw i32 %i.017, 1
172+
%exitcond = icmp ult i32 %inc, %N
173+
br i1 %exitcond, label %for.body, label %for.cond.cleanup
174+
}

0 commit comments

Comments
 (0)
Please sign in to comment.