Skip to content

Commit 1befea2

Browse files
author
Lawrence Hu
committedApr 30, 2016
Reroll loops with multiple IV and negative step part 3
support multiple induction variables This patch enable loop reroll for the following case: for(int i=0; i<N; i += 2) { S += *a++; S += *a++; }; Differential Revision: http://reviews.llvm.org/D16550 llvm-svn: 268147
1 parent df29078 commit 1befea2

File tree

2 files changed

+289
-9
lines changed

2 files changed

+289
-9
lines changed
 

‎llvm/lib/Transforms/Scalar/LoopRerollPass.cpp

+155-9
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ namespace {
163163

164164
// Map between induction variable and its increment
165165
DenseMap<Instruction *, int64_t> IVToIncMap;
166+
// For loop with multiple induction variable, remember the one used only to
167+
// control the loop.
168+
Instruction *LoopControlIV;
166169

167170
// A chain of isomorphic instructions, identified by a single-use PHI
168171
// representing a reduction. Only the last value may be used outside the
@@ -350,9 +353,11 @@ namespace {
350353
ScalarEvolution *SE, AliasAnalysis *AA,
351354
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
352355
bool PreserveLCSSA,
353-
DenseMap<Instruction *, int64_t> &IncrMap)
356+
DenseMap<Instruction *, int64_t> &IncrMap,
357+
Instruction *LoopCtrlIV)
354358
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
355-
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
359+
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
360+
LoopControlIV(LoopCtrlIV) {}
356361

357362
/// Stage 1: Find all the DAG roots for the induction variable.
358363
bool findRoots();
@@ -391,6 +396,7 @@ namespace {
391396
UsesTy::iterator Start,
392397
UsesTy::iterator End);
393398
void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
399+
void updateNonLoopCtrlIncr();
394400

395401
LoopReroll *Parent;
396402

@@ -421,8 +427,18 @@ namespace {
421427
UsesTy Uses;
422428
// Map between induction variable and its increment
423429
DenseMap<Instruction *, int64_t> &IVToIncMap;
430+
Instruction *LoopControlIV;
424431
};
425432

433+
// Check if it is a compare-like instruction whose user is a branch
434+
bool isCompareUsedByBranch(Instruction *I) {
435+
auto *TI = I->getParent()->getTerminator();
436+
if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
437+
return false;
438+
return I->hasOneUse() && TI->getOperand(0) == I;
439+
};
440+
441+
bool isLoopControlIV(Loop *L, Instruction *IV);
426442
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
427443
void collectPossibleReductions(Loop *L,
428444
ReductionTracker &Reductions);
@@ -494,6 +510,60 @@ static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
494510
return CIncSCEV;
495511
}
496512

513+
// Check if an IV is only used to control the loop. There are two cases:
514+
// 1. It only has one use which is loop increment, and the increment is only
515+
// used by comparison and the PHI, and the comparison is only used by branch.
516+
// 2. It is used by loop increment and the comparison, the loop increment is
517+
// only used by the PHI, and the comparison is used only by the branch.
518+
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
519+
520+
unsigned IVUses = IV->getNumUses();
521+
if (IVUses != 2 && IVUses != 1)
522+
return false;
523+
524+
for (auto *User : IV->users()) {
525+
int32_t IncOrCmpUses = User->getNumUses();
526+
bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
527+
528+
// User can only have one or two uses.
529+
if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
530+
return false;
531+
532+
// Case 1
533+
if (IVUses == 1) {
534+
// The only user must be the loop increment.
535+
// The loop increment must have two uses.
536+
if (IsCompInst || IncOrCmpUses != 2)
537+
return false;
538+
}
539+
540+
// Case 2
541+
if (IVUses == 2 && IncOrCmpUses != 1)
542+
return false;
543+
544+
// The users of the IV must be a binary operation or a comparison
545+
if (auto *BO = dyn_cast<BinaryOperator>(User)) {
546+
if (BO->getOpcode() == Instruction::Add) {
547+
// Loop Increment
548+
// User of Loop Increment should be either PHI or CMP
549+
for (auto *UU : User->users()) {
550+
if (PHINode *PN = dyn_cast<PHINode>(UU)) {
551+
if (PN != IV)
552+
return false;
553+
}
554+
// Must be a CMP
555+
else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
556+
return false;
557+
}
558+
} else
559+
return false;
560+
// Compare : can only have one use, and must be branch
561+
} else if (!IsCompInst)
562+
return false;
563+
}
564+
return true;
565+
}
566+
497567
// Collect the list of loop induction variables with respect to which it might
498568
// be possible to reroll the loop.
499569
void LoopReroll::collectPossibleIVs(Loop *L,
@@ -525,7 +595,14 @@ void LoopReroll::collectPossibleIVs(Loop *L,
525595
IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
526596
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
527597
<< "\n");
528-
PossibleIVs.push_back(&*I);
598+
599+
if (isLoopControlIV(L, &*I)) {
600+
assert(!LoopControlIV && "Found two loop control only IV");
601+
LoopControlIV = &(*I);
602+
DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
603+
<< *PHISCEV << "\n");
604+
} else
605+
PossibleIVs.push_back(&*I);
529606
}
530607
}
531608
}
@@ -1072,6 +1149,28 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
10721149
Uses[I].set(IL_All);
10731150
}
10741151

1152+
// Make sure we mark loop-control-only PHIs as used in all iterations. See
1153+
// comment above LoopReroll::isLoopControlIV for more information.
1154+
BasicBlock *Header = L->getHeader();
1155+
if (LoopControlIV && LoopControlIV != IV) {
1156+
for (auto *U : LoopControlIV->users()) {
1157+
Instruction *IVUser = dyn_cast<Instruction>(U);
1158+
// IVUser could be loop increment or compare
1159+
Uses[IVUser].set(IL_All);
1160+
for (auto *UU : IVUser->users()) {
1161+
Instruction *UUser = dyn_cast<Instruction>(UU);
1162+
// UUser could be compare, PHI or branch
1163+
Uses[UUser].set(IL_All);
1164+
// Is UUser a compare instruction?
1165+
if (UU->hasOneUse()) {
1166+
Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1167+
if (BI == cast<BranchInst>(Header->getTerminator()))
1168+
Uses[BI].set(IL_All);
1169+
}
1170+
}
1171+
}
1172+
}
1173+
10751174
// Make sure all instructions in the loop are in one and only one
10761175
// set.
10771176
for (auto &KV : Uses) {
@@ -1314,25 +1413,65 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
13141413
++J;
13151414
}
13161415

1317-
// We need to create a new induction variable for each different BaseInst.
1318-
for (auto &DRS : RootSets)
1319-
// Insert the new induction variable.
1320-
replaceIV(DRS.BaseInst, IV, IterCount);
1416+
bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1417+
1418+
if (HasTwoIVs) {
1419+
updateNonLoopCtrlIncr();
1420+
replaceIV(LoopControlIV, LoopControlIV, IterCount);
1421+
} else
1422+
// We need to create a new induction variable for each different BaseInst.
1423+
for (auto &DRS : RootSets)
1424+
// Insert the new induction variable.
1425+
replaceIV(DRS.BaseInst, IV, IterCount);
13211426

13221427
SimplifyInstructionsInBlock(Header, TLI);
13231428
DeleteDeadPHIs(Header, TLI);
13241429
}
13251430

1431+
// For non-loop-control IVs, we only need to update the last increment
1432+
// with right amount, then we are done.
1433+
void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1434+
const SCEV *NewInc = nullptr;
1435+
for (auto *LoopInc : LoopIncs) {
1436+
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
1437+
const SCEVConstant *COp = nullptr;
1438+
if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1439+
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1440+
} else {
1441+
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
1442+
if (!COp)
1443+
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1444+
}
1445+
1446+
assert(COp && "Didn't find constant operand of LoopInc!\n");
1447+
1448+
const APInt &AInt = COp->getValue()->getValue();
1449+
const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1450+
if (AInt.isNegative()) {
1451+
NewInc = SE->getNegativeSCEV(COp);
1452+
NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1453+
NewInc = SE->getNegativeSCEV(NewInc);
1454+
} else
1455+
NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1456+
1457+
LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
1458+
}
1459+
}
1460+
13261461
void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
13271462
Instruction *InstIV,
13281463
const SCEV *IterCount) {
13291464
BasicBlock *Header = L->getHeader();
13301465
int64_t Inc = IVToIncMap[InstIV];
1331-
bool Negative = Inc < 0;
1466+
bool NeedNewIV = InstIV == LoopControlIV;
1467+
bool Negative = !NeedNewIV && Inc < 0;
13321468

13331469
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
13341470
const SCEV *Start = RealIVSCEV->getStart();
13351471

1472+
if (NeedNewIV)
1473+
Start = SE->getConstant(Start->getType(), 0);
1474+
13361475
const SCEV *SizeOfExpr = nullptr;
13371476
const SCEV *IncrExpr =
13381477
SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
@@ -1360,6 +1499,12 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
13601499
if (Uses[BI].find_first() == IL_All) {
13611500
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
13621501

1502+
if (NeedNewIV)
1503+
ICSCEV = SE->getMulExpr(IterCount,
1504+
SE->getConstant(IterCount->getType(), Scale));
1505+
else
1506+
ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1507+
13631508
// Iteration count SCEV minus or plus 1
13641509
const SCEV *MinusPlus1SCEV =
13651510
SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
@@ -1514,7 +1659,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
15141659
const SCEV *IterCount,
15151660
ReductionTracker &Reductions) {
15161661
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1517-
IVToIncMap);
1662+
IVToIncMap, LoopControlIV);
15181663

15191664
if (!DAGRoots.findRoots())
15201665
return false;
@@ -1566,6 +1711,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
15661711
// reroll (there may be several possible options).
15671712
SmallInstructionVector PossibleIVs;
15681713
IVToIncMap.clear();
1714+
LoopControlIV = nullptr;
15691715
collectPossibleIVs(L, PossibleIVs);
15701716

15711717
if (PossibleIVs.empty()) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
; RUN: opt -S -loop-reroll %s | FileCheck %s
2+
declare i32 @goo(i32, i32)
3+
4+
@buf = external global i8*
5+
@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
6+
7+
define i32 @test1(i32 %len) {
8+
entry:
9+
br label %while.body
10+
11+
while.body:
12+
;CHECK-LABEL: while.body:
13+
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
14+
;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
15+
;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
16+
;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
17+
;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
18+
;CHECK-NEXT: %add = add i64 %conv, %sum44.020
19+
;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
20+
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
21+
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
22+
;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
23+
24+
%dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
25+
%buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
26+
%sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
27+
%0 = load i8, i8* %buf.021, align 1
28+
%conv = zext i8 %0 to i64
29+
%add = add i64 %conv, %sum44.020
30+
%arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
31+
%1 = load i8, i8* %arrayidx1, align 1
32+
%conv2 = zext i8 %1 to i64
33+
%add3 = add i64 %add, %conv2
34+
%arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
35+
%2 = load i8, i8* %arrayidx4, align 1
36+
%conv5 = zext i8 %2 to i64
37+
%add6 = add i64 %add3, %conv5
38+
%arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
39+
%3 = load i8, i8* %arrayidx7, align 1
40+
%conv8 = zext i8 %3 to i64
41+
%add9 = add i64 %add6, %conv8
42+
%add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
43+
%dec = add nsw i32 %dec22, -1
44+
%tobool = icmp eq i32 %dec, 0
45+
br i1 %tobool, label %while.end, label %while.body
46+
47+
while.end: ; preds = %while.body
48+
%conv11 = trunc i64 %add9 to i32
49+
%call = tail call i32 @goo(i32 0, i32 %conv11)
50+
unreachable
51+
}
52+
53+
define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
54+
entry:
55+
%cmp.9 = icmp sgt i32 %N, 0
56+
br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
57+
58+
for.body.lr.ph:
59+
br label %for.body
60+
61+
for.cond.for.cond.cleanup_crit_edge:
62+
br label %for.cond.cleanup
63+
64+
for.cond.cleanup:
65+
%S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
66+
ret i32 %S.addr.0.lcssa
67+
68+
for.body:
69+
;CHECK-LABEL: for.body:
70+
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
71+
;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
72+
;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
73+
;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
74+
;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
75+
;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
76+
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
77+
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
78+
;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
79+
80+
%i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
81+
%S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
82+
%a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
83+
%incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
84+
%0 = load i32, i32* %a.addr.010, align 4
85+
%add = add nsw i32 %0, %S.addr.011
86+
%incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
87+
%1 = load i32, i32* %incdec.ptr, align 4
88+
%add2 = add nsw i32 %add, %1
89+
%add3 = add nsw i32 %i.012, 2
90+
%cmp = icmp slt i32 %add3, %N
91+
br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
92+
}
93+
94+
define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
95+
entry:
96+
%cmp10 = icmp sgt i32 %len, 1
97+
br i1 %cmp10, label %while.body.preheader, label %while.end
98+
99+
while.body.preheader: ; preds = %entry
100+
br label %while.body
101+
102+
while.body: ; preds = %while.body.preheader, %while.body
103+
;CHECK-LABEL: while.body:
104+
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
105+
;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
106+
;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
107+
;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
108+
;CHECK-NEXT: %add = add nsw i32 %4, %S.012
109+
;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
110+
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
111+
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
112+
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
113+
114+
%i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
115+
%S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
116+
%buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
117+
%0 = load i32, i32* %buf.addr.011, align 4
118+
%add = add nsw i32 %0, %S.012
119+
%arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
120+
%1 = load i32, i32* %arrayidx1, align 4
121+
%add2 = add nsw i32 %add, %1
122+
%add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
123+
%sub = add nsw i32 %i.013, -2
124+
%cmp = icmp sgt i32 %sub, 1
125+
br i1 %cmp, label %while.body, label %while.end.loopexit
126+
127+
while.end.loopexit: ; preds = %while.body
128+
br label %while.end
129+
130+
while.end: ; preds = %while.end.loopexit, %entry
131+
%S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
132+
ret i32 %S.0.lcssa
133+
}
134+

0 commit comments

Comments
 (0)
Please sign in to comment.