Skip to content

Commit 3d347bf

Browse files
author
Max Kazantsev
committedNov 1, 2018
[IndVars] Smart hard uses detection
When rewriting loop exit values, IndVars considers this transform not profitable if the loop instruction has a loop user which it believes cannot be optimized away. In current implementation only calls that immediately use the instruction are considered as such. This patch extends the definition of "hard" users to any side-effecting instructions (which usually cannot be optimized away from the loop) and also allows handling of not just immediate users, but use chains. Differentlai Revision: https://reviews.llvm.org/D51584 Reviewed By: etherzhhb llvm-svn: 345814
1 parent e0a2613 commit 3d347bf

File tree

4 files changed

+118
-16
lines changed

4 files changed

+118
-16
lines changed
 

Diff for: ‎llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

+26-13
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class IndVarSimplify {
145145
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
146146
bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
147147
bool rewriteFirstIterationLoopExitValues(Loop *L);
148+
bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
148149

149150
bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
150151
PHINode *IndVar, SCEVExpander &Rewriter);
@@ -524,6 +525,29 @@ struct RewritePhi {
524525
// As a side effect, reduces the amount of IV processing within the loop.
525526
//===----------------------------------------------------------------------===//
526527

528+
bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
529+
SmallPtrSet<const Instruction *, 8> Visited;
530+
SmallVector<const Instruction *, 8> WorkList;
531+
Visited.insert(I);
532+
WorkList.push_back(I);
533+
while (!WorkList.empty()) {
534+
const Instruction *Curr = WorkList.pop_back_val();
535+
// This use is outside the loop, nothing to do.
536+
if (!L->contains(Curr))
537+
continue;
538+
// Do we assume it is a "hard" use which will not be eliminated easily?
539+
if (Curr->mayHaveSideEffects())
540+
return true;
541+
// Otherwise, add all its users to worklist.
542+
for (auto U : Curr->users()) {
543+
auto *UI = cast<Instruction>(U);
544+
if (Visited.insert(UI).second)
545+
WorkList.push_back(UI);
546+
}
547+
}
548+
return false;
549+
}
550+
527551
/// Check to see if this loop has a computable loop-invariant execution count.
528552
/// If so, this means that we can compute the final value of any expressions
529553
/// that are recurrent in the loop, and substitute the exit values from the loop
@@ -598,19 +622,8 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
598622
// Computing the value outside of the loop brings no benefit if it is
599623
// definitely used inside the loop in a way which can not be optimized
600624
// away.
601-
if (ExitValue->getSCEVType()>=scMulExpr) {
602-
bool HasHardInternalUses = false;
603-
for (auto *IB : Inst->users()) {
604-
Instruction *UseInstr = cast<Instruction>(IB);
605-
unsigned Opc = UseInstr->getOpcode();
606-
if (L->contains(UseInstr) && Opc == Instruction::Call) {
607-
HasHardInternalUses = true;
608-
break;
609-
}
610-
}
611-
if (HasHardInternalUses)
612-
continue;
613-
}
625+
if (hasHardUserWithinLoop(L, Inst))
626+
continue;
614627

615628
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
616629
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

Diff for: ‎llvm/test/Analysis/ScalarEvolution/pr28705.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; PR28705
22
; RUN: opt < %s -indvars -S | FileCheck %s
33

4-
; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i"
5-
; with "%.sroa.speculated + 1".
4+
; Check IndVarSimplify doesn't replace external use of the induction var
5+
; "%inc.i.i" with "%.sroa.speculated + 1" because it is not profitable.
66
;
77
; CHECK-LABEL: @foo(
8-
; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1
8+
; CHECK: %[[EXIT:.+]] = phi i32 [ %inc.i.i, %for.body650 ]
99
; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ]
1010
;
1111
define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {

Diff for: ‎llvm/test/Transforms/IndVarSimplify/dont-recompute.ll

+51
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,54 @@ for.end: ; preds = %for.body
123123
tail call void @func(i32 %soft_use)
124124
ret void
125125
}
126+
127+
; CHECK-LABEL: @test5(
128+
define void @test5(i32 %m) nounwind uwtable {
129+
entry:
130+
br label %for.body
131+
132+
for.body: ; preds = %for.body, %entry
133+
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
134+
%a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
135+
%add = add i32 %a.05, %m
136+
%soft_use = add i32 %add, 123
137+
; CHECK: tail call void @func(i32 %soft_use)
138+
tail call void @func(i32 %soft_use)
139+
%inc = add nsw i32 %i.06, 1
140+
%exitcond = icmp eq i32 %inc, 186
141+
br i1 %exitcond, label %for.end, label %for.body
142+
143+
for.end: ; preds = %for.body
144+
; CHECK: for.end:
145+
; CHECK-NOT: mul i32 %m, 186
146+
; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
147+
; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
148+
tail call void @func(i32 %add)
149+
ret void
150+
}
151+
152+
; CHECK-LABEL: @test6(
153+
define void @test6(i32 %m, i32* %p) nounwind uwtable {
154+
entry:
155+
br label %for.body
156+
157+
for.body: ; preds = %for.body, %entry
158+
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
159+
%a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
160+
%add = add i32 %a.05, %m
161+
%soft_use = add i32 %add, 123
162+
; CHECK: store i32 %soft_use, i32* %pidx
163+
%pidx = getelementptr i32, i32* %p, i32 %add
164+
store i32 %soft_use, i32* %pidx
165+
%inc = add nsw i32 %i.06, 1
166+
%exitcond = icmp eq i32 %inc, 186
167+
br i1 %exitcond, label %for.end, label %for.body
168+
169+
for.end: ; preds = %for.body
170+
; CHECK: for.end:
171+
; CHECK-NOT: mul i32 %m, 186
172+
; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
173+
; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
174+
tail call void @func(i32 %add)
175+
ret void
176+
}

Diff for: ‎llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll

+38
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; RUN: opt -S -indvars < %s | FileCheck %s
22

3+
; Do not rewrite the user outside the loop because we must keep the instruction
4+
; inside the loop due to store. Rewrite doesn't give us any profit.
35
define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
46
; CHECK-LABEL: @f(
57
not_zero11.preheader:
@@ -22,6 +24,42 @@ not_zero11:
2224
%tmp23 = icmp slt i32 %tmp22, %tmp14
2325
br i1 %tmp23, label %not_zero11, label %main.exit.selector
2426

27+
main.exit.selector:
28+
; CHECK-LABEL: main.exit.selector:
29+
; CHECK: %tmp22.lcssa = phi i32 [ %tmp22, %not_zero11 ]
30+
; CHECK: %tmp24 = icmp slt i32 %tmp22.lcssa, %length.
31+
%tmp24 = icmp slt i32 %tmp22, %length.i
32+
br i1 %tmp24, label %not_zero11.postloop, label %leave
33+
34+
leave:
35+
ret void
36+
37+
not_zero11.postloop:
38+
ret void
39+
}
40+
41+
; Rewrite the user outside the loop because there is no hard users inside the loop.
42+
define void @f1(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
43+
; CHECK-LABEL: @f1(
44+
not_zero11.preheader:
45+
%tmp13 = icmp ugt i32 %length.i, %length.i.88
46+
%tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i
47+
%tmp15 = icmp sgt i32 %tmp14, 0
48+
br i1 %tmp15, label %not_zero11, label %not_zero11.postloop
49+
50+
not_zero11:
51+
%v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ]
52+
%tmp16 = zext i32 %v_1 to i64
53+
%tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16
54+
%tmp18 = load i8, i8* %tmp17, align 1
55+
%tmp19 = zext i8 %tmp18 to i32
56+
%tmp20 = or i32 %tmp19, %tmp10
57+
%tmp21 = trunc i32 %tmp20 to i8
58+
%addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16
59+
%tmp22 = add nuw nsw i32 %v_1, 1
60+
%tmp23 = icmp slt i32 %tmp22, %tmp14
61+
br i1 %tmp23, label %not_zero11, label %main.exit.selector
62+
2563
main.exit.selector:
2664
; CHECK-LABEL: main.exit.selector:
2765
; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i

0 commit comments

Comments
 (0)
Please sign in to comment.