Skip to content

Commit f562fc8

Browse files
committedAug 29, 2018
[LICM] Hoist stores of invariant values to invariant addresses out of loops
Teach LICM to hoist stores out of loops when the store writes to a location otherwise unused in the loop, writes a value which is invariant, and is guaranteed to execute if the loop is entered. Worth noting is that this transformation is partially overlapping with the existing promotion transformation. Reasons this is worthwhile anyway include: * For multi-exit loops, this doesn't require duplication of the store. * It kicks in for case where we can't prove we exit through a normal exit (i.e. we may throw), but can prove the store executes before that possible side exit. Differential Revision: https://reviews.llvm.org/D50925 llvm-svn: 340974
1 parent fd48b7d commit f562fc8

File tree

6 files changed

+135
-16
lines changed

6 files changed

+135
-16
lines changed
 

Diff for: ‎llvm/lib/Analysis/AliasSetTracker.cpp

+16-3
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,22 @@ Instruction* AliasSet::getUniqueInstruction() {
256256
if (AliasAny)
257257
// May have collapses alias set
258258
return nullptr;
259-
if (size() != 0)
260-
// Can't track source of pointer, might be many instruction
261-
return nullptr;
259+
if (begin() != end()) {
260+
if (!UnknownInsts.empty())
261+
// Another instruction found
262+
return nullptr;
263+
if (std::next(begin()) != end())
264+
// Another instruction found
265+
return nullptr;
266+
Value *Addr = begin()->getValue();
267+
assert(!Addr->user_empty() &&
268+
"where's the instruction which added this pointer?");
269+
if (std::next(Addr->user_begin()) != Addr->user_end())
270+
// Another instruction found -- this is really restrictive
271+
// TODO: generalize!
272+
return nullptr;
273+
return cast<Instruction>(*(Addr->user_begin()));
274+
}
262275
if (1 != UnknownInsts.size())
263276
return nullptr;
264277
return cast<Instruction>(UnknownInsts[0]);

Diff for: ‎llvm/lib/Transforms/Scalar/LICM.cpp

+23-3
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
416416
//
417417
bool FreeInLoop = false;
418418
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
419-
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE)) {
419+
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
420+
!I.mayHaveSideEffects()) {
420421
if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
421422
if (!FreeInLoop) {
422423
++II;
@@ -604,8 +605,8 @@ namespace {
604605
/// concerns such as aliasing or speculation safety.
605606
bool isHoistableAndSinkableInst(Instruction &I) {
606607
// Only these instructions are hoistable/sinkable.
607-
return (isa<LoadInst>(I) || isa<CallInst>(I) ||
608-
isa<FenceInst>(I) ||
608+
return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
609+
isa<CallInst>(I) || isa<FenceInst>(I) ||
609610
isa<BinaryOperator>(I) || isa<CastInst>(I) ||
610611
isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
611612
isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
@@ -695,6 +696,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
695696
// it's arguments with arbitrary offsets. If we can prove there are no
696697
// writes to this memory in the loop, we can hoist or sink.
697698
if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
699+
// TODO: expand to writeable arguments
698700
for (Value *Op : CI->arg_operands())
699701
if (Op->getType()->isPointerTy() &&
700702
pointerInvalidatedByLoop(
@@ -729,6 +731,24 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
729731
(void)FI; //suppress unused variable warning
730732
assert(UniqueI == FI && "AS must contain FI");
731733
return true;
734+
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
735+
if (!SI->isUnordered())
736+
return false; // Don't sink/hoist volatile or ordered atomic store!
737+
738+
// We can only hoist a store that we can prove writes a value which is not
739+
// read or overwritten within the loop. For those cases, we fallback to
740+
// load store promotion instead.
741+
auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
742+
743+
if (AS.isRef() || !AS.isMustAlias())
744+
// Quick exit test, handled by the full path below as well.
745+
return false;
746+
auto *UniqueI = AS.getUniqueInstruction();
747+
if (!UniqueI)
748+
// other memory op, give up
749+
return false;
750+
assert(UniqueI == SI && "AS must contain SI");
751+
return true;
732752
}
733753

734754
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

Diff for: ‎llvm/test/Transforms/LICM/atomics.ll

+3-2
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,11 @@ loop:
173173
end:
174174
ret i32 %vala
175175
; CHECK-LABEL: define i32 @test7b(
176+
; CHECK-LABEL: entry:
177+
; CHECK: store i32 5, i32* %x
178+
; CHECK-LABEL: loop:
176179
; CHECK: load atomic i32, i32* %y monotonic
177-
178180
; CHECK-LABEL: end:
179-
; CHECK: store i32 5, i32* %x
180181
; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
181182
}
182183

Diff for: ‎llvm/test/Transforms/LICM/funclet.ll

+3-1
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,11 @@ else: ; preds = %postinvoke
9797
}
9898

9999
; CHECK-LABEL: define void @test3(
100-
; CHECK: catchswitch within none
100+
; CHECK-LABEL: forbody.preheader:
101101
; CHECK: store i32 1, i32* %bc, align 4
102102
; CHECK: store i32 2, i32* %bc2, align 4
103+
; CHECK: catchswitch within none
104+
; CHECK-LABEL: forbody:
103105

104106
declare void @may_throw()
105107

Diff for: ‎llvm/test/Transforms/LICM/promote-order.ll

+5-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ target triple = "x86_64-apple-macosx10.8.0"
1010
@p = external global i8*
1111

1212
define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind {
13+
; CHECK-LABEL: for.body.lr.ph:
14+
; CHECK: store float 1.000000e+00, float* %tmp1
15+
; CHECK-LABEL: for.cond.for.end_crit_edge:
16+
; CHECK: store i32 1, i32* %tmp3
17+
1318
entry:
1419
%cmp1 = icmp slt i32 0, %n
1520
br i1 %cmp1, label %for.body.lr.ph, label %for.end
@@ -25,9 +30,6 @@ for.body: ; preds = %for.body, %for.body
2530
%cmp = icmp slt i32 %inc, %n
2631
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
2732

28-
; CHECK: for.cond.for.end_crit_edge:
29-
; CHECK: store float 1.000000e+00, float* %tmp1
30-
; CHECK: store i32 1, i32* %tmp3
3133
for.cond.for.end_crit_edge: ; preds = %for.body
3234
%split = phi i32* [ %tmp3, %for.body ]
3335
br label %for.end

Diff for: ‎llvm/test/Transforms/LICM/store-hoisting.ll

+85-4
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33

44
define void @test(i32* %loc) {
55
; CHECK-LABEL: @test
6-
; CHECK-LABEL: exit:
6+
; CHECK-LABEL: entry:
77
; CHECK: store i32 0, i32* %loc
8+
; CHECK-LABEL: loop:
89
entry:
910
br label %loop
1011

@@ -21,10 +22,9 @@ exit:
2122

2223
define void @test_multiexit(i32* %loc, i1 %earlycnd) {
2324
; CHECK-LABEL: @test_multiexit
24-
; CHECK-LABEL: exit1:
25-
; CHECK: store i32 0, i32* %loc
26-
; CHECK-LABEL: exit2:
25+
; CHECK-LABEL: entry:
2726
; CHECK: store i32 0, i32* %loc
27+
; CHECK-LABEL: loop:
2828
entry:
2929
br label %loop
3030

@@ -44,6 +44,24 @@ exit2:
4444
ret void
4545
}
4646

47+
define i32* @false_negative_2use(i32* %loc) {
48+
; CHECK-LABEL: @false_negative_2use
49+
; CHECK-LABEL: exit:
50+
; CHECK: store i32 0, i32* %loc
51+
entry:
52+
br label %loop
53+
54+
loop:
55+
%iv = phi i32 [0, %entry], [%iv.next, %loop]
56+
store i32 0, i32* %loc
57+
%iv.next = add i32 %iv, 1
58+
%cmp = icmp slt i32 %iv, 200
59+
br i1 %cmp, label %loop, label %exit
60+
61+
exit:
62+
ret i32* %loc
63+
}
64+
4765
define void @neg_lv_value(i32* %loc) {
4866
; CHECK-LABEL: @neg_lv_value
4967
; CHECK-LABEL: exit:
@@ -227,4 +245,67 @@ exit:
227245
ret void
228246
}
229247

248+
declare void @maythrow() inaccessiblememonly
249+
250+
define void @neg_early_exit(i32* %loc) {
251+
; CHECK-LABEL: @neg_early_exit
252+
; CHECK-LABEL: body:
253+
; CHECK: store i32 0, i32* %loc
254+
; CHECK-LABEL: exit:
255+
entry:
256+
br label %loop
257+
258+
loop:
259+
%iv = phi i32 [0, %entry], [%iv.next, %body]
260+
%is_null = icmp eq i32* %loc, null
261+
br i1 %is_null, label %exit, label %body
262+
body:
263+
call void @maythrow()
264+
store i32 0, i32* %loc
265+
%iv.next = add i32 %iv, 1
266+
%cmp = icmp slt i32 %iv, 200
267+
br i1 %cmp, label %loop, label %exit
268+
269+
exit:
270+
ret void
271+
}
272+
273+
define void @neg_early_throw(i32* %loc) {
274+
; CHECK-LABEL: @neg_early_throw
275+
; CHECK-LABEL: loop:
276+
; CHECK: store i32 0, i32* %loc
277+
; CHECK-LABEL: exit:
278+
entry:
279+
br label %loop
280+
281+
loop:
282+
%iv = phi i32 [0, %entry], [%iv.next, %loop]
283+
call void @maythrow()
284+
store i32 0, i32* %loc
285+
%iv.next = add i32 %iv, 1
286+
%cmp = icmp slt i32 %iv, 200
287+
br i1 %cmp, label %loop, label %exit
288+
289+
exit:
290+
ret void
291+
}
292+
293+
define void @test_late_throw(i32* %loc) {
294+
; CHECK-LABEL: @test_late_throw
295+
; CHECK-LABEL: entry:
296+
; CHECK: store i32 0, i32* %loc
297+
; CHECK-LABEL: loop:
298+
entry:
299+
br label %loop
230300

301+
loop:
302+
%iv = phi i32 [0, %entry], [%iv.next, %loop]
303+
store i32 0, i32* %loc
304+
call void @maythrow()
305+
%iv.next = add i32 %iv, 1
306+
%cmp = icmp slt i32 %iv, 200
307+
br i1 %cmp, label %loop, label %exit
308+
309+
exit:
310+
ret void
311+
}

0 commit comments

Comments
 (0)
Please sign in to comment.