Skip to content

Commit 935474f

Browse files
committed Apr 9, 2018
[MachineLICM] Re-enable hoisting of constant stores
This patch fixes an issue exposed on the SystemZ build bots when committing https://reviews.llvm.org/rL327856. The hoisting was temporarily disabled with an option. This patch now re-enables hoisting and checks that we only hoist a store instruction when all its operands are either constant caller preserved registers or immediates. Differential Revision: https://reviews.llvm.org/D45286 llvm-svn: 329577
1 parent f0029a7 commit 935474f

File tree

2 files changed

+154
-2
lines changed

2 files changed

+154
-2
lines changed
 

‎llvm/lib/CodeGen/MachineLICM.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
7474
static cl::opt<bool>
7575
HoistConstStores("hoist-const-stores",
7676
cl::desc("Hoist invariant stores"),
77-
cl::init(false), cl::Hidden);
77+
cl::init(true), cl::Hidden);
7878

7979
STATISTIC(NumHoisted,
8080
"Number of machine instructions hoisted out of loops");
@@ -902,10 +902,13 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
902902
// This means, the value being stored and the address where it is being stored
903903
// are constant throughout the body of the function (not including prologue and
904904
// epilogue). When called with an MI that isn't a store, it returns false.
905+
// A future improvement can be to check if the store registers are constant
906+
// throughout the function.
905907
static bool isInvariantStore(const MachineInstr &MI,
906908
const TargetRegisterInfo *TRI,
907909
const MachineRegisterInfo *MRI) {
908910

911+
bool FoundCallerPresReg = false;
909912
if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
910913
(MI.getNumOperands() == 0))
911914
return false;
@@ -922,9 +925,13 @@ static bool isInvariantStore(const MachineInstr &MI,
922925
return false;
923926
if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
924927
return false;
928+
else
929+
FoundCallerPresReg = true;
930+
} else if (!MO.isImm()) {
931+
return false;
925932
}
926933
}
927-
return true;
934+
return FoundCallerPresReg;
928935
}
929936

930937
// Return true if the input MI is a copy instruction that feeds an invariant
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=s390x-ibm-linux -mcpu=z13 -O3 -hoist-const-stores < %s | FileCheck %s
2+
3+
@b = dso_local local_unnamed_addr global i32 15, align 4
4+
@e = dso_local local_unnamed_addr global i32 -1, align 4
5+
@f = common dso_local global i32 0, align 4
6+
@g = dso_local local_unnamed_addr global i32* @f, align 8
7+
@c = common dso_local local_unnamed_addr global i32 0, align 4
8+
@a = common dso_local local_unnamed_addr global [6 x i32] zeroinitializer, align 4
9+
@d = common dso_local local_unnamed_addr global i32 0, align 4
10+
@h = common dso_local local_unnamed_addr global i32 0, align 4
11+
@.str = private unnamed_addr constant [15 x i8] c"checksum = %X\0A\00", align 2
12+
13+
; Function Attrs: nounwind
14+
define dso_local signext i32 @main() {
15+
entry:
16+
%i = alloca i32, align 4
17+
%.pr = load i32, i32* @c, align 4, !tbaa !2
18+
%cmp6 = icmp slt i32 %.pr, 6
19+
br i1 %cmp6, label %for.body.preheader, label %for.end
20+
21+
for.body.preheader: ; preds = %entry
22+
store i32 5, i32* getelementptr inbounds ([6 x i32], [6 x i32]* @a, i64 0, i64 1), align 4, !tbaa !2
23+
store i32 6, i32* @c, align 4, !tbaa !2
24+
br label %for.end
25+
26+
for.end: ; preds = %for.body.preheader, %entry
27+
%0 = bitcast i32* %i to i8*
28+
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
29+
store i32 14, i32* %i, align 4, !tbaa !2
30+
%.pr2 = load i32, i32* @d, align 4, !tbaa !2
31+
%cmp25 = icmp sgt i32 %.pr2, -1
32+
br i1 %cmp25, label %for.cond4thread-pre-split.lr.ph, label %for.end.for.end11_crit_edge
33+
34+
for.end.for.end11_crit_edge: ; preds = %for.end
35+
%.pre10 = load i32, i32* @b, align 4, !tbaa !2
36+
br label %for.end11
37+
38+
; CHECK: # %for.cond4thread-pre-split.lr.ph
39+
; CHECK-NOT: mvhi 164(%r15), 0
40+
; CHECK: # %for.end9
41+
; CHECK: mvhi 164(%r15), 0
42+
43+
for.cond4thread-pre-split.lr.ph: ; preds = %for.end
44+
%1 = ptrtoint i32* %i to i64
45+
%2 = trunc i64 %1 to i32
46+
%3 = load i32*, i32** @g, align 8
47+
%.pr3.pre = load i32, i32* @e, align 4, !tbaa !2
48+
br label %for.cond4thread-pre-split
49+
50+
for.cond4thread-pre-split: ; preds = %for.cond4thread-pre-split.lr.ph, %for.end9
51+
%4 = phi i32 [ %.pr2, %for.cond4thread-pre-split.lr.ph ], [ %dec, %for.end9 ]
52+
%5 = phi i32 [ 14, %for.cond4thread-pre-split.lr.ph ], [ 0, %for.end9 ]
53+
%.pr3 = phi i32 [ %.pr3.pre, %for.cond4thread-pre-split.lr.ph ], [ %.pr37, %for.end9 ]
54+
%cmp54 = icmp slt i32 %.pr3, 1
55+
br i1 %cmp54, label %for.body6.preheader, label %for.end9
56+
57+
for.body6.preheader: ; preds = %for.cond4thread-pre-split
58+
store i32 %5, i32* %3, align 4, !tbaa !2
59+
%6 = load i32, i32* @e, align 4, !tbaa !2
60+
%inc811 = add nsw i32 %6, 1
61+
store i32 %inc811, i32* @e, align 4, !tbaa !2
62+
%cmp512 = icmp slt i32 %6, 0
63+
br i1 %cmp512, label %for.body6.for.body6_crit_edge, label %for.end9.loopexit
64+
65+
for.body6.for.body6_crit_edge: ; preds = %for.body6.preheader, %for.body6.for.body6_crit_edge.3
66+
%.pre = load i32, i32* %i, align 4, !tbaa !2
67+
store i32 %.pre, i32* %3, align 4, !tbaa !2
68+
%7 = load i32, i32* @e, align 4, !tbaa !2
69+
%inc8 = add nsw i32 %7, 1
70+
store i32 %inc8, i32* @e, align 4, !tbaa !2
71+
%cmp5 = icmp slt i32 %7, 0
72+
br i1 %cmp5, label %for.body6.for.body6_crit_edge.1, label %for.end9.loopexit
73+
74+
for.end9.loopexit: ; preds = %for.body6.for.body6_crit_edge, %for.body6.for.body6_crit_edge.1, %for.body6.for.body6_crit_edge.2, %for.body6.for.body6_crit_edge.3, %for.body6.preheader
75+
%inc8.lcssa = phi i32 [ %inc811, %for.body6.preheader ], [ %inc8, %for.body6.for.body6_crit_edge ], [ %inc8.1, %for.body6.for.body6_crit_edge.1 ], [ %inc8.2, %for.body6.for.body6_crit_edge.2 ], [ %inc8.3, %for.body6.for.body6_crit_edge.3 ]
76+
%.pre9 = load i32, i32* @d, align 4, !tbaa !2
77+
br label %for.end9
78+
79+
for.end9: ; preds = %for.end9.loopexit, %for.cond4thread-pre-split
80+
%8 = phi i32 [ %.pre9, %for.end9.loopexit ], [ %4, %for.cond4thread-pre-split ]
81+
%.pr37 = phi i32 [ %inc8.lcssa, %for.end9.loopexit ], [ %.pr3, %for.cond4thread-pre-split ]
82+
store i32 %2, i32* @h, align 4, !tbaa !2
83+
store i32 0, i32* %i, align 4, !tbaa !2
84+
%9 = load i32, i32* @b, align 4, !tbaa !2
85+
%10 = load i32, i32* @f, align 4, !tbaa !2
86+
%xor = xor i32 %10, %9
87+
%idxprom = sext i32 %xor to i64
88+
%arrayidx = getelementptr inbounds [6 x i32], [6 x i32]* @a, i64 0, i64 %idxprom
89+
%11 = load i32, i32* %arrayidx, align 4, !tbaa !2
90+
store i32 %11, i32* @b, align 4, !tbaa !2
91+
%dec = add nsw i32 %8, -1
92+
store i32 %dec, i32* @d, align 4, !tbaa !2
93+
%cmp2 = icmp sgt i32 %8, 0
94+
br i1 %cmp2, label %for.cond4thread-pre-split, label %for.end11
95+
96+
for.end11: ; preds = %for.end9, %for.end.for.end11_crit_edge
97+
%12 = phi i32 [ %.pre10, %for.end.for.end11_crit_edge ], [ %11, %for.end9 ]
98+
%call = call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), i32 signext %12)
99+
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
100+
ret i32 0
101+
102+
for.body6.for.body6_crit_edge.1: ; preds = %for.body6.for.body6_crit_edge
103+
%.pre.1 = load i32, i32* %i, align 4, !tbaa !2
104+
store i32 %.pre.1, i32* %3, align 4, !tbaa !2
105+
%13 = load i32, i32* @e, align 4, !tbaa !2
106+
%inc8.1 = add nsw i32 %13, 1
107+
store i32 %inc8.1, i32* @e, align 4, !tbaa !2
108+
%cmp5.1 = icmp slt i32 %13, 0
109+
br i1 %cmp5.1, label %for.body6.for.body6_crit_edge.2, label %for.end9.loopexit
110+
111+
for.body6.for.body6_crit_edge.2: ; preds = %for.body6.for.body6_crit_edge.1
112+
%.pre.2 = load i32, i32* %i, align 4, !tbaa !2
113+
store i32 %.pre.2, i32* %3, align 4, !tbaa !2
114+
%14 = load i32, i32* @e, align 4, !tbaa !2
115+
%inc8.2 = add nsw i32 %14, 1
116+
store i32 %inc8.2, i32* @e, align 4, !tbaa !2
117+
%cmp5.2 = icmp slt i32 %14, 0
118+
br i1 %cmp5.2, label %for.body6.for.body6_crit_edge.3, label %for.end9.loopexit
119+
120+
for.body6.for.body6_crit_edge.3: ; preds = %for.body6.for.body6_crit_edge.2
121+
%.pre.3 = load i32, i32* %i, align 4, !tbaa !2
122+
store i32 %.pre.3, i32* %3, align 4, !tbaa !2
123+
%15 = load i32, i32* @e, align 4, !tbaa !2
124+
%inc8.3 = add nsw i32 %15, 1
125+
store i32 %inc8.3, i32* @e, align 4, !tbaa !2
126+
%cmp5.3 = icmp slt i32 %15, 0
127+
br i1 %cmp5.3, label %for.body6.for.body6_crit_edge, label %for.end9.loopexit
128+
}
129+
130+
; Function Attrs: argmemonly nounwind
131+
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
132+
133+
; Function Attrs: argmemonly nounwind
134+
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
135+
136+
; Function Attrs: nounwind
137+
declare dso_local signext i32 @printf(i8* nocapture readonly, ...)
138+
139+
!llvm.module.flags = !{!0}
140+
141+
!0 = !{i32 1, !"wchar_size", i32 4}
142+
!2 = !{!3, !3, i64 0}
143+
!3 = !{!"int", !4, i64 0}
144+
!4 = !{!"omnipotent char", !5, i64 0}
145+
!5 = !{!"Simple C/C++ TBAA"}

0 commit comments

Comments
 (0)
Please sign in to comment.