Skip to content

Commit 9455c1d

Browse files
committed Feb 5, 2016
[LoopLoadElim] Don't allow versioning when optForSize
This was requested in the review of D16300. llvm-svn: 259861
1 parent 0cf866a commit 9455c1d

File tree

2 files changed

+85
-2
lines changed

2 files changed

+85
-2
lines changed
 

‎llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp

+9-2
Original file line number | Diff line number | Diff line change
@@ -465,9 +465,16 @@ class LoadEliminationForLoop {
465465
return false;
466466
}
467467

468-
// Point of no-return, start the transformation. First, version the loop if
469-
// necessary.
470468
if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
469+
if (L->getHeader()->getParent()->optForSize()) {
470+
DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
471+
"for size.\n");
472+
return false;
473+
}
474+
475+
// Point of no-return, start the transformation. First, version the loop
476+
// if necessary.
477+
471478
LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
472479
LV.setAliasChecks(std::move(Checks));
473480
LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
1+
; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
2+
3+
; When optimizing for size don't eliminate in this loop because the loop would
4+
; have to be versioned first because A and C may alias.
5+
;
6+
; for (unsigned i = 0; i < 100; i++) {
7+
; A[i+1] = B[i] + 2;
8+
; C[i] = A[i] * 2;
9+
; }
10+
11+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
12+
13+
; CHECK-LABEL: @f(
14+
define void @f(i32* %A, i32* %B, i32* %C, i64 %N) optsize {
15+
16+
entry:
17+
br label %for.body
18+
19+
for.body: ; preds = %for.body, %entry
20+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
21+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
22+
23+
%Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
24+
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
25+
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
26+
%Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
27+
28+
%b = load i32, i32* %Bidx, align 4
29+
%a_p1 = add i32 %b, 2
30+
store i32 %a_p1, i32* %Aidx_next, align 4
31+
32+
%a = load i32, i32* %Aidx, align 4
33+
; CHECK: %c = mul i32 %a, 2
34+
%c = mul i32 %a, 2
35+
store i32 %c, i32* %Cidx, align 4
36+
37+
%exitcond = icmp eq i64 %indvars.iv.next, %N
38+
br i1 %exitcond, label %for.end, label %for.body
39+
40+
for.end: ; preds = %for.body
41+
ret void
42+
}
43+
44+
; Same loop but with noalias on %A and %C. In this case load-eliminate even
45+
; with -Os.
46+
47+
; CHECK-LABEL: @g(
48+
define void @g(i32* noalias %A, i32* %B, i32* noalias %C, i64 %N) optsize {
49+
50+
entry:
51+
br label %for.body
52+
53+
for.body: ; preds = %for.body, %entry
54+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
55+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
56+
57+
%Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
58+
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
59+
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
60+
%Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
61+
62+
%b = load i32, i32* %Bidx, align 4
63+
%a_p1 = add i32 %b, 2
64+
store i32 %a_p1, i32* %Aidx_next, align 4
65+
66+
%a = load i32, i32* %Aidx, align 4
67+
; CHECK: %c = mul i32 %store_forwarded, 2
68+
%c = mul i32 %a, 2
69+
store i32 %c, i32* %Cidx, align 4
70+
71+
%exitcond = icmp eq i64 %indvars.iv.next, %N
72+
br i1 %exitcond, label %for.end, label %for.body
73+
74+
for.end: ; preds = %for.body
75+
ret void
76+
}

0 commit comments

Comments
 (0)
Please sign in to comment.