Skip to content

Commit f1da33e

Browse files
committedJun 11, 2016
[LICM] Make isGuaranteedToExecute more accurate.
Summary: Make isGuaranteedToExecute use the isGuaranteedToTransferExecutionToSuccessor helper, and make that helper a bit more accurate. There's a potential performance impact here from assuming that arbitrary calls might not return. This probably has little impact on loads and stores to a pointer because most things alias analysis can reason about are dereferenceable anyway. The other impacts, like less aggressive hoisting of sdiv by a variable and less aggressive hoisting around volatile memory operations, are unlikely to matter for real code. This also impacts SCEV, which uses the same helper. It's a minor improvement there because we can tell that, for example, memcpy always returns normally. Strictly speaking, it's also introducing a bug, but it's not any worse than everywhere else we assume readonly functions terminate. Fixes http://llvm.org/PR27857. Reviewers: hfinkel, reames, chandlerc, sanjoy Subscribers: broune, llvm-commits Differential Revision: http://reviews.llvm.org/D21167 llvm-svn: 272489
1 parent 2b7c02a commit f1da33e

File tree

9 files changed

+135
-27
lines changed

9 files changed

+135
-27
lines changed
 

‎llvm/lib/Analysis/ValueTracking.cpp

+39-13
Original file line numberDiff line numberDiff line change
@@ -3444,19 +3444,45 @@ OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
34443444
}
34453445

34463446
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
3447-
// FIXME: This conservative implementation can be relaxed. E.g. most
3448-
// atomic operations are guaranteed to terminate on most platforms
3449-
// and most functions terminate.
3450-
3451-
// Calls can throw and thus not terminate, and invokes may not terminate and
3452-
// could throw to non-successor (see bug 24185 for details).
3453-
if (isa<CallInst>(I) || isa<InvokeInst>(I))
3454-
// However, llvm.dbg intrinsics are safe, since they're no-ops.
3455-
return isa<DbgInfoIntrinsic>(I);
3456-
3457-
return !I->isAtomic() && // atomics may never succeed on some platforms
3458-
!isa<ResumeInst>(I) && // has no successors
3459-
!isa<ReturnInst>(I); // has no successors
3447+
// A memory operation returns normally if it isn't volatile. A volatile
3448+
// operation is allowed to trap.
3449+
//
3450+
// An atomic operation isn't guaranteed to return in a reasonable amount of
3451+
// time because it's possible for another thread to interfere with it for an
3452+
// arbitrary length of time, but programs aren't allowed to rely on that.
3453+
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
3454+
return !LI->isVolatile();
3455+
if (const StoreInst *SI = dyn_cast<StoreInst>(I))
3456+
return !SI->isVolatile();
3457+
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
3458+
return !CXI->isVolatile();
3459+
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
3460+
return !RMWI->isVolatile();
3461+
if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
3462+
return !MII->isVolatile();
3463+
3464+
// If there is no successor, then execution can't transfer to it.
3465+
if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
3466+
return !CRI->unwindsToCaller();
3467+
if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
3468+
return !CatchSwitch->unwindsToCaller();
3469+
if (isa<ResumeInst>(I))
3470+
return false;
3471+
if (isa<ReturnInst>(I))
3472+
return false;
3473+
3474+
// Calls can throw, or contain an infinite loop, or kill the process.
3475+
if (CallSite CS = CallSite(const_cast<Instruction*>(I))) {
3476+
// Calls which don't write to arbitrary memory are safe.
3477+
// FIXME: Ignoring infinite loops without any side-effects is too aggressive,
3478+
// but it's consistent with other passes. See http://llvm.org/PR965 .
3479+
// FIXME: This isn't aggressive enough; a call which only writes to a
3480+
// global is guaranteed to return.
3481+
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory();
3482+
}
3483+
3484+
// Other instructions return normally.
3485+
return true;
34603486
}
34613487

34623488
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,

‎llvm/lib/Transforms/Scalar/LICM.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
388388
// Iterate over header and compute safety info.
389389
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
390390
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
391-
SafetyInfo->HeaderMayThrow |= I->mayThrow();
391+
SafetyInfo->HeaderMayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
392392

393393
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
394394
// Iterate over loop instructions and compute safety info.
@@ -397,7 +397,7 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
397397
(BB != BBE) && !SafetyInfo->MayThrow; ++BB)
398398
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
399399
(I != E) && !SafetyInfo->MayThrow; ++I)
400-
SafetyInfo->MayThrow |= I->mayThrow();
400+
SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
401401

402402
// Compute funclet colors if we might sink/hoist in a function with a funclet
403403
// personality routine.

‎llvm/lib/Transforms/Utils/LoopUtils.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -962,5 +962,8 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
962962
if (ExitBlocks.empty())
963963
return false;
964964

965+
// FIXME: In general, we have to prove that the loop isn't an infinite loop.
966+
// See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
967+
// just a special case of this.)
965968
return true;
966969
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
2+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
declare void @f() nounwind
6+
7+
; Don't hoist load past nounwind call.
8+
define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable {
9+
; CHECK-LABEL: @test1(
10+
entry:
11+
br label %for.body
12+
13+
; CHECK: tail call void @f()
14+
; CHECK-NEXT: load i32
15+
for.body:
16+
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
17+
%x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
18+
tail call void @f() nounwind
19+
%i1 = load i32, i32* %a, align 4
20+
%add = add nsw i32 %i1, %x.05
21+
%inc = add nuw nsw i32 %i.06, 1
22+
%exitcond = icmp eq i32 %inc, 1000
23+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
24+
25+
for.cond.cleanup:
26+
ret i32 %add
27+
}
28+
29+
; Don't hoist division past nounwind call.
30+
define i32 @test2(i32 %N, i32 %c) nounwind uwtable {
31+
; CHECK-LABEL: @test2(
32+
entry:
33+
%cmp4 = icmp sgt i32 %N, 0
34+
br i1 %cmp4, label %for.body, label %for.cond.cleanup
35+
36+
; CHECK: tail call void @f()
37+
; CHECK-NEXT: sdiv i32
38+
for.body:
39+
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
40+
tail call void @f() nounwind
41+
%div = sdiv i32 5, %c
42+
%add = add i32 %i.05, 1
43+
%inc = add i32 %add, %div
44+
%cmp = icmp slt i32 %inc, %N
45+
br i1 %cmp, label %for.body, label %for.cond.cleanup
46+
47+
for.cond.cleanup:
48+
ret i32 0
49+
}
50+
51+
; Don't hoist load past volatile load.
52+
define i32 @test3(i32* noalias nocapture readonly %a, i32* %v) nounwind uwtable {
53+
; CHECK-LABEL: @test3(
54+
entry:
55+
br label %for.body
56+
57+
; CHECK: load volatile i32
58+
; CHECK-NEXT: load i32
59+
for.body:
60+
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
61+
%x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
62+
%xxx = load volatile i32, i32* %v, align 4
63+
%i1 = load i32, i32* %a, align 4
64+
%add = add nsw i32 %i1, %x.05
65+
%inc = add nuw nsw i32 %i.06, 1
66+
%exitcond = icmp eq i32 %inc, 1000
67+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
68+
69+
for.cond.cleanup:
70+
ret i32 %add
71+
}

‎llvm/test/Transforms/LICM/hoisting.ll

+14-9
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,21 @@ define i32 @test2(i1 %c) {
3737
; CHECK-LABEL: @test2(
3838
; CHECK-NEXT: load i32, i32* @X
3939
; CHECK-NEXT: %B = sdiv i32 4, %A
40-
%A = load i32, i32* @X ; <i32> [#uses=2]
41-
br label %Loop
40+
%A = load i32, i32* @X
41+
br label %Loop
42+
4243
Loop:
43-
;; Should have hoisted this div!
44-
%B = sdiv i32 4, %A ; <i32> [#uses=2]
45-
call void @foo2( i32 %B )
46-
br i1 %c, label %Loop, label %Out
47-
Out: ; preds = %Loop
48-
%C = sub i32 %A, %B ; <i32> [#uses=1]
49-
ret i32 %C
44+
;; Should have hoisted this div!
45+
%B = sdiv i32 4, %A
46+
br label %loop2
47+
48+
loop2:
49+
call void @foo2( i32 %B )
50+
br i1 %c, label %Loop, label %Out
51+
52+
Out:
53+
%C = sub i32 %A, %B
54+
ret i32 %C
5055
}
5156

5257

‎llvm/test/Transforms/LICM/preheader-safe.ll

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ entry:
1414

1515
loop: ; preds = %entry, %for.inc
1616
%div = udiv i64 %x, %y
17+
br label %loop2
18+
19+
loop2:
1720
call void @use_nothrow(i64 %div)
1821
br label %loop
1922
}

‎llvm/test/Transforms/LICM/promote-tls.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ for.header:
2525
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
2626
%old = load i32, i32* %addr, align 4
2727
; deliberate impossible to analyze branch
28-
%guard = load volatile i8*, i8** @p
28+
%guard = load atomic i8*, i8** @p monotonic, align 8
2929
%exitcmp = icmp eq i8* %guard, null
3030
br i1 %exitcmp, label %for.body, label %early-exit
3131

‎llvm/test/Transforms/LICM/scalar_promote.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ Loop: ; preds = %Loop, %0
135135
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
136136
store i32 %x2, i32* @X
137137

138-
store volatile i32* @X, i32** %P2
138+
store atomic i32* @X, i32** %P2 monotonic, align 8
139139

140140
%Next = add i32 %j, 1 ; <i32> [#uses=2]
141141
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]

‎llvm/test/Transforms/LICM/volatile-alias.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
1010

1111
; Function Attrs: nounwind uwtable
12-
define i32 @foo(i32* %p, i32* %q, i32 %n) #0 {
12+
define i32 @foo(i32* dereferenceable(4) nonnull %p, i32* %q, i32 %n) #0 {
1313
entry:
1414
%p.addr = alloca i32*, align 8
1515
%q.addr = alloca i32*, align 8

0 commit comments

Comments
 (0)
Please sign in to comment.