diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1133,7 +1133,6 @@
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
-  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
   // Check that there is no implicit control flow instructions above our load in
   // its block. If there is an instruction that doesn't always pass the
@@ -1150,8 +1149,9 @@
   // because if the index is out of bounds we should deoptimize rather than
   // access the array.
   // Check that there is no guard in this block above our instruction.
-  if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
-    return false;
+  bool MustEnsureSafetyOfSpeculativeExecution =
+      ICF->isDominatedByICFIFromSameBlock(LI);
+
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1168,8 +1168,8 @@
       return false;
 
     // Check that there is no implicit control flow in a block above.
-    if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
-      return false;
+    MustEnsureSafetyOfSpeculativeExecution =
+        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
   }
 
   assert(TmpBB);
@@ -1241,6 +1241,17 @@
   if (NumUnavailablePreds != 1)
      return false;
 
+  // Now we know where we will insert the load. We must ensure that it is
+  // safe to speculatively execute the load at those points.
+  if (MustEnsureSafetyOfSpeculativeExecution) {
+    if (CriticalEdgePred.size())
+      if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+        return false;
+    for (auto &PL : PredLoads)
+      if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+        return false;
+  }
+
   // Split critical edges, and update the unavailable predecessors accordingly.
   for (BasicBlock *OrigPred : CriticalEdgePred) {
     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
diff --git a/llvm/test/Transforms/GVN/loadpre-context.ll b/llvm/test/Transforms/GVN/loadpre-context.ll
--- a/llvm/test/Transforms/GVN/loadpre-context.ll
+++ b/llvm/test/Transforms/GVN/loadpre-context.ll
@@ -1,18 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s
 
-; load may be speculated, adress is not null using context search.
+; load may be speculated, address is not null using context search.
 ; There is a critical edge.
 define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
 ; CHECK-LABEL: @loadpre_critical_edge(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
-; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[HEADER:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[ENTRY_HEADER_CRIT_EDGE:%.*]]
+; CHECK:       entry.header_crit_edge:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[SUM:%.*]], [[HEADER]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
+; CHECK-NEXT:    [[SUM]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
@@ -28,7 +31,8 @@
 
 header:
   %iv = phi i32 [0, %entry], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents moving the load above it because it is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -43,19 +47,20 @@
   ret i32 0
 }
 
-; load may be speculated, adress is not null using context search.
+; load may be speculated, address is not null using context search.
 define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
 ; CHECK-LABEL: @loadpre_basic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[PREHEADER]] ], [ [[SUM:%.*]], [[HEADER]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
+; CHECK-NEXT:    [[SUM]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
@@ -74,7 +79,8 @@
 
 header:
   %iv = phi i32 [0, %preheader], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents moving the load above it because it is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -89,7 +95,7 @@
   ret i32 0
 }
 
-; load cannot be speculated, adress is not null check does not dominate the loop.
+; load cannot be speculated, the "address is not null" check does not dominate the loop.
 define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
 ; CHECK-LABEL: @loadpre_maybe_null(
 ; CHECK-NEXT:  entry:
@@ -101,7 +107,7 @@
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
@@ -125,7 +131,8 @@
 
 header:
   %iv = phi i32 [0, %preheader], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents moving the load above it because it is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -141,4 +148,4 @@
 }
 
 ; Does not guarantee that returns.
-declare i32 @foo(i32) readnone
+declare i32 @ro_foo(i32) readnone