Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1133,7 +1133,6 @@
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
-  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
   // Check that there is no implicit control flow instructions above our load in
   // its block. If there is an instruction that doesn't always pass the
@@ -1150,8 +1149,8 @@
   // because if the index is out of bounds we should deoptimize rather than
   // access the array.
   // Check that there is no guard in this block above our instruction.
-  if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
-    return false;
+  bool NeedSafeToSpeculativelyExecute = ICF->isDominatedByICFIFromSameBlock(LI);
+
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1168,8 +1167,8 @@
       return false;
 
     // Check that there is no implicit control flow in a block above.
-    if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
-      return false;
+    NeedSafeToSpeculativelyExecute =
+        NeedSafeToSpeculativelyExecute || ICF->hasICF(TmpBB);
   }
 
   assert(TmpBB);
@@ -1241,6 +1240,17 @@
   if (NumUnavailablePreds != 1)
       return false;
 
+  // Now we know where we will insert the load. We must ensure that it is safe
+  // to speculatively execute the load at those points.
+  if (NeedSafeToSpeculativelyExecute) {
+    if (CriticalEdgePred.size())
+      if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+        return false;
+    for (auto &PL : PredLoads)
+      if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+        return false;
+  }
+
   // Split critical edges, and update the unavailable predecessors accordingly.
   for (BasicBlock *OrigPred : CriticalEdgePred) {
     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
Index: llvm/test/Transforms/GVN/loadpre-context.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/loadpre-context.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s
+
+; The load may be speculated: the address is known to be non-null by context search.
+; There is a critical edge.
+define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+; CHECK-LABEL: @loadpre_critical_edge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[ENTRY_HEADER_CRIT_EDGE:%.*]]
+; CHECK: entry.header_crit_edge:
+; CHECK-NEXT: [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[SUM:%.*]], [[HEADER]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
+; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
+; CHECK-NEXT: [[SUM]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[SUM]]
+; CHECK: null_exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %cmp = icmp eq i32* %arg, null
+  br i1 %cmp, label %null_exit, label %header
+
+header:
+  %iv = phi i32 [0, %entry], [%iv.next, %header]
+  %new_v = call i32 @foo(i32 %iv)
+  %v = load i32, i32* %arg
+  %sum = add i32 %new_v, %v
+  store i32 %sum, i32* %arg
+  %iv.next = add i32 %iv, 1
+  %cond = icmp eq i32 %iv.next, %N
+  br i1 %cond, label %exit, label %header
+
+exit:
+  ret i32 %sum
+
+null_exit:
+  ret i32 0
+}
+
+; The load may be speculated: the address is known to be non-null by context search.
+define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+; CHECK-LABEL: @loadpre_basic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]]
+; CHECK: preheader:
+; CHECK-NEXT: [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[PREHEADER]] ], [ [[SUM:%.*]], [[HEADER]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
+; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
+; CHECK-NEXT: [[SUM]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[SUM]]
+; CHECK: null_exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %cmp = icmp eq i32* %arg, null
+  br i1 %cmp, label %null_exit, label %preheader
+
+preheader:
+  br label %header
+
+header:
+  %iv = phi i32 [0, %preheader], [%iv.next, %header]
+  %new_v = call i32 @foo(i32 %iv)
+  %v = load i32, i32* %arg
+  %sum = add i32 %new_v, %v
+  store i32 %sum, i32* %arg
+  %iv.next = add i32 %iv, 1
+  %cond = icmp eq i32 %iv.next, %N
+  br i1 %cond, label %exit, label %header
+
+exit:
+  ret i32 %sum
+
+null_exit:
+  ret i32 0
+}
+
+; The load cannot be speculated: the null check on the address does not dominate the loop.
+define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
+; CHECK-LABEL: @loadpre_maybe_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[NULL_CHECK:%.*]], label [[PREHEADER:%.*]]
+; CHECK: null_check:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER]]
+; CHECK: preheader:
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
+; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[SUM]]
+; CHECK: null_exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  br i1 %c, label %null_check, label %preheader
+
+null_check:
+  %cmp = icmp eq i32* %arg, null
+  br i1 %cmp, label %null_exit, label %preheader
+
+preheader:
+  br label %header
+
+header:
+  %iv = phi i32 [0, %preheader], [%iv.next, %header]
+  %new_v = call i32 @foo(i32 %iv)
+  %v = load i32, i32* %arg
+  %sum = add i32 %new_v, %v
+  store i32 %sum, i32* %arg
+  %iv.next = add i32 %iv, 1
+  %cond = icmp eq i32 %iv.next, %N
+  br i1 %cond, label %exit, label %header
+
+exit:
+  ret i32 %sum
+
+null_exit:
+  ret i32 0
+}
+
+; The callee is not guaranteed to return.
+declare i32 @foo(i32) readnone