Index: include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- include/llvm/Transforms/Scalar/GVN.h
+++ include/llvm/Transforms/Scalar/GVN.h
@@ -133,6 +133,7 @@
   const TargetLibraryInfo *TLI;
   AssumptionCache *AC;
   SetVector<BasicBlock *> DeadBlocks;
+  SetVector<BasicBlock *> BlocksWithImplicitControlFlow;
   OptimizationRemarkEmitter *ORE;
   ValueTable VN;
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -1027,6 +1028,23 @@
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
+  // Check that there are no implicit control flow instructions above our load
+  // in its block. If there is an instruction that doesn't always pass the
+  // execution to the following instruction, then moving through it may become
+  // invalid. For example:
+  //
+  //   int arr[LEN];
+  //   int index = ???;
+  //   ...
+  //   guard(0 <= index && index < LEN);
+  //   use(arr[index]);
+  //
+  // It is illegal to move the array access to any point above the guard,
+  // because if the index is out of bounds we should deoptimize rather than
+  // access the array.
+  // Check that there is no guard in this block above our instruction.
+  if (BlocksWithImplicitControlFlow.count(TmpBB))
+    return false;
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1041,6 +1059,10 @@
     // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
       return false;
+
+    // Check that there is no implicit control flow in a block above.
+    if (BlocksWithImplicitControlFlow.count(TmpBB))
+      return false;
   }

   assert(TmpBB);
@@ -2020,6 +2042,33 @@
        BI != BE;) {
     if (!ReplaceWithConstMap.empty())
       ChangedFunction |= replaceOperandsWithConsts(&*BI);
+
+    // If a block's instruction doesn't always pass the control to its successor
+    // instruction, mark the block as having implicit control flow. We use them
+    // to avoid wrong assumptions of sort "if A is executed and B post-dominates
+    // A, then B is also executed". This is not true if there is an implicit
+    // control flow instruction (e.g. a guard) between them.
+    //
+    // NOTE: We add blocks to this set as we traverse them, not before the
+    // traversal begins. This implies two facts we should keep in mind:
+    // 1) We should not rely on this set unless we know that we have already
+    //    visited all blocks that we want to check.
+    // 2) If the current block is not in this set by the moment of processing an
+    //    instruction, it does NOT mean that there are no instructions with
+    //    implicit control flow anywhere in this block. It only means that there
+    //    were no such instructions between the beginning of the block and the
+    //    instruction being processed.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. Anyway, trapping
+    // instructions shouldn't introduce implicit control flow, so we explicitly
+    // allow them here. This must be removed once
+    // isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (!isGuaranteedToTransferExecutionToSuccessor(&*BI) &&
+        !isa<LoadInst>(&*BI) && !isa<StoreInst>(&*BI))
+      BlocksWithImplicitControlFlow.insert(BB);
+
     ChangedFunction |= processInstruction(&*BI);
     if (InstrsToErase.empty()) {
@@ -2313,6 +2362,7 @@
   VN.clear();
   LeaderTable.clear();
   BlockRPONumber.clear();
+  BlocksWithImplicitControlFlow.clear();
   TableAllocator.Reset();
 }
Index: test/Transforms/GVN/PRE/local-pre.ll
===================================================================
--- test/Transforms/GVN/PRE/local-pre.ll
+++ test/Transforms/GVN/PRE/local-pre.ll
@@ -1,6 +1,9 @@
 ; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s

+declare void @may_exit() nounwind
+
 define i32 @main(i32 %p, i32 %q) {
+; CHECK-LABEL: @main(
 block1:
   %cmp = icmp eq i32 %p, %q
   br i1 %cmp, label %block2, label %block3
@@ -20,3 +23,28 @@
 ; CHECK: %b.pre-phi = phi i32 [ %.pre, %block3 ], [ %a, %block2 ]
 ; CHECK-NEXT: ret i32 %b.pre-phi
 }
+
+define i32 @test2(i32 %p, i32 %q) {
+; CHECK-LABEL: @test2
+; CHECK: block1:
+block1:
+  %cmp = icmp eq i32 %p, %q
+  br i1 %cmp, label %block2, label %block3
+
+block2:
+  %a = sdiv i32 %p, %q
+  br label %block4
+
+block3:
+  br label %block4
+
+; CHECK: block4:
+; CHECK-NEXT: call
+; CHECK-NEXT: %b = sdiv
+; CHECK-NEXT: ret i32 %b
+
+block4:
+  call void @may_exit() nounwind
+  %b = sdiv i32 %p, %q
+  ret i32 %b
+}
Index: test/Transforms/GVN/PRE/pre-load-guards.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it may lead to out-of-bound array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+; CHECK-NOT: sge
+; CHECK-NOT: slt
+; CHECK-NOT: and
+  br label %block4
+
+block3:
+  store i32 0, i32* %el1
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT: %PRE = load i32, i32* %P2
+; CHECK: ret i32 %PRE
+
+  %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+  %cond1 = icmp sge i32 %index, 0
+  %cond2 = icmp slt i32 %index, %len
+  %in.bounds = and i1 %cond1, %cond2
+  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: load i32, i32* %q
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32 [
+; CHECK-NEXT: phi i32* [
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT: load
+; CHECK: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  %PRE = load i32, i32* %P2
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32*
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK-NEXT: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  br label %block5
+
+block5:
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
Index: test/Transforms/GVN/PRE/pre-load.ll
===================================================================
--- test/Transforms/GVN/PRE/pre-load.ll
+++ test/Transforms/GVN/PRE/pre-load.ll
@@ -430,3 +430,31 @@
   call void @g(i32 %NOTPRE)
   cleanupret from %c2 unwind to caller
 }
+
+; Don't PRE load across calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}