Index: include/llvm/Transforms/Scalar/GVN.h =================================================================== --- include/llvm/Transforms/Scalar/GVN.h +++ include/llvm/Transforms/Scalar/GVN.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -134,6 +135,9 @@ AssumptionCache *AC; SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; + // Maps a block to the topmost instruction with implicit control flow in it. + DenseMap + FirstImplicitControlFlowInsts; ValueTable VN; @@ -243,6 +247,7 @@ BasicBlock *Curr, unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); + void fillImplicitControlFlowInfo(ReversePostOrderTraversal &RPOT); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -34,8 +33,10 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -1027,6 +1028,29 @@ BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; + // Check that there is no implicit control flow instructions above our load in + // its block. If there is an instruction that doesn't always pass the + // execution to the following instruction, then moving through it may become + // invalid. For example: + // + // int arr[LEN]; + // int index = ???; + // ... + // guard(0 <= index && index < LEN); + // use(arr[index]); + // + // It is illegal to move the array access to any point above the guard, + // because if the index is out of bounds we should deoptimize rather than + // access the array. + // Check that there is no guard in this block above our intruction. + auto It = FirstImplicitControlFlowInsts.find(TmpBB); + if (It != FirstImplicitControlFlowInsts.end()) { + assert(It->second->getParent() == TmpBB && + "Implicit control flow map broken?"); + OrderedBasicBlock OBB(TmpBB); + if (OBB.dominates(It->second, LI)) + return false; + } while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1041,6 +1065,10 @@ // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) return false; + + // Check that there is no implicit control flow in a block above. + if (FirstImplicitControlFlowInsts.count(TmpBB)) + return false; } assert(TmpBB); @@ -2303,6 +2331,7 @@ // RPOT walks the graph in its constructor and will not be invalidated during // processBlock. ReversePostOrderTraversal RPOT(&F); + fillImplicitControlFlowInfo(RPOT); for (BasicBlock *BB : RPOT) Changed |= processBlock(BB); @@ -2314,6 +2343,30 @@ LeaderTable.clear(); BlockRPONumber.clear(); TableAllocator.Reset(); + FirstImplicitControlFlowInsts.clear(); +} + +void +GVN::fillImplicitControlFlowInfo(ReversePostOrderTraversal &RPOT) { + for (BasicBlock *BB : RPOT) + // If a block's instruction doesn't always pass the control to its successor + // instruction, mark the block as having implicit control flow. We use them + // to avoid wrong assumptions of sort "if A is executed and B post-dominates + // A, then B is also executed". This is not true is there is an implicit + // control flow instruction (e.g. a guard) between them. + // + // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false + // for volatime stores and loads because they can trap. The discussion on + // whether or not it is correct is still ongoing. We might want to get rid + // of this logic in the future. Anyways, trapping instructions shouldn't + // introfuce implicit control flow, so we explicitly allow them here. This + // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed. + for (auto &I : *BB) + if (!isGuaranteedToTransferExecutionToSuccessor(&I) && + !isa(&I) && !isa(&I)) { + FirstImplicitControlFlowInsts[BB] = &I; + break; + } } /// Verify that the specified instruction does not occur in our Index: test/Transforms/GVN/PRE/local-pre.ll =================================================================== --- test/Transforms/GVN/PRE/local-pre.ll +++ test/Transforms/GVN/PRE/local-pre.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s +declare void @may_exit() nounwind + define i32 @main(i32 %p, i32 %q) { +; CHECK-LABEL: @main( block1: %cmp = icmp eq i32 %p, %q br i1 %cmp, label %block2, label %block3 @@ -20,3 +23,28 @@ ; CHECK: %b.pre-phi = phi i32 [ %.pre, %block3 ], [ %a, %block2 ] ; CHECK-NEXT: ret i32 %b.pre-phi } + +define i32 @test2(i32 %p, i32 %q) { +; CHECK-LABEL: @test2 +; CHECK: block1: +block1: + %cmp = icmp eq i32 %p, %q + br i1 %cmp, label %block2, label %block3 + +block2: + %a = sdiv i32 %p, %q + br label %block4 + +block3: + br label %block4 + +; CHECK: block4: +; CHECK-NEXT: call +; CHECK-NEXT: %b = sdiv +; CHECK-NEXT: ret i32 %b + +block4: + call void @may_exit() nounwind + %b = sdiv i32 %p, %q + ret i32 %b +} Index: test/Transforms/GVN/PRE/pre-load-guards.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/PRE/pre-load-guards.ll @@ -0,0 +1,146 @@ +; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare void @llvm.experimental.guard(i1, ...) + +; This is a motivating example on why we prohibit hoisting through guards. +; In the bottom block, we check that the index is within bounds and only access +; the element in this case and deoptimize otherwise. If we hoist the load to a +; place above the guard, it will may lead to out-of-bound array access. +define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) { +; CHECK-LABEL: @test_motivation( +block1: + %el1 = getelementptr inbounds i32, i32* %q, i32 %index + %el2 = getelementptr inbounds i32, i32* %p, i32 %index + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load +; CHECK-NOT: sge +; CHECK-NOT: slt +; CHECK-NOT: and + br label %block4 + +block3: + store i32 0, i32* %el1 + br label %block4 + +block4: + +; CHECK: block4: +; CHECK: %cond1 = icmp sge i32 %index, 0 +; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len +; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) +; CHECK-NEXT: %PRE = load i32, i32* %P2 +; CHECK: ret i32 %PRE + + %P2 = phi i32* [%el2, %block3], [%el1, %block2] + %cond1 = icmp sge i32 %index, 0 + %cond2 = icmp slt i32 %index, %len + %in.bounds = and i1 %cond1, %cond2 + call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ] + %PRE = load i32, i32* %P2 + ret i32 %PRE +} + +; Guard in load's block that is above the load should prohibit the PRE. +define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_01( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NEXT: load +; CHECK: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + %PRE = load i32, i32* %P2 + ret i32 %PRE +} + +; Guard in load's block that is below the load should not prohibit the PRE. +define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_02( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: load i32, i32* %q + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK-NEXT: phi i32 [ +; CHECK-NEXT: phi i32* [ +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NOT: load +; CHECK: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + %PRE = load i32, i32* %P2 + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + ret i32 %PRE +} + +; Guard above the load's block should prevent PRE from hoisting through it. +define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_03( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK-NEXT: phi i32* +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NEXT: load +; CHECK-NEXT: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + br label %block5 + +block5: + %PRE = load i32, i32* %P2 + ret i32 %PRE +} Index: test/Transforms/GVN/PRE/pre-load.ll =================================================================== --- test/Transforms/GVN/PRE/pre-load.ll +++ test/Transforms/GVN/PRE/pre-load.ll @@ -430,3 +430,31 @@ call void @g(i32 %NOTPRE) cleanupret from %c2 unwind to caller } + +; Don't PRE load across calls. + +define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) { +; CHECK-LABEL: @test13( +; CHECK: entry: +; CHECK-NEXT: icmp eq +; CHECK-NEXT: br i1 +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: if.then: +; CHECK-NEXT: load i32 +; CHECK-NEXT: store i32 +if.then: + %uu = load i32, i32* %x, align 4 + store i32 %uu, i32* %r, align 4 + br label %if.end + +; CHECK: if.end: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: load i32 +if.end: + call void @f() + %vv = load i32, i32* %x, align 4 + ret i32 %vv +}