Index: llvm/trunk/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/GVN.h +++ llvm/trunk/include/llvm/Transforms/Scalar/GVN.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -27,6 +28,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Utils/OrderedInstructions.h" #include #include #include @@ -156,7 +158,11 @@ AssumptionCache *AC; SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; + // Maps a block to the topmost instruction with implicit control flow in it. + DenseMap + FirstImplicitControlFlowInsts; + OrderedInstructions *OI; ValueTable VN; /// A mapping from value numbers to lists of Value*'s that @@ -268,6 +274,7 @@ BasicBlock *Curr, unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); + void fillImplicitControlFlowInfo(BasicBlock *BB); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); Index: llvm/trunk/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/GVN.cpp +++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -1048,7 +1049,32 @@ // backwards through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; + bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI); + // Check that there is no implicit control flow instructions above our load in + // its block. If there is an instruction that doesn't always pass the + // execution to the following instruction, then moving through it may become + // invalid. For example: + // + // int arr[LEN]; + // int index = ???; + // ... + // guard(0 <= index && index < LEN); + // use(arr[index]); + // + // It is illegal to move the array access to any point above the guard, + // because if the index is out of bounds we should deoptimize rather than + // access the array. + // Check that there is no guard in this block above our intruction. + if (!IsSafeToSpeculativelyExecute) { + auto It = FirstImplicitControlFlowInsts.find(TmpBB); + if (It != FirstImplicitControlFlowInsts.end()) { + assert(It->second->getParent() == TmpBB && + "Implicit control flow map broken?"); + if (OI->dominates(It->second, LI)) + return false; + } + } while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1063,6 +1089,11 @@ // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) return false; + + // Check that there is no implicit control flow in a block above. + if (!IsSafeToSpeculativelyExecute && + FirstImplicitControlFlowInsts.count(TmpBB)) + return false; } assert(TmpBB); @@ -1982,6 +2013,8 @@ TLI = &RunTLI; VN.setAliasAnalysis(&RunAA); MD = RunMD; + OrderedInstructions OrderedInstrs(DT); + OI = &OrderedInstrs; VN.setMemDep(MD); ORE = RunORE; @@ -2064,14 +2097,26 @@ if (!AtStart) --BI; + bool InvalidateImplicitCF = false; + const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB); for (auto *I : InstrsToErase) { assert(I->getParent() == BB && "Removing instruction from wrong block?"); DEBUG(dbgs() << "GVN removed: " << *I << '\n'); if (MD) MD->removeInstruction(I); DEBUG(verifyRemoved(I)); + if (MaybeFirstICF == I) { + // We have erased the first ICF in block. The map needs to be updated. + InvalidateImplicitCF = true; + // Do not keep dangling pointer on the erased instruction. + MaybeFirstICF = nullptr; + } I->eraseFromParent(); } + + OI->invalidateBlock(BB); InstrsToErase.clear(); + if (InvalidateImplicitCF) + fillImplicitControlFlowInfo(BB); if (AtStart) BI = BB->begin(); @@ -2265,7 +2310,14 @@ if (MD) MD->removeInstruction(CurInst); DEBUG(verifyRemoved(CurInst)); + bool InvalidateImplicitCF = + FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst; + // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes + // some assertion failures. + OI->invalidateBlock(CurrentBlock); CurInst->eraseFromParent(); + if (InvalidateImplicitCF) + fillImplicitControlFlowInfo(CurrentBlock); ++NumGVNInstr; return true; @@ -2332,6 +2384,9 @@ // RPOT walks the graph in its constructor and will not be invalidated during // processBlock. ReversePostOrderTraversal RPOT(&F); + + for (BasicBlock *BB : RPOT) + fillImplicitControlFlowInfo(BB); for (BasicBlock *BB : RPOT) Changed |= processBlock(BB); @@ -2343,6 +2398,48 @@ LeaderTable.clear(); BlockRPONumber.clear(); TableAllocator.Reset(); + FirstImplicitControlFlowInsts.clear(); +} + +void +GVN::fillImplicitControlFlowInfo(BasicBlock *BB) { + // Make sure that all marked instructions are actually deleted by this point, + // so that we don't need to care about omitting them. + assert(InstrsToErase.empty() && "Filling before removed all marked insns?"); + auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) { + // If a block's instruction doesn't always pass the control to its successor + // instruction, mark the block as having implicit control flow. We use them + // to avoid wrong assumptions of sort "if A is executed and B post-dominates + // A, then B is also executed". This is not true is there is an implicit + // control flow instruction (e.g. a guard) between them. + // + // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false + // for volatile stores and loads because they can trap. The discussion on + // whether or not it is correct is still ongoing. We might want to get rid + // of this logic in the future. Anyways, trapping instructions shouldn't + // introduce implicit control flow, so we explicitly allow them here. This + // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed. + if (isGuaranteedToTransferExecutionToSuccessor(I)) + return false; + if (auto *LI = dyn_cast(I)) { + assert(LI->isVolatile() && "Non-volatile load should transfer execution" + " to successor!"); + return false; + } + if (auto *SI = dyn_cast(I)) { + assert(SI->isVolatile() && "Non-volatile store should transfer execution" + " to successor!"); + return false; + } + return true; + }; + FirstImplicitControlFlowInsts.erase(BB); + + for (auto &I : *BB) + if (MayNotTransferExecutionToSuccessor(&I)) { + FirstImplicitControlFlowInsts[BB] = &I; + break; + } } /// Verify that the specified instruction does not occur in our Index: llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll +++ llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll @@ -17,6 +17,13 @@ define hidden void @wrapon_fn173() { ; CHECK-LABEL: @wrapon_fn173 +; CHECK: entry: +; CHECK-NEXT: call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef) +; CHECK-NEXT: %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8 +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: call i64* @getaddr_i64(i64 addrspace(100)* %.pre) +; CHECK-NEXT: br label %loop entry: %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef) Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll +++ llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll @@ -0,0 +1,146 @@ +; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare void @llvm.experimental.guard(i1, ...) + +; This is a motivating example on why we prohibit hoisting through guards. +; In the bottom block, we check that the index is within bounds and only access +; the element in this case and deoptimize otherwise. If we hoist the load to a +; place above the guard, it will may lead to out-of-bound array access. +define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) { +; CHECK-LABEL: @test_motivation( +block1: + %el1 = getelementptr inbounds i32, i32* %q, i32 %index + %el2 = getelementptr inbounds i32, i32* %p, i32 %index + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load +; CHECK-NOT: sge +; CHECK-NOT: slt +; CHECK-NOT: and + br label %block4 + +block3: + store i32 0, i32* %el1 + br label %block4 + +block4: + +; CHECK: block4: +; CHECK: %cond1 = icmp sge i32 %index, 0 +; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len +; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) +; CHECK-NEXT: %PRE = load i32, i32* %P2 +; CHECK: ret i32 %PRE + + %P2 = phi i32* [%el2, %block3], [%el1, %block2] + %cond1 = icmp sge i32 %index, 0 + %cond2 = icmp slt i32 %index, %len + %in.bounds = and i1 %cond1, %cond2 + call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ] + %PRE = load i32, i32* %P2 + ret i32 %PRE +} + +; Guard in load's block that is above the load should prohibit the PRE. +define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_01( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NEXT: load +; CHECK: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + %PRE = load i32, i32* %P2 + ret i32 %PRE +} + +; Guard in load's block that is below the load should not prohibit the PRE. +define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_02( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: load i32, i32* %q + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK-NEXT: phi i32 [ +; CHECK-NEXT: phi i32* [ +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NOT: load +; CHECK: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + %PRE = load i32, i32* %P2 + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + ret i32 %PRE +} + +; Guard above the load's block should prevent PRE from hoisting through it. +define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) { +; CHECK-LABEL: @test_guard_03( +block1: + br i1 %C, label %block2, label %block3 + +block2: + +; CHECK: block2: +; CHECK-NEXT: br +; CHECK-NOT: load + + br label %block4 + +block3: + store i32 0, i32* %p + br label %block4 + +block4: + +; CHECK: block4: +; CHECK-NEXT: phi i32* +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) +; CHECK-NEXT: load +; CHECK-NEXT: ret i32 + + %P2 = phi i32* [%p, %block3], [%q, %block2] + call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] + br label %block5 + +block5: + %PRE = load i32, i32* %P2 + ret i32 %PRE +} Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll +++ llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll @@ -0,0 +1,118 @@ +; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; These tests exercise situations when instructions that were first instructions +; with implicit control flow get removed. We make sure that after that we don't +; face crashes and are still able to perform PRE correctly. + +declare i32 @foo(i32 %arg) #0 + +define hidden void @test_01(i32 %x, i32 %y) { + +; c2 only throws if c1 throws, so it can be safely removed and then PRE can +; hoist the load out of loop. + +; CHECK-LABEL: @test_01 +; CHECK: entry: +; CHECK-NEXT: %c1 = call i32 @foo(i32 %x) +; CHECK-NEXT: %val.pre = load i32, i32* null, align 8 +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %c3 = call i32 @foo(i32 %val.pre) +; CHECK-NEXT: br label %loop + +entry: + %c1 = call i32 @foo(i32 %x) + br label %loop + +loop: + %c2 = call i32 @foo(i32 %x) + %val = load i32, i32* null, align 8 + %c3 = call i32 @foo(i32 %val) + br label %loop +} + +define hidden void @test_02(i32 %x, i32 %y) { + +; PRE is not allowed because c2 may throw. + +; CHECK-LABEL: @test_02 +; CHECK: entry: +; CHECK-NEXT: %c1 = call i32 @foo(i32 %x) +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %c2 = call i32 @foo(i32 %y) +; CHECK-NEXT: %val = load i32, i32* null, align 8 +; CHECK-NEXT: %c3 = call i32 @foo(i32 %val) +; CHECK-NEXT: br label %loop + +entry: + %c1 = call i32 @foo(i32 %x) + br label %loop + +loop: + %c2 = call i32 @foo(i32 %y) + %val = load i32, i32* null, align 8 + %c3 = call i32 @foo(i32 %val) + br label %loop +} + +define hidden void @test_03(i32 %x, i32 %y) { + +; PRE of load is allowed because c2 only throws if c1 throws. c3 should +; not be eliminated. c4 is eliminated because it only throws if c3 throws. + +; CHECK-LABEL: @test_03 +; CHECK: entry: +; CHECK-NEXT: %c1 = call i32 @foo(i32 %x) +; CHECK-NEXT: %val.pre = load i32, i32* null, align 8 +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %c3 = call i32 @foo(i32 %y) +; CHECK-NEXT: %c5 = call i32 @foo(i32 %val.pre) +; CHECK-NEXT: br label %loop + +entry: + %c1 = call i32 @foo(i32 %x) + br label %loop + +loop: + %c2 = call i32 @foo(i32 %x) + %val = load i32, i32* null, align 8 + %c3 = call i32 @foo(i32 %y) + %val2 = load i32, i32* null, align 8 + %c4 = call i32 @foo(i32 %y) + %c5 = call i32 @foo(i32 %val) + br label %loop +} + +define hidden void @test_04(i32 %x, i32 %y) { + +; PRE is not allowed even after we remove c2 because now c3 prevents us from it. + +; CHECK-LABEL: @test_04 +; CHECK: entry: +; CHECK-NEXT: %c1 = call i32 @foo(i32 %x) +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %c3 = call i32 @foo(i32 %y) +; CHECK-NEXT: %val = load i32, i32* null, align 8 +; CHECK-NEXT: %c5 = call i32 @foo(i32 %val) +; CHECK-NEXT: br label %loop + +entry: + %c1 = call i32 @foo(i32 %x) + br label %loop + +loop: + %c2 = call i32 @foo(i32 %x) + %c3 = call i32 @foo(i32 %y) + %val = load i32, i32* null, align 8 + %c4 = call i32 @foo(i32 %y) + %c5 = call i32 @foo(i32 %val) + br label %loop +} + +attributes #0 = { readnone } Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll +++ llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll @@ -430,3 +430,161 @@ call void @g(i32 %NOTPRE) cleanupret from %c2 unwind to caller } + +; Don't PRE load across potentially throwing calls. + +define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) { + +; CHECK-LABEL: @test13( +; CHECK: entry: +; CHECK-NEXT: icmp eq +; CHECK-NEXT: br i1 + +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: if.then: +; CHECK-NEXT: load i32 +; CHECK-NEXT: store i32 + +if.then: + %uu = load i32, i32* %x, align 4 + store i32 %uu, i32* %r, align 4 + br label %if.end + +; CHECK: if.end: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: load i32 + +if.end: + call void @f() + %vv = load i32, i32* %x, align 4 + ret i32 %vv +} + +; Same as test13, but now the blocking function is not immediately in load's +; block. + +define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) { + +; CHECK-LABEL: @test14( +; CHECK: entry: +; CHECK-NEXT: icmp eq +; CHECK-NEXT: br i1 + +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: if.then: +; CHECK-NEXT: load i32 +; CHECK-NEXT: store i32 + +if.then: + %uu = load i32, i32* %x, align 4 + store i32 %uu, i32* %r, align 4 + br label %if.end + +; CHECK: if.end: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: load i32 + +if.end: + call void @f() + br label %follow_1 + +follow_1: + br label %follow_2 + +follow_2: + %vv = load i32, i32* %x, align 4 + ret i32 %vv +} + +; Same as test13, but %x here is dereferenceable. A pointer that is +; dereferenceable can be loaded from speculatively without a risk of trapping. +; Since it is OK to speculate, PRE is allowed. + +define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) { + +; CHECK-LABEL: @test15 +; CHECK: entry: +; CHECK-NEXT: icmp eq +; CHECK-NEXT: br i1 + +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: entry.if.end_crit_edge: +; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4 +; CHECK-NEXT: br label %if.end + +if.then: + %uu = load i32, i32* %x, align 4 + store i32 %uu, i32* %r, align 4 + br label %if.end + +; CHECK: if.then: +; CHECK-NEXT: %uu = load i32, i32* %x, align 4 +; CHECK-NEXT: store i32 %uu, i32* %r, align 4 +; CHECK-NEXT: br label %if.end + +if.end: + call void @f() + %vv = load i32, i32* %x, align 4 + ret i32 %vv + +; CHECK: if.end: +; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ] +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 %vv + +} + +; Same as test14, but %x here is dereferenceable. A pointer that is +; dereferenceable can be loaded from speculatively without a risk of trapping. +; Since it is OK to speculate, PRE is allowed. + +define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) { + +; CHECK-LABEL: @test16( +; CHECK: entry: +; CHECK-NEXT: icmp eq +; CHECK-NEXT: br i1 + +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: entry.if.end_crit_edge: +; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4 +; CHECK-NEXT: br label %if.end + +if.then: + %uu = load i32, i32* %x, align 4 + store i32 %uu, i32* %r, align 4 + br label %if.end + +; CHECK: if.then: +; CHECK-NEXT: %uu = load i32, i32* %x, align 4 +; CHECK-NEXT: store i32 %uu, i32* %r, align 4 +; CHECK-NEXT: br label %if.end + +if.end: + call void @f() + br label %follow_1 + +; CHECK: if.end: +; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ] +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 %vv + +follow_1: + br label %follow_2 + +follow_2: + %vv = load i32, i32* %x, align 4 + ret i32 %vv +}