Index: include/llvm/Analysis/MustExecute.h
===================================================================
--- include/llvm/Analysis/MustExecute.h
+++ include/llvm/Analysis/MustExecute.h
@@ -74,6 +74,16 @@
   bool mustExecuteInLoop(const Loop *CurLoop, const Instruction *I,
                          const DominatorTree *DT) const;
 
+  /// Returns true if we could not execute a memory-modifying instruction
+  /// before we enter \p BB under the assumption that \p CurLoop is entered.
+  bool couldNotWriteMemoryBefore(const Loop *CurLoop,
+                                 const BasicBlock *BB) const;
+
+  /// Returns true if we could not execute a memory-modifying instruction
+  /// before we execute \p I under the assumption that \p CurLoop is entered.
+  bool couldNotWriteMemoryBefore(const Loop *CurLoop,
+                                 const Instruction *I) const;
+
   /// Drops cached information regarding the implicit control flow in block
   /// \p BB. It should be called for every block in which we add or remove any
   /// instructions.
Index: lib/Analysis/MustExecute.cpp
===================================================================
--- lib/Analysis/MustExecute.cpp
+++ lib/Analysis/MustExecute.cpp
@@ -176,6 +176,42 @@
          allLoopPathsLeadToBlock(CurLoop, I->getParent(), DT);
 }
 
+bool LoopSafetyInfo::couldNotWriteMemoryBefore(const Loop *CurLoop,
+                                               const BasicBlock *BB) const {
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+  // Fast path: the header is always reached once the loop is entered.
+  if (BB == CurLoop->getHeader())
+    return true;
+
+  // Collect all transitive predecessors of BB in the same loop. This set will
+  // be a subset of the blocks within the loop.
+  SmallPtrSet<const BasicBlock *, 4> Predecessors;
+  collectTransitivePredecessors(CurLoop, BB, Predecessors);
+  // Find if there is any instruction in any predecessor that could write
+  // to memory.
+  // TODO: Cache this in a manner like ICF?
+  for (auto *Pred : Predecessors)
+    for (auto &I : *Pred)
+      if (I.mayWriteToMemory())
+        return false;
+  return true;
+}
+
+bool LoopSafetyInfo::couldNotWriteMemoryBefore(const Loop *CurLoop,
+                                               const Instruction *I) const {
+  auto *BB = I->getParent();
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+  // TODO: Cache this in a manner like ICF?
+  for (auto &CurrInsn : *BB) {
+    if (&CurrInsn == I)
+      break;
+    if (CurrInsn.mayWriteToMemory())
+      return false;
+  }
+  return couldNotWriteMemoryBefore(CurLoop, BB);
+}
+
 void LoopSafetyInfo::dropCachedInfo(const BasicBlock *BB) {
   ICF.invalidateBlock(BB);
 }
Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -458,12 +458,6 @@
     if (inSubLoop(BB, CurLoop, LI))
       continue;
 
-    // Keep track of whether the prefix instructions could have written memory.
-    // TODO: This may be done smarter if we keep track of all throwing and
-    // mem-writing operations in every block, e.g. using something similar to
-    // isGuaranteedToExecute.
-    bool IsMemoryNotModified = CurLoop->getHeader() == BB;
-
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
       Instruction &I = *II++;
       // Try constant folding this instruction. If all the operands are
@@ -522,15 +516,13 @@
       using namespace PatternMatch;
       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()) &&
-          IsMemoryNotModified && CurLoop->hasLoopInvariantOperands(&I) &&
-          SafetyInfo->mustExecuteInLoop(CurLoop, &I, DT)) {
+          CurLoop->hasLoopInvariantOperands(&I) &&
+          SafetyInfo->mustExecuteInLoop(CurLoop, &I, DT) &&
+          SafetyInfo->couldNotWriteMemoryBefore(CurLoop, &I)) {
         hoist(I, DT, CurLoop, SafetyInfo, ORE);
         Changed = true;
         continue;
       }
-
-      if (IsMemoryNotModified)
-        IsMemoryNotModified = !I.mayWriteToMemory();
     }
   }
Index: test/Transforms/LICM/guards.ll
===================================================================
--- test/Transforms/LICM/guards.ll
+++ test/Transforms/LICM/guards.ll
@@ -84,16 +84,15 @@
 }
 
-; TODO: We can also hoist this guard from mustexec non-header block.
 define void @test4(i1 %c, i32* %p) {
 
 ; CHECK-LABEL: @test4(
 ; CHECK-LABEL: entry:
 ; CHECK: %a = load i32, i32* %p
 ; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 ; CHECK-LABEL: loop:
 ; CHECK-LABEL: backedge:
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 
 entry:
   br label %loop
 
@@ -196,6 +195,154 @@
   ret void
 }
 
+; Check that we don't hoist across a store in the header.
+define void @test4c(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4c(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  store i8 0, i8* %s
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store in a conditionally executed block.
+define void @test4d(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4d(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  store i8 0, i8* %s
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store before the guard in the backedge.
+define void @test4e(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4e(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  store i8 0, i8* %s
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can hoist the guard in spite of a store which happens after it.
+define void @test4f(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4f(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  store i8 0, i8* %s
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 ; Do not hoist an invariant guard across a variant guard.
 define void @test5(i1 %c, i32* %p, i32* %q) {
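
Reviewer note: the net effect of the LICM change is that guard hoisting is now gated by three independent legality checks instead of the block-local `IsMemoryNotModified` flag. A minimal sketch of how they compose is below; `canHoistGuard` is a hypothetical wrapper name invented here for illustration (the patch inlines these checks directly in `hoistRegion`), and only the three queries themselves come from this patch and the existing `Loop` API.

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Hypothetical helper, for illustration only.
static bool canHoistGuard(const Loop *L, const Instruction *Guard,
                          const DominatorTree *DT,
                          const LoopSafetyInfo *SafetyInfo) {
  // (1) The guard's condition must be computable in the preheader.
  return L->hasLoopInvariantOperands(Guard) &&
         // (2) The guard executes on every iteration that completes, so one
         //     check before the loop is equivalent to a check per iteration.
         SafetyInfo->mustExecuteInLoop(L, Guard, DT) &&
         // (3) No memory write may occur between loop entry and the guard;
         //     otherwise a failing hoisted guard would deoptimize with state
         //     missing stores the original program would have performed.
         SafetyInfo->couldNotWriteMemoryBefore(L, Guard);
}
```

Since `&&` short-circuits left to right, the cheap operand-invariance test runs before `couldNotWriteMemoryBefore`, which walks every instruction of every transitive in-loop predecessor (hence the TODO about caching it the way ICF is cached).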