Index: include/llvm/Analysis/MustExecute.h
===================================================================
--- include/llvm/Analysis/MustExecute.h
+++ include/llvm/Analysis/MustExecute.h
@@ -50,6 +50,7 @@
   // may throw.
   // Contains information about implicit control flow in this loop's blocks.
   mutable ImplicitControlFlowTracking ICF;
+  mutable MemoryWriteTracking MW;
 
   /// Collect all blocks from \p CurLoop which lie on all possible paths from
   /// the header of \p CurLoop (inclusive) to BB (exclusive) into the set
@@ -76,6 +77,16 @@
   bool mustExecuteInLoop(const Loop *CurLoop, const Instruction *I,
                          const DominatorTree *DT) const;
 
+  /// Returns true if we could not execute a memory-modifying instruction
+  /// before we enter \p BB under the assumption that \p CurLoop is entered.
+  bool doesNotWriteMemoryBefore(const Loop *CurLoop,
+                                const BasicBlock *BB) const;
+
+  /// Returns true if we could not execute a memory-modifying instruction
+  /// before we execute \p I under the assumption that \p CurLoop is entered.
+  bool doesNotWriteMemoryBefore(const Loop *CurLoop,
+                                const Instruction *I) const;
+
   /// Drops cached information regarding the implicit control flow in block
   /// \p BB. It should be called for every block in which we add or remove any
   /// instructions.
@@ -88,7 +99,7 @@
   /// LoopSafetyInfo. Some callers rely on this fact.
   void computeLoopSafetyInfo(Loop *);
 
-  LoopSafetyInfo(DominatorTree *DT): ICF(DT) {}
+  LoopSafetyInfo(DominatorTree *DT): ICF(DT), MW(DT) {}
 };
 
 /// Returns true if the instruction in a loop is guaranteed to execute at least

Index: include/llvm/Transforms/Utils/InstructionPrecedenceTracking.h
===================================================================
--- include/llvm/Transforms/Utils/InstructionPrecedenceTracking.h
+++ include/llvm/Transforms/Utils/InstructionPrecedenceTracking.h
@@ -100,6 +100,31 @@
   virtual bool isSpecialInstruction(const Instruction *Insn) const;
 };
 
+class MemoryWriteTracking : public InstructionPrecedenceTracking {
+public:
+  MemoryWriteTracking(DominatorTree *DT) : InstructionPrecedenceTracking(DT) {}
+
+  /// Returns the topmost instruction that may write memory from the given
+  /// basic block. Returns nullptr if there is no such instruction in the block.
+  const Instruction *getFirstMemoryWrite(const BasicBlock *BB) {
+    return getFirstSpecialInstruction(BB);
+  }
+
+  /// Returns true if at least one instruction from the given basic block may
+  /// write memory.
+  bool mayWriteToMemory(const BasicBlock *BB) {
+    return hasSpecialInstructions(BB);
+  }
+
+  /// Returns true if the first memory-writing instruction of Insn's block
+  /// exists and dominates Insn.
+  bool isDominatedByMemoryWriteFromSameBlock(const Instruction *Insn) {
+    return isPreceededBySpecialInstruction(Insn);
+  }
+
+  virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
 } // llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_INSTRUCTIONPRECEDENCETRACKING_H
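A note on the two headers above: all three MemoryWriteTracking queries delegate to the InstructionPrecedenceTracking base class, which caches the first memory-writing ("special") instruction per block. The following minimal sketch illustrates the query/invalidate discipline the cache expects; the helper inspectWrites is hypothetical and not part of this patch, while mayWriteToMemory, getFirstMemoryWrite, and invalidateBlock are the APIs above (the last inherited from the base class, as the dropCachedInfo hunk below relies on).

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/InstructionPrecedenceTracking.h"

using namespace llvm;

// Hypothetical helper: query the cached write information for BB, then
// drop the cache once the client has mutated the block.
static void inspectWrites(MemoryWriteTracking &MW, BasicBlock *BB) {
  if (!MW.mayWriteToMemory(BB))
    return;                   // No instruction in BB can write memory.
  const Instruction *First = MW.getFirstMemoryWrite(BB);
  (void)First;                // Topmost potential writer within BB.
  // ... client inserts or erases instructions in BB here ...
  MW.invalidateBlock(BB);     // Cached answers for BB are now stale.
}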
Index: lib/Analysis/MustExecute.cpp
===================================================================
--- lib/Analysis/MustExecute.cpp
+++ lib/Analysis/MustExecute.cpp
@@ -29,6 +29,7 @@
 void LoopSafetyInfo::computeLoopSafetyInfo(Loop *CurLoop) {
   assert(CurLoop != nullptr && "CurLoop can't be null");
   ICF.clear();
+  MW.clear();
   MayThrow = false;
   // Figure out the fact that at least one block may throw.
   for (auto &BB : CurLoop->blocks())
@@ -176,8 +177,37 @@
          allLoopPathsLeadToBlock(CurLoop, I->getParent(), DT);
 }
 
+bool LoopSafetyInfo::doesNotWriteMemoryBefore(const Loop *CurLoop,
+                                              const BasicBlock *BB) const {
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+  // Fast path: header is always reached once the loop is entered.
+  if (BB == CurLoop->getHeader())
+    return true;
+
+  // Collect all transitive predecessors of BB in the same loop. This set will
+  // be a subset of the blocks within the loop.
+  SmallPtrSet<const BasicBlock *, 4> Predecessors;
+  collectTransitivePredecessors(CurLoop, BB, Predecessors);
+  // Check whether any of these predecessors contains an instruction that
+  // could write to memory.
+  for (auto *Pred : Predecessors)
+    if (MW.mayWriteToMemory(Pred))
+      return false;
+  return true;
+}
+
+bool LoopSafetyInfo::doesNotWriteMemoryBefore(const Loop *CurLoop,
+                                              const Instruction *I) const {
+  auto *BB = I->getParent();
+  assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+  return !MW.isDominatedByMemoryWriteFromSameBlock(I) &&
+         doesNotWriteMemoryBefore(CurLoop, BB);
+}
+
 void LoopSafetyInfo::dropCachedInfo(const BasicBlock *BB) {
   ICF.invalidateBlock(BB);
+  MW.invalidateBlock(BB);
 }
 
 /// Returns true if the instruction in a loop is guaranteed to execute at least

Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -467,12 +467,6 @@
     if (inSubLoop(BB, CurLoop, LI))
       continue;
 
-    // Keep track of whether the prefix instructions could have written memory.
-    // TODO: This may be done smarter if we keep track of all throwing and
-    // mem-writing operations in every block, e.g. using something similar to
-    // isGuaranteedToExecute.
-    bool IsMemoryNotModified = CurLoop->getHeader() == BB;
-
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
       Instruction &I = *II++;
       // Try constant folding this instruction. If all the operands are
@@ -531,15 +525,13 @@
       using namespace PatternMatch;
       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()) &&
-          IsMemoryNotModified && CurLoop->hasLoopInvariantOperands(&I) &&
-          SafetyInfo->mustExecuteInLoop(CurLoop, &I, DT)) {
+          CurLoop->hasLoopInvariantOperands(&I) &&
+          SafetyInfo->mustExecuteInLoop(CurLoop, &I, DT) &&
+          SafetyInfo->doesNotWriteMemoryBefore(CurLoop, &I)) {
         hoist(I, DT, CurLoop, SafetyInfo, ORE);
         Changed = true;
         continue;
       }
-
-      if (IsMemoryNotModified)
-        IsMemoryNotModified = !I.mayWriteToMemory();
     }
   }

Index: lib/Transforms/Utils/InstructionPrecedenceTracking.cpp
===================================================================
--- lib/Transforms/Utils/InstructionPrecedenceTracking.cpp
+++ lib/Transforms/Utils/InstructionPrecedenceTracking.cpp
@@ -96,3 +96,8 @@
   }
   return true;
 }
+
+bool MemoryWriteTracking::isSpecialInstruction(
+    const Instruction *Insn) const {
+  return Insn->mayWriteToMemory();
+}
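Before the test changes, it may help to restate the new LICM hoisting condition as a single predicate. The wrapper name canHoistGuard below is hypothetical (a minimal sketch, not code from this patch), but the four conjuncts are exactly the ones in the LICM hunk above.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

// Hypothetical restatement of LICM's new condition: the instruction is a
// guard, its operands are loop-invariant, it executes on every iteration,
// and no memory write can execute before it on the iteration reaching it.
static bool canHoistGuard(Instruction &I, const Loop *CurLoop,
                          const LoopSafetyInfo *SafetyInfo,
                          const DominatorTree *DT) {
  using namespace PatternMatch;
  return match(&I, m_Intrinsic<Intrinsic::experimental_guard>()) &&
         CurLoop->hasLoopInvariantOperands(&I) &&
         SafetyInfo->mustExecuteInLoop(CurLoop, &I, DT) &&
         SafetyInfo->doesNotWriteMemoryBefore(CurLoop, &I);
}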
Index: test/Transforms/LICM/guards.ll
===================================================================
--- test/Transforms/LICM/guards.ll
+++ test/Transforms/LICM/guards.ll
@@ -84,16 +84,15 @@
 }
 
-; TODO: We can also hoist this guard from mustexec non-header block.
 define void @test4(i1 %c, i32* %p) {
 
 ; CHECK-LABEL: @test4(
 ; CHECK-LABEL: entry:
 ; CHECK: %a = load i32, i32* %p
 ; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 ; CHECK-LABEL: loop:
 ; CHECK-LABEL: backedge:
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
 
 entry:
   br label %loop
@@ -196,6 +195,154 @@
   ret void
 }
 
+; Check that we don't hoist across a store in the header.
+define void @test4c(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4c(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  store i8 0, i8* %s
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store in a conditionally executed block.
+define void @test4d(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4d(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  store i8 0, i8* %s
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't hoist across a store before the guard in the backedge.
+define void @test4e(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4e(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  store i8 0, i8* %s
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can hoist the guard in spite of a store which happens after it.
+define void @test4f(i1 %c, i32* %p, i8* noalias %s) {
+
+; CHECK-LABEL: @test4f(
+; CHECK-LABEL: entry:
+; CHECK: %a = load i32, i32* %p
+; CHECK: %invariant_cond = icmp ne i32 %a, 100
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond)
+; CHECK-LABEL: loop:
+; CHECK-LABEL: backedge:
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  br label %backedge
+
+if.false:
+  br label %backedge
+
+backedge:
+  %a = load i32, i32* %p
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  store i8 0, i8* %s
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 ; Do not hoist an invariant guard across a variant guard.
 define void @test5(i1 %c, i32* %p, i32* %q) {
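Finally, a minimal end-to-end sketch of driving the new predicate from a client. The function name safeToReadEarly and the surrounding boilerplate are hypothetical; only the LoopSafetyInfo calls are the patch's API, and the assumption is that the loop and dominator tree are already available.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Hypothetical use: build the safety info once per loop, then ask whether
// any memory-modifying instruction can execute before I on the iteration
// that reaches it, given that L is entered at all.
static bool safeToReadEarly(Loop *L, const Instruction *I, DominatorTree *DT) {
  LoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L); // Populates both the ICF and MW caches.
  return SafetyInfo.doesNotWriteMemoryBefore(L, I);
}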