Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -903,6 +903,68 @@
   return true;
 }
 
+/// Checks whether the memory read by \p LI has already been accessed on every
+/// path that reaches the preheader of \p CurLoop.
+///
+/// Returns true only if a load from, or a store to, the very same pointer
+/// exists outside the loop in a block that dominates the preheader, and that
+/// access has the same value type and at least the alignment of \p LI.
+/// Volatile and atomic loads are always rejected.
+///
+/// NOTE(review): this does not rule out the memory being deallocated between
+/// the dominating access and the loop; confirm that is acceptable here.
+static bool isLoadPtrDereferenceable(LoadInst *LI, const DominatorTree *DT,
+                                     const Loop *CurLoop) {
+  if (LI->isVolatile() || LI->isAtomic())
+    return false;
+
+  // Without a preheader there is no block to test dominance against.
+  BasicBlock *PH = CurLoop->getLoopPreheader();
+  if (!PH)
+    return false;
+
+  Value *Ptr = LI->getPointerOperand();
+  Type *LoadTy = LI->getType();
+  unsigned LoadAlign = LI->getAlignment();
+
+  for (auto *PUser : Ptr->users()) {
+    Type *AccessTy = nullptr;
+    unsigned AccessAlign = 0;
+    if (auto *UserLI = dyn_cast<LoadInst>(PUser)) {
+      AccessTy = UserLI->getType();
+      AccessAlign = UserLI->getAlignment();
+    } else if (auto *UserSI = dyn_cast<StoreInst>(PUser)) {
+      // A store that merely uses Ptr as the value being stored does not touch
+      // the memory Ptr points to.
+      if (UserSI->getPointerOperand() != Ptr)
+        continue;
+      AccessTy = UserSI->getValueOperand()->getType();
+      AccessAlign = UserSI->getAlignment();
+    } else {
+      continue;
+    }
+
+    // The earlier access must cover the bytes LI reads and be at least as
+    // aligned. An alignment of 0 stands for the ABI alignment of the accessed
+    // type, so with matching types it is only comparable with another 0.
+    if (AccessTy != LoadTy)
+      continue;
+    if (LoadAlign == 0 ? AccessAlign != 0 : AccessAlign < LoadAlign)
+      continue;
+
+    // For a common case of the pointer originating from the loop, avoid a DT
+    // query in favour of a densemap lookup.
+    BasicBlock *UserBB = cast<Instruction>(PUser)->getParent();
+    if (CurLoop->contains(UserBB) || !DT->dominates(UserBB, PH))
+      continue;
+
+    // Since the block dominates the preheader, we know that execution must
+    // have transferred from the user's block to its successors.
+    return true;
+  }
+  return false;
+}
+
 /// Only sink or hoist an instruction if it is not a trapping instruction,
 /// or if the instruction is known not to trap when moved to the preheader.
 /// or if it is a trapping instruction and is guaranteed to execute.
@@ -919,7 +981,12 @@
       isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
 
   if (!GuaranteedToExecute) {
+    // Conditional loads may be hoisted if the address has already been accessed
+    // by a dominating block.
     auto *LI = dyn_cast<LoadInst>(&Inst);
+    if (LI && isLoadPtrDereferenceable(LI, DT, CurLoop))
+      return true;
+
     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
       ORE->emit(OptimizationRemarkMissed(
                     DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
Index: test/Transforms/LICM/hoisting.ll
===================================================================
--- test/Transforms/LICM/hoisting.ll
+++ test/Transforms/LICM/hoisting.ll
@@ -320,3 +320,37 @@
 loopexit:
   ret i32 %sum
 }
+
+
+declare i32 @foo3(i32) nounwind readonly
+
+;; It is ok and desirable to hoist this load %B due to the load of %A.
+define i32 @test_cond_load(i1 %c, i1 %c2, i32* noalias %ptr, i32* noalias %storeptr) {
+; CHECK-LABEL: @test_cond_load(
+; CHECK: %A = load i32, i32* %ptr
+; CHECK: br label %Loop.ph
+; CHECK: %B = load i32, i32* %ptr
+; CHECK: br label %Loop
+; CHECK: Loop:
+entry:
+  %A = load i32, i32* %ptr
+  br label %Loop.ph
+
+Loop.ph:
+  br label %Loop
+
+Loop:
+  br i1 %c2, label %if.then, label %Loop.cont
+if.then:
+  %B = load i32, i32* %ptr
+  %res = call i32 @foo3(i32 %B)
+  store i32 %res, i32* %storeptr
+  br label %Loop.cont
+
+Loop.cont:
+  %res.phi = phi i32 [0, %Loop], [%res, %if.then]
+  br i1 %c, label %Loop, label %Out
+Out:
+  %C = sub i32 %A, %res.phi
+  ret i32 %C
+}