Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -112,7 +112,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *ORE); + bool, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -124,8 +124,8 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - MemorySSAUpdater *, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, + OptimizationRemarkEmitter *); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -276,6 +276,7 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, + bool NoOfMemAccTooLarge, OptimizationRemarkEmitter *ORE = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. 
Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -306,7 +306,8 @@ std::unique_ptr<AliasSetTracker> CurAST; std::unique_ptr<MemorySSAUpdater> MSSAU; - bool LocalDisablePromotion = false; + bool NoOfMemAccTooLarge = false; + if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); @@ -321,12 +322,12 @@ (void)MA; AccessCapCount++; if (AccessCapCount > AccessCapForMSSAPromotion) { - LocalDisablePromotion = true; + NoOfMemAccTooLarge = true; break; } } } - if (LocalDisablePromotion) + if (NoOfMemAccTooLarge) break; } } @@ -350,10 +351,12 @@ // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, + NoOfMemAccTooLarge, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, + NoOfMemAccTooLarge, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -363,7 +366,7 @@ // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !LocalDisablePromotion) { + !NoOfMemAccTooLarge) { // Figure out the loop exits and their insertion points SmallVector<BasicBlock *, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -457,7 +460,7 @@ DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, + ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, OptimizationRemarkEmitter *ORE) { // Verify inputs. 
@@ -501,7 +504,8 @@ // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, + NoOfMemAccTooLarge, ORE) && !I.mayHaveSideEffects()) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) { if (!FreeInLoop) { @@ -755,7 +759,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, + ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -808,7 +812,8 @@ // and we have accurately duplicated the control flow from the loop header // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, + NoOfMemAccTooLarge, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { @@ -1035,6 +1040,7 @@ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, + bool NoOfMemAccTooLarge, OptimizationRemarkEmitter *ORE) { // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) @@ -1173,9 +1179,28 @@ return true; if (!EnableLicmCap) { auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); - if (MSSA->isLiveOnEntryDef(Source) || - !CurLoop->contains(Source->getBlock())) - return true; + // If there are no clobbering Defs in the loop, we still need to check + // for interfering Uses. If there are more accesses than the Promotion + // cap, give up, we're not walking a list that long. 
Otherwise, walk the + list, check each Use if it's optimized to an access outside the loop. + // If yes, store is safe to hoist. This is fairly restrictive, but + // conservatively correct. + // TODO: Cache set of Uses on the first walk in runOnLoop, update when + // moving accesses. Can also extend to dominating uses. + if ((!MSSA->isLiveOnEntryDef(Source) && + CurLoop->contains(Source->getBlock())) || + NoOfMemAccTooLarge) + return false; + for (auto *BB : CurLoop->getBlocks()) + if (auto *Accesses = MSSA->getBlockAccesses(BB)) + for (const auto &MA : *Accesses) + if (const auto *MU = dyn_cast<MemoryUse>(&MA)) { + auto *MD = MU->getDefiningAccess(); + if (!MSSA->isLiveOnEntryDef(MD) && + CurLoop->contains(MD->getBlock())) + return false; + } + return true; } return false; } Index: lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- lib/Transforms/Scalar/LoopSink.cpp +++ lib/Transforms/Scalar/LoopSink.cpp @@ -303,7 +303,7 @@ // No need to check for instruction's operands are loop invariant. 
assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false, false)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true; Index: test/Transforms/LICM/store-hoisting.ll =================================================================== --- test/Transforms/LICM/store-hoisting.ll +++ test/Transforms/LICM/store-hoisting.ll @@ -1,5 +1,6 @@ -; RUN: opt -S -basicaa -licm %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s define void @test(i32* %loc) { ; CHECK-LABEL: @test @@ -46,8 +47,11 @@ define i32* @false_negative_2use(i32* %loc) { ; CHECK-LABEL: @false_negative_2use -; CHECK-LABEL: exit: -; CHECK: store i32 0, i32* %loc +; AST-LABEL: exit: +; AST: store i32 0, i32* %loc +; MSSA-LABEL: entry: +; MSSA: store i32 0, i32* %loc +; MSSA-LABEL: loop: entry: br label %loop @@ -119,6 +123,7 @@ ret void } +; Hoisting the store is actually valid here, as it dominates the load. define void @neg_ref(i32* %loc) { ; CHECK-LABEL: @neg_ref ; CHECK-LABEL: exit1: @@ -146,6 +151,35 @@ ret void } +; Hoisting the store here leads to a miscompile. 
+define void @neg_ref2(i32* %loc) { +; CHECK-LABEL: @neg_ref2 +; CHECK-LABEL: exit1: +; CHECK: store i32 0, i32* %loc +; CHECK-LABEL: exit2: +; CHECK: store i32 0, i32* %loc +entry: + store i32 198, i32* %loc + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %v = load i32, i32* %loc + store i32 0, i32* %loc + %earlycnd = icmp eq i32 %v, 198 + br i1 %earlycnd, label %exit1, label %backedge + +backedge: + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv, 200 + br i1 %cmp, label %loop, label %exit2 + +exit1: + ret void +exit2: + ret void +} + declare void @modref() define void @neg_modref(i32* %loc) {