Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -118,6 +118,8 @@
     cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
              "pathological cases, in exchange for faster compile"));
 
+static bool LocalDisablePromotion = false;
+
 // Experimentally, memory promotion carries less importance than sinking and
 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
 // compile time.
@@ -306,7 +308,8 @@
 
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
-  bool LocalDisablePromotion = false;
+  LocalDisablePromotion = false;
+
   if (!MSSA) {
     LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
     CurAST = collectAliasInfoForLoop(L, LI, AA);
@@ -1173,9 +1176,29 @@
         return true;
       if (!EnableLicmCap) {
        auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
-        if (MSSA->isLiveOnEntryDef(Source) ||
-            !CurLoop->contains(Source->getBlock()))
-          return true;
+        // If there are no clobbering Defs in the loop, we still need to check
+        // for interfering Uses. If there are more accesses than the Promotion
+        // cap, give up, we're not walking a list that long. Otherwise, walk the
+        // list, check each Use if it's optimized to an access outside the loop.
+        // If yes, store is safe to hoist. This is fairly restrictive, but
+        // conservatively correct.
+        // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+        // moving accesses. Can also extend to dominating uses.
+        if ((!MSSA->isLiveOnEntryDef(Source) &&
+             CurLoop->contains(Source->getBlock())) || LocalDisablePromotion)
+          return false;
+        for (auto *BB : CurLoop->getBlocks())
+          if (auto *Accesses = MSSA->getBlockAccesses(BB))
+            for (const auto &MA : *Accesses)
+              if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+                if (!MU->isOptimized())
+                  return false;
+                auto *MD = MU->getDefiningAccess();
+                if (!MSSA->isLiveOnEntryDef(MD) &&
+                    CurLoop->contains(MD->getBlock()))
+                  return false;
+              }
+        return true;
       }
       return false;
     }
Index: test/Transforms/LICM/store-hoisting.ll
===================================================================
--- test/Transforms/LICM/store-hoisting.ll
+++ test/Transforms/LICM/store-hoisting.ll
@@ -1,5 +1,6 @@
-; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s
 
 define void @test(i32* %loc) {
 ; CHECK-LABEL: @test
@@ -46,8 +47,11 @@
 
 define i32* @false_negative_2use(i32* %loc) {
 ; CHECK-LABEL: @false_negative_2use
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; AST-LABEL: exit:
+; AST: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
 entry:
   br label %loop
 
@@ -119,6 +123,7 @@
   ret void
 }
+; Hoisting the store is actually valid here, as it dominates the load.
 define void @neg_ref(i32* %loc) {
 ; CHECK-LABEL: @neg_ref
 ; CHECK-LABEL: exit1:
 ; CHECK: store i32 0, i32* %loc
@@ -146,6 +151,35 @@
   ret void
 }
 
+; Hoisting the store here leads to a miscompile.
+define void @neg_ref2(i32* %loc) {
+; CHECK-LABEL: @neg_ref2
+; CHECK-LABEL: exit1:
+; CHECK: store i32 0, i32* %loc
+; CHECK-LABEL: exit2:
+; CHECK: store i32 0, i32* %loc
+entry:
+  store i32 198, i32* %loc
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+  %v = load i32, i32* %loc
+  store i32 0, i32* %loc
+  %earlycnd = icmp eq i32 %v, 198
+  br i1 %earlycnd, label %exit1, label %backedge
+
+backedge:
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit2
+
+exit1:
+  ret void
+exit2:
+  ret void
+}
+
 declare void @modref()
 
 define void @neg_modref(i32* %loc) {