Index: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
@@ -106,6 +106,7 @@
   unsigned LicmMssaOptCounter;
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool IsSink;
 };
 
 /// Walk the specified region of the CFG (defined by all blocks
Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp
@@ -376,10 +376,12 @@
   // us to sink instructions in one pass, without iteration. After sinking
   // instructions, we perform another pass to hoist them out of the loop.
   SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
-                                 LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
+                                 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                                 /*IsSink=*/true};
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                           CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+  Flags.IsSink = false;
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                            CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
@@ -1224,6 +1226,7 @@
       // Could do better here, but this is conservatively correct.
       // TODO: Cache set of Uses on the first walk in runOnLoop, update when
       // moving accesses. Can also extend to dominating uses.
+      auto *SIMD = MSSA->getMemoryAccess(SI);
       for (auto *BB : CurLoop->getBlocks())
         if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
           for (const auto &MA : *Accesses)
@@ -1232,6 +1235,12 @@
               if (!MSSA->isLiveOnEntryDef(MD) &&
                   CurLoop->contains(MD->getBlock()))
                 return false;
+              // Disable hoisting past potentially interfering loads. Optimized
+              // Uses may point to an access outside the loop, as getClobbering
+              // checks the previous iteration when walking the backedge.
+              // FIXME: More precise: no Uses that alias SI.
+              if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+                return false;
             } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
               if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
                 (void)LI; // Silence warning.
@@ -2257,16 +2266,47 @@
 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                              Loop *CurLoop,
                                              SinkAndHoistLICMFlags &Flags) {
-  MemoryAccess *Source;
-  // See declaration of SetLicmMssaOptCap for usage details.
-  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
-    Source = MU->getDefiningAccess();
-  else {
-    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
-    Flags.LicmMssaOptCounter++;
+  // For hoisting, use the walker to determine safety
+  if (!Flags.IsSink) {
+    MemoryAccess *Source;
+    // See declaration of SetLicmMssaOptCap for usage details.
+    if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+      Source = MU->getDefiningAccess();
+    else {
+      Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+      Flags.LicmMssaOptCounter++;
+    }
+    return !MSSA->isLiveOnEntryDef(Source) &&
+           CurLoop->contains(Source->getBlock());
   }
-  return !MSSA->isLiveOnEntryDef(Source) &&
-         CurLoop->contains(Source->getBlock());
+
+  // For sinking, we'd need to check all Defs below this use. The getClobbering
+  // call will look on the backedge of the loop, but will check aliasing with
+  // the instructions on the previous iteration.
+  // For example:
+  // for (i ... )
+  //   load a[i] ( Use (LoE)
+  //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
+  //   i++;
+  // The load sees no clobbering inside the loop, as the backedge alias check
+  // does phi translation, and will check aliasing against store a[i-1].
+  // However sinking the load outside the loop, below the store is incorrect.
+
+  // For now, only sink if there are no Defs in the loop, and the existing ones
+  // precede the use and are in the same block.
+  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+  // needs PostDominatorTreeAnalysis.
+  // FIXME: More precise: no Defs that alias this Use.
+  if (Flags.NoOfMemAccTooLarge)
+    return true;
+  for (auto *BB : CurLoop->getBlocks())
+    if (auto *Accesses = MSSA->getBlockDefs(BB))
+      for (const auto &MA : *Accesses)
+        if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+          if (MU->getBlock() != MD->getBlock() ||
+              !MSSA->locallyDominates(MD, MU))
+            return true;
+  return false;
 }
 
 /// Little predicate that returns true if the specified basic block is in
Index: llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
===================================================================
--- llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
+++ llvm/trunk/test/Analysis/MemorySSA/pr42294.ll
@@ -0,0 +1,48 @@
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=true -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=false -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=true | FileCheck %s
+; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=false | FileCheck %s
+
+; LICM: Using
+; LICM-NOT: LICM sinking instruction: %.pre = load i8, i8* %arrayidx.phi.trans.insert
+
+; CHECK-LABEL: @fn1
+; CHECK-LABEL: entry:
+; CHECK: br i1 true, label %[[END:.*]], label %[[PH:.*]]
+; CHECK: [[PH]]:
+; CHECK: br label %[[CRIT:.*]]
+; CHECK: [[CRIT]]:
+; CHECK: load i8
+; CHECK: store i8
+; CHECK: br i1 true, label %[[ENDCRIT:.*]], label %[[CRIT]]
+; CHECK: [[ENDCRIT]]:
+; CHECK-NOT: load i8
+; CHECK: br label %[[END]]
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @fn1() {
+entry:
+  %g = alloca [9 x i8], align 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %0 = phi i64 [ 0, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+  %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv
+  store i8 2, i8* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 undef, label %for.end18, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %arrayidx.phi.trans.insert = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv.next
+  %.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1
+  %phitmp = zext i8 %.pre to i64
+  br label %for.body
+
+for.end18:                                        ; preds = %for.body
+  store i64 %0, i64* undef, align 8
+  ret void
+}
Index: llvm/trunk/test/Transforms/LICM/store-hoisting.ll
===================================================================
--- llvm/trunk/test/Transforms/LICM/store-hoisting.ll
+++ llvm/trunk/test/Transforms/LICM/store-hoisting.ll
@@ -348,8 +348,11 @@
 ; the load must observe.
 define i32 @test_dominated_read(i32* %loc) {
 ; CHECK-LABEL: @test_dominated_read
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
+; AST-LABEL: exit:
+; AST: store i32 0, i32* %loc
 entry:
   br label %loop