Index: lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- lib/Analysis/MemorySSAUpdater.cpp +++ lib/Analysis/MemorySSAUpdater.cpp @@ -269,6 +269,8 @@ // Also make sure we skip ourselves to avoid self references. if (isa(U.getUser()) || U.getUser() == MD) continue; + // Defs are automatically unoptimized when the user is set to MD below, + // because the isOptimized() call will fail to find the same ID. U.set(MD); } } @@ -276,6 +278,9 @@ // and that def is now our defining access. MD->setDefiningAccess(DefBefore); + // Remember the index where we may insert new phis below. + unsigned NewPhiIndex = InsertedPHIs.size(); + SmallVector FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it @@ -289,9 +294,49 @@ // backwards to find the def. To make that work, we'd have to track whether // getDefRecursive only ever used the single predecessor case. These types // of paths also only exist in between CFG simplifications. + + // If this is the first def in the block and this insert is in an arbitrary + // place, compute IDF and place phis. 
+ auto Iter = MD->getDefsIterator(); + ++Iter; + auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); + if (Iter == IterEnd) { + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector IDFBlocks; + SmallPtrSet DefiningBlocks; + DefiningBlocks.insert(MD->getBlock()); + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) + if (!MSSA->getMemoryAccess(BBIDF)) + NewInsertedPHIs.push_back(MSSA->createMemoryPhi(BBIDF)); + + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), + Pred); + } + } + + // Re-take the index where we're adding the new phis, because the above + // call to getPreviousDefFromEnd may have inserted more into InsertedPHIs. + NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } + } + FixupList.push_back(MD); } + // Remember the index where we stopped inserting new phis above, since the + // fixupDefs call in the loop below may insert more phis, which are already minimal. + unsigned NewPhiIndexEnd = InsertedPHIs.size(); + while (!FixupList.empty()) { unsigned StartingPHISize = InsertedPHIs.size(); fixupDefs(FixupList); @@ -299,6 +344,15 @@ // Put any new phis on the fixup list, and process them FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end()); } + + // Optimize potentially non-minimal phis added in this method. + for (unsigned Idx = NewPhiIndex; Idx < NewPhiIndexEnd; ++Idx) { + if (auto *MPhi = cast_or_null(InsertedPHIs[Idx])) { + auto OperRange = MPhi->operands(); + tryRemoveTrivialPhi(MPhi, OperRange); + } + } + // Now that all fixups are done, rename all uses if we are asked. 
if (RenameUses) { SmallPtrSet Visited; Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -2068,6 +2068,8 @@ // stores in the loop. Promoter.run(LoopUses); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. if (PreheaderLoad->use_empty()) eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); Index: test/Analysis/MemorySSA/pr40749.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/pr40749.ll @@ -0,0 +1,58 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s +; REQUIRES: asserts + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "systemz-unknown" + +@g_3 = external dso_local local_unnamed_addr global i32, align 4 +@g_57 = external dso_local local_unnamed_addr global i8, align 2 +@g_82 = external dso_local global [8 x i16], align 2 +@g_107 = external dso_local local_unnamed_addr global i32, align 4 + +define internal fastcc void @foo1() unnamed_addr{ +; CHECK-LABEL: @foo1() +entry: + %.pre.pre = load i32, i32* @g_3, align 4 + br label %loop1 + +loop1: + %tmp0 = phi i32 [ undef, %entry ], [ %var18.lcssa, %loopexit ] + br label %preheader + +preheader: + %indvars.iv = phi i64 [ 0, %loop1 ], [ %indvars.iv.next, %loop6 ] + %phi18 = phi i32 [ %tmp0, %loop1 ], [ 0, %loop6 ] + %phi87 = phi i32 [ 0, %loop1 ], [ %tmp7, %loop6 ] + %tmp1 = getelementptr inbounds [8 x i16], [8 x i16]* @g_82, i64 0, i64 %indvars.iv + %tmp2 = load i16, i16* %tmp1, align 2 + %tmp3 = trunc i16 %tmp2 to i8 + store i8 %tmp3, i8* @g_57, align 2 + store i32 8, i32* @g_107, align 4 + %tmp4 = icmp eq i32 %.pre.pre, 0 + %spec.select = select i1 %tmp4, i32 %phi18, i32 14 + %tmp5 = trunc i64 %indvars.iv to i32 + switch i32 
%spec.select, label %loopexit [ + i32 0, label %loop6 + i32 14, label %loop9 + ] + +loop6: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp7 = add nuw nsw i32 %phi87, 1 + %tmp8 = icmp ult i64 %indvars.iv.next, 6 + br i1 %tmp8, label %preheader, label %loop9 + +loop9: + %phi8.lcssa = phi i32 [ %tmp5, %preheader ], [ %tmp7, %loop6 ] + %tmp10 = trunc i32 %phi8.lcssa to i8 + %tmp11 = tail call i16* @func_101(i16* getelementptr inbounds ([8 x i16], [8 x i16]* @g_82, i64 0, i64 6), i16* undef, i8 zeroext %tmp10) + unreachable + +loopexit: + %var18.lcssa = phi i32 [ %phi18, %preheader ] + br label %loop1 + +} + +declare dso_local i16* @func_101(i16*, i16*, i8) local_unnamed_addr + Index: test/Analysis/MemorySSA/pr40754.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/pr40754.ll @@ -0,0 +1,54 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s +; REQUIRES: asserts + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "systemz-unknown" + +@g_120 = external dso_local local_unnamed_addr global [8 x [4 x [6 x i32]]], align 4 +@g_185 = external dso_local local_unnamed_addr global i32, align 4 +@g_329 = external dso_local local_unnamed_addr global i16, align 2 + +; Function Attrs: norecurse noreturn nounwind +define dso_local void @func_65() local_unnamed_addr { +; CHECK-LABEL: @func_65() + br label %1 + +;