Index: lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- lib/Analysis/MemorySSAUpdater.cpp +++ lib/Analysis/MemorySSAUpdater.cpp @@ -261,6 +261,8 @@ // of that thing with us, since we are in the way of whatever was there // before. // We now define that def's memorydefs and memoryphis + // We *do* need to reset optimized for Defs, otherwise, the user may be the + // optimized value and we just set a Def to be optimized to MD. if (DefBeforeSameBlock) { for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); UI != UE;) { @@ -269,6 +271,9 @@ // Also make sure we skip ourselves to avoid self references. if (isa(U.getUser()) || U.getUser() == MD) continue; + if (auto *DefUser = dyn_cast(U.getUser())) + if (DefUser->isOptimized()) + DefUser->resetOptimized(); U.set(MD); } } @@ -289,6 +294,38 @@ // backwards to find the def. To make that work, we'd have to track whether // getDefRecursive only ever used the single predecessor case. These types // of paths also only exist in between CFG simplifications. + + // If this is the first def in the block and this insert is in an arbitrary + // place, compute IDF and place phis. 
+ auto Iter = MD->getDefsIterator(); + Iter++; + auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); + if (Iter == IterEnd) { + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector IDFBlocks; + SmallPtrSet DefiningBlocks; + DefiningBlocks.insert(MD->getBlock()); + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + unsigned NewPhiIndex = InsertedPHIs.size(); + for (auto *BBIDF : IDFBlocks) + if (!MSSA->getMemoryAccess(BBIDF)) { + auto *IDFPhi = MSSA->createMemoryPhi(BBIDF); + InsertedPHIs.push_back(IDFPhi); + FixupList.push_back(IDFPhi); + } + for (unsigned Idx = NewPhiIndex, IdxE = InsertedPHIs.size(); Idx < IdxE; + ++Idx) { + auto *IDFPhi = cast(InsertedPHIs[Idx]); + auto *BBIDF = IDFPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap> CachedPreviousDef; + IDFPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), + Pred); + } + } + } + FixupList.push_back(MD); } @@ -299,6 +336,16 @@ // Put any new phis on the fixup list, and process them FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end()); } + + // Optimize trivial phis. + for (auto &MP : InsertedPHIs) { + MemoryPhi *MPhi = dyn_cast_or_null(MP); + if (MPhi) { + auto OperRange = MPhi->operands(); + tryRemoveTrivialPhi(MPhi, OperRange); + } + } + // Now that all fixups are done, rename all uses if we are asked. if (RenameUses) { SmallPtrSet Visited; Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -2068,6 +2068,8 @@ // stores in the loop. Promoter.run(LoopUses); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. 
if (PreheaderLoad->use_empty()) eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); Index: test/Analysis/MemorySSA/pr40749.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/pr40749.ll @@ -0,0 +1,57 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "systemz-unknown" + +@g_3 = external dso_local local_unnamed_addr global i32, align 4 +@g_57 = external dso_local local_unnamed_addr global i8, align 2 +@g_82 = external dso_local global [8 x i16], align 2 +@g_107 = external dso_local local_unnamed_addr global i32, align 4 + +define internal fastcc void @foo1() unnamed_addr{ +; CHECK-LABEL: @foo1() +entry: + %.pre.pre = load i32, i32* @g_3, align 4 + br label %loop1 + +loop1: + %tmp0 = phi i32 [ undef, %entry ], [ %var18.lcssa, %loopexit ] + br label %preheader + +preheader: + %indvars.iv = phi i64 [ 0, %loop1 ], [ %indvars.iv.next, %loop6 ] + %phi18 = phi i32 [ %tmp0, %loop1 ], [ 0, %loop6 ] + %phi87 = phi i32 [ 0, %loop1 ], [ %tmp7, %loop6 ] + %tmp1 = getelementptr inbounds [8 x i16], [8 x i16]* @g_82, i64 0, i64 %indvars.iv + %tmp2 = load i16, i16* %tmp1, align 2 + %tmp3 = trunc i16 %tmp2 to i8 + store i8 %tmp3, i8* @g_57, align 2 + store i32 8, i32* @g_107, align 4 + %tmp4 = icmp eq i32 %.pre.pre, 0 + %spec.select = select i1 %tmp4, i32 %phi18, i32 14 + %tmp5 = trunc i64 %indvars.iv to i32 + switch i32 %spec.select, label %loopexit [ + i32 0, label %loop6 + i32 14, label %loop9 + ] + +loop6: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp7 = add nuw nsw i32 %phi87, 1 + %tmp8 = icmp ult i64 %indvars.iv.next, 6 + br i1 %tmp8, label %preheader, label %loop9 + +loop9: + %phi8.lcssa = phi i32 [ %tmp5, %preheader ], [ %tmp7, %loop6 ] + %tmp10 = trunc i32 %phi8.lcssa to i8 + %tmp11 = tail call i16* @func_101(i16* getelementptr inbounds ([8 x i16], [8 x 
i16]* @g_82, i64 0, i64 6), i16* undef, i8 zeroext %tmp10) + unreachable + +loopexit: + %var18.lcssa = phi i32 [ %phi18, %preheader ] + br label %loop1 + +} + +declare dso_local i16* @func_101(i16*, i16*, i8) local_unnamed_addr + Index: test/Analysis/MemorySSA/pr40754.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/pr40754.ll @@ -0,0 +1,51 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "systemz-unknown" + +@g_120 = external dso_local local_unnamed_addr global [8 x [4 x [6 x i32]]], align 4 +@g_185 = external dso_local local_unnamed_addr global i32, align 4 +@g_329 = external dso_local local_unnamed_addr global i16, align 2 + +; Function Attrs: norecurse noreturn nounwind +define dso_local void @func_65() local_unnamed_addr { + br label %1 + +;