Index: llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h =================================================================== --- llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h +++ llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h @@ -99,7 +99,7 @@ /// load a /// Where a mayalias b, *does* require RenameUses be set to true. void insertDef(MemoryDef *Def, bool RenameUses = false); - void insertUse(MemoryUse *Use); + void insertUse(MemoryUse *Use, bool RenameUses = false); /// Update the MemoryPhi in `To` following an edge deletion between `From` and /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made. void removeEdge(BasicBlock *From, BasicBlock *To); Index: llvm/trunk/lib/Analysis/MemorySSA.cpp =================================================================== --- llvm/trunk/lib/Analysis/MemorySSA.cpp +++ llvm/trunk/lib/Analysis/MemorySSA.cpp @@ -1875,7 +1875,7 @@ auto *IncAcc = Phi->getIncomingValue(I); // If Pred has no unreachable predecessors, get last def looking at // IDoms. If, while walkings IDoms, any of these has an unreachable - // predecessor, then the expected incoming def is LoE. + // predecessor, then the incoming def can be any access. if (auto *DTNode = DT->getNode(Pred)) { while (DTNode) { if (auto *DefList = getBlockDefs(DTNode->getBlock())) { @@ -1886,16 +1886,13 @@ } DTNode = DTNode->getIDom(); } - } else if (auto *DefList = getBlockDefs(Pred)) { + } else { // If Pred has unreachable predecessors, but has at least a Def, the // incoming access can be the last Def in Pred, or it could have been - // optimized to LoE. - auto *LastAcc = &*(--DefList->end()); - assert((LastAcc == IncAcc || IncAcc == getLiveOnEntryDef()) && - "Incorrect incoming access into phi."); - } else { + // optimized to LoE. After an update, though, the LoE may have been + // replaced by another access, so IncAcc may be any access. // If Pred has unreachable predecessors and no Defs, incoming access - // should be LoE; In practice, after an update, it may be any access. + // should be LoE; However, after an update, it may be any access. } } } Index: llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp +++ llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp @@ -218,17 +218,48 @@ return recursePhi(Same); } -void MemorySSAUpdater::insertUse(MemoryUse *MU) { +void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); - // Unlike for defs, there is no extra work to do. Because uses do not create - // new may-defs, there are only two cases: - // + // In cases without unreachable blocks, because uses do not create new + // may-defs, there are only two cases: // 1. There was a def already below us, and therefore, we should not have // created a phi node because it was already needed for the def. // // 2. There is no def below us, and therefore, there is no extra renaming work // to do. + + // In cases with unreachable blocks, where the unnecessary Phis were + // optimized out, adding the Use may re-insert those Phis. Hence, when + // inserting Uses outside of the MSSA creation process, and new Phis were + // added, rename all uses if we are asked. + + if (!RenameUses && !InsertedPHIs.empty()) { + auto *Defs = MSSA->getBlockDefs(MU->getBlock()); + (void)Defs; + assert((!Defs || (++Defs->begin() == Defs->end())) && + "Block may have only a Phi or no defs"); + } + + if (RenameUses && InsertedPHIs.size()) { + SmallPtrSet Visited; + BasicBlock *StartBlock = MU->getBlock(); + + if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) { + MemoryAccess *FirstDef = &*Defs->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MU->getBlock(), FirstDef, Visited); + // We just inserted a phi into this block, so the incoming value will + // become the phi anyway, so it does not matter what we pass. + for (auto &MP : InsertedPHIs) + if (MemoryPhi *Phi = cast_or_null(MP)) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } + } } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. @@ -1071,9 +1102,9 @@ // Now reinsert it into the IR and do whatever fixups needed. if (auto *MD = dyn_cast(What)) - insertDef(MD, true); + insertDef(MD, /*RenameUses=*/true); else - insertUse(cast(What)); + insertUse(cast(What), /*RenameUses=*/true); // Clear dangling pointers. We added all MemoryPhi users, but not all // of them are removed by fixupDefs(). Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -1392,7 +1392,7 @@ MSSAU->insertDef(MemDef, /*RenameUses=*/true); else { auto *MemUse = cast(NewMemAcc); - MSSAU->insertUse(MemUse); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); } } } @@ -2119,9 +2119,11 @@ PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); - MSSAU->insertUse(NewMemUse); + MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); Index: llvm/trunk/test/Analysis/MemorySSA/PR42940.ll =================================================================== --- llvm/trunk/test/Analysis/MemorySSA/PR42940.ll +++ llvm/trunk/test/Analysis/MemorySSA/PR42940.ll @@ -0,0 +1,127 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S %s | FileCheck %s +; REQUIRES: asserts + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-ibm-linux" + +@g_77 = external dso_local global i16, align 2 + +; CHECK-LABEL: @f1() +define void @f1() { +entry: + store i16 undef, i16* @g_77, align 2 + br label %loop_pre + +unreachablelabel: ; No predecessors + br label %loop_pre + +loop_pre: + br label %for.cond.header + +for.cond.header: + store i32 0, i32* undef, align 4 + br i1 undef, label %for.body, label %for.end + +for.body: + %tmp1 = load volatile i16, i16* undef, align 2 + br label %for.end + +for.end: + br i1 undef, label %func.exit, label %for.cond.header + +func.exit: + ret void +} + +@g_159 = external dso_local global i32, align 4 + +; CHECK-LABEL: @f2() +define void @f2() { +entry: + br label %for.header.first + +for.header.first: + br label %for.body.first + +for.body.first: + store i32 0, i32* @g_159, align 4 + br i1 undef, label %for.body.first, label %for.end.first + +for.end.first: + br i1 undef, label %lor.end, label %for.header.first + +lor.end: + br label %for.pre + +unreachablelabel: ; No predecessors + br label %for.pre + +for.pre: + br label %for.header.second + +for.header.second: + store i32 undef, i32* undef, align 4 + br label %for.header.second +} + +@g_271 = external dso_local global i8, align 2 +@g_427 = external dso_local unnamed_addr global [9 x i16], align 2 + +; CHECK-LABEL: @f3() +define void @f3() { +entry: + br label %for.preheader + +for.preheader: + store volatile i8 undef, i8* @g_271, align 2 + br i1 undef, label %for.preheader, label %for.end + +for.end: + br label %lbl_1058.i + +unreachablelabel: ; No predecessors + br label %lbl_1058.i + +lbl_1058.i: + br label %for.cond3.preheader.i + +for.cond3.preheader.i: + %tmp1 = load i16, i16* getelementptr inbounds ([9 x i16], [9 x i16]* @g_427, i64 0, i64 2), align 2 + %conv620.i129 = zext i16 %tmp1 to i32 + %cmp621.i130 = icmp ugt i32 undef, %conv620.i129 + %conv622.i131 = zext i1 %cmp621.i130 to i32 + store i32 %conv622.i131, i32* undef, align 4 + br i1 undef, label %func.exit, label %for.cond3.preheader.i + +func.exit: + ret void +} + +@g_6 = external dso_local unnamed_addr global [3 x i32], align 4 +@g_244 = external dso_local global i64, align 8 +@g_1164 = external dso_local global i64, align 8 + +; CHECK-LABEL: @f4() +define void @f4() { +entry: + br label %for.cond8.preheader + +for.cond8.preheader: + store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @g_6, i64 0, i64 2), align 4 + br i1 undef, label %if.end, label %for.cond8.preheader + +if.end: + br i1 undef, label %cleanup1270, label %for.cond504.preheader + +for.cond504.preheader: + store i64 undef, i64* @g_244, align 8 + br label %cleanup1270 + +for.cond559.preheader: + store i64 undef, i64* @g_1164, align 8 + br i1 undef, label %for.cond559.preheader, label %cleanup1270 + +cleanup1270: + ret void +} +