Index: lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- lib/Analysis/MemorySSAUpdater.cpp +++ lib/Analysis/MemorySSAUpdater.cpp @@ -71,11 +71,19 @@ // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. - for (auto *Pred : predecessors(BB)) - if (MSSA->DT->isReachableFromEntry(Pred)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); - else + bool UniqueIncomingAccess = true; + MemoryAccess *SingleAccess = nullptr; + for (auto *Pred : predecessors(BB)) { + if (MSSA->DT->isReachableFromEntry(Pred)) { + auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); + if (!SingleAccess) + SingleAccess = IncomingAccess; + else if (IncomingAccess != SingleAccess) + UniqueIncomingAccess = false; + PhiOps.push_back(IncomingAccess); + } else PhiOps.push_back(MSSA->getLiveOnEntryDef()); + } // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -84,7 +92,9 @@ // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi) { + if (Result == Phi && UniqueIncomingAccess && SingleAccess) + Result = SingleAccess; + else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -315,8 +325,8 @@ SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); - SmallPtrSet<BasicBlock *, 2> DefiningBlocks; - + // Remember the index where we may insert new phis. + unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. 
Because every may-def is the same, any phis/etc we would create, it @@ -335,49 +345,51 @@ auto Iter = MD->getDefsIterator(); ++Iter; auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); - if (Iter == IterEnd) + if (Iter == IterEnd) { + SmallPtrSet<BasicBlock *, 2> DefiningBlocks; DefiningBlocks.insert(MD->getBlock()); + for (const auto &VH : InsertedPHIs) + if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH)) + DefiningBlocks.insert(RealPHI->getBlock()); + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector<BasicBlock *, 32> IDFBlocks; + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) { + auto *MPhi = MSSA->getMemoryAccess(BBIDF); + if (!MPhi) { + MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); + } + // Add the phis created into the IDF blocks to NonOptPhis, so they are + // not optimized out as trivial by the call to getPreviousDefFromEnd + // below. Once they are complete, all these Phis are added to the + // FixupList, and removed from NonOptPhis inside fixupDefs(). Existing + // Phis in IDF may need fixing as well, and potentially be trivial + // before this insertion, hence add all IDF Phis. See PR43044. 
+ NonOptPhis.insert(MPhi); + } + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), + Pred); + } + } - FixupList.push_back(MD); - } - - ForwardIDFCalculator IDFs(*MSSA->DT); - SmallVector<BasicBlock *, 32> IDFBlocks; - for (const auto &VH : InsertedPHIs) - if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH)) - DefiningBlocks.insert(RealPHI->getBlock()); - IDFs.setDefiningBlocks(DefiningBlocks); - IDFs.calculate(IDFBlocks); - SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; - for (auto *BBIDF : IDFBlocks) { - auto *MPhi = MSSA->getMemoryAccess(BBIDF); - if (!MPhi) { - MPhi = MSSA->createMemoryPhi(BBIDF); - NewInsertedPHIs.push_back(MPhi); - } - // Add the phis created into the IDF blocks to NonOptPhis, so they are not - // optimized out as trivial by the call to getPreviousDefFromEnd below. Once - // they are complete, all these Phis are added to the FixupList, and removed - // from NonOptPhis inside fixupDefs(). Existing Phis in IDF may need fixing - // as well, and potentially be trivial before this insertion, hence add all - // IDF Phis. See PR43044. - NonOptPhis.insert(MPhi); - } - - for (auto &MPhi : NewInsertedPHIs) { - auto *BBIDF = MPhi->getBlock(); - for (auto *Pred : predecessors(BBIDF)) { - DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; - MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); + // Re-take the index where we're adding the new phis, because the above + // call to getPreviousDefFromEnd may have inserted into InsertedPHIs. + NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } } + FixupList.push_back(MD); } - // Remember the index where we may insert new phis. 
- unsigned NewPhiIndex = InsertedPHIs.size(); - for (auto &MPhi : NewInsertedPHIs) { - InsertedPHIs.push_back(&*MPhi); - FixupList.push_back(&*MPhi); - } // Remember the index where we stopped inserting new phis above, since the // fixupDefs call in the loop below may insert more, that are already minimal. unsigned NewPhiIndexEnd = InsertedPHIs.size(); Index: test/Analysis/MemorySSA/PR42940.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/PR42940.ll @@ -1,189 +0,0 @@ -; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S %s | FileCheck %s -; REQUIRES: asserts - -target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" -target triple = "s390x-ibm-linux" - -@g_77 = external dso_local global i16, align 2 - -; CHECK-LABEL: @f1() -define void @f1() { -entry: - store i16 undef, i16* @g_77, align 2 - br label %loop_pre - -unreachablelabel: ; No predecessors - br label %loop_pre - -loop_pre: - br label %for.cond.header - -for.cond.header: - store i32 0, i32* undef, align 4 - br i1 undef, label %for.body, label %for.end - -for.body: - %tmp1 = load volatile i16, i16* undef, align 2 - br label %for.end - -for.end: - br i1 undef, label %func.exit, label %for.cond.header - -func.exit: - ret void -} - -@g_159 = external dso_local global i32, align 4 - -; CHECK-LABEL: @f2() -define void @f2() { -entry: - br label %for.header.first - -for.header.first: - br label %for.body.first - -for.body.first: - store i32 0, i32* @g_159, align 4 - br i1 undef, label %for.body.first, label %for.end.first - -for.end.first: - br i1 undef, label %lor.end, label %for.header.first - -lor.end: - br label %for.pre - -unreachablelabel: ; No predecessors - br label %for.pre - -for.pre: - br label %for.header.second - -for.header.second: - store i32 undef, i32* undef, align 4 - br label %for.header.second -} - -@g_271 = external dso_local global i8, align 2 -@g_427 = external dso_local unnamed_addr global 
[9 x i16], align 2 - -; CHECK-LABEL: @f3() -define void @f3() { -entry: - br label %for.preheader - -for.preheader: - store volatile i8 undef, i8* @g_271, align 2 - br i1 undef, label %for.preheader, label %for.end - -for.end: - br label %lbl_1058.i - -unreachablelabel: ; No predecessors - br label %lbl_1058.i - -lbl_1058.i: - br label %for.cond3.preheader.i - -for.cond3.preheader.i: - %tmp1 = load i16, i16* getelementptr inbounds ([9 x i16], [9 x i16]* @g_427, i64 0, i64 2), align 2 - %conv620.i129 = zext i16 %tmp1 to i32 - %cmp621.i130 = icmp ugt i32 undef, %conv620.i129 - %conv622.i131 = zext i1 %cmp621.i130 to i32 - store i32 %conv622.i131, i32* undef, align 4 - br i1 undef, label %func.exit, label %for.cond3.preheader.i - -func.exit: - ret void -} - -@g_6 = external dso_local unnamed_addr global [3 x i32], align 4 -@g_244 = external dso_local global i64, align 8 -@g_1164 = external dso_local global i64, align 8 - -; CHECK-LABEL: @f4() -define void @f4() { -entry: - br label %for.cond8.preheader - -for.cond8.preheader: - store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @g_6, i64 0, i64 2), align 4 - br i1 undef, label %if.end, label %for.cond8.preheader - -if.end: - br i1 undef, label %cleanup1270, label %for.cond504.preheader - -for.cond504.preheader: - store i64 undef, i64* @g_244, align 8 - br label %cleanup1270 - -for.cond559.preheader: - store i64 undef, i64* @g_1164, align 8 - br i1 undef, label %for.cond559.preheader, label %cleanup1270 - -cleanup1270: - ret void -} - -@g_1504 = external dso_local local_unnamed_addr global i16****, align 8 - -define void @f5() { -bb: - tail call fastcc void @f21() - br label %bb12.outer - -bb12.outer.loopexit: ; No predecessors! 
- br label %bb12.outer - -bb12.outer: ; preds = %bb12.outer.loopexit, %bb - br i1 undef, label %bb12.outer.split.us, label %bb12.preheader - -bb12.preheader: ; preds = %bb12.outer - br label %bb12 - -bb12.outer.split.us: ; preds = %bb12.outer - br label %bb16.us.us - -bb16.us.us: ; preds = %bb16.us.us, %bb12.outer.split.us - br label %bb16.us.us - -bb12: ; preds = %bb77.1, %bb12.preheader - br i1 undef, label %bb25.preheader, label %bb77 - -bb25.preheader: ; preds = %bb12.1, %bb12 - br label %bb25 - -bb25: ; preds = %l0, %bb25.preheader - br i1 undef, label %bb62, label %bb71.thread - -bb62: ; preds = %bb25 - br i1 undef, label %bb92.loopexit, label %l0 - -l0: ; preds = %bb62 - br label %bb25 - -bb71.thread: ; preds = %bb25 - br label %bb92 - -bb77: ; preds = %bb12 - %tmp78 = load i16****, i16***** @g_1504, align 8 - %tmp79 = load volatile i16***, i16**** %tmp78, align 8 - br i1 undef, label %bb91, label %bb12.1 - -bb91: ; preds = %bb77.1, %bb77 - unreachable - -bb92.loopexit: ; preds = %bb62 - br label %bb92 - -bb92: ; preds = %bb92.loopexit, %bb71.thread - ret void - -bb12.1: ; preds = %bb77 - br i1 undef, label %bb25.preheader, label %bb77.1 - -bb77.1: ; preds = %bb12.1 - br i1 undef, label %bb91, label %bb12 -} - -declare void @f21() Index: test/Analysis/MemorySSA/PR43044.ll =================================================================== --- /dev/null +++ test/Analysis/MemorySSA/PR43044.ll @@ -1,52 +0,0 @@ -; RUN: opt -loop-rotate -licm -enable-mssa-loop-dependency -verify-memoryssa %s -S | FileCheck %s -; REQUIRES: asserts - -target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" -target triple = "s390x-ibm-linux" - -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) - -; CHECK-LABEL: @func_42() -define void @func_42() { -entry: - br label %for.cond1050 - -for.cond1050.loopexit: ; preds = %for.cond1373 - br label %for.cond1050 - -for.cond1050: ; preds = %for.cond1050.loopexit, %entry - %storemerge6 = phi i32 [ 2, 
%entry ], [ 0, %for.cond1050.loopexit ] - %cmp1051 = icmp sgt i32 %storemerge6, -1 - br i1 %cmp1051, label %for.cond1055.preheader, label %cleanup1400.loopexit1 - -for.cond1055.preheader: ; preds = %for.cond1050 - store i64 0, i64* null, align 8 - %0 = load i64, i64* null, align 8 - %tobool1383 = icmp eq i64 %0, 0 - br i1 %tobool1383, label %for.cond1055.preheader.cleanup1400.loopexit.split_crit_edge, label %for.cond1055.preheader.for.cond1055.preheader.split_crit_edge - -for.cond1055.preheader.for.cond1055.preheader.split_crit_edge: ; preds = %for.cond1055.preheader - br label %for.body1376 - -for.cond1055.preheader.cleanup1400.loopexit.split_crit_edge: ; preds = %for.cond1055.preheader - br label %cleanup1400.loopexit.split - -for.cond1373: ; preds = %for.body1376 - br i1 true, label %for.body1376, label %for.cond1050.loopexit - -for.body1376: ; preds = %for.cond1373, %for.cond1055.preheader.for.cond1055.preheader.split_crit_edge - br i1 false, label %cleanup1400.loopexit, label %for.cond1373 - -cleanup1400.loopexit: ; preds = %for.body1376 - br label %cleanup1400.loopexit.split - -cleanup1400.loopexit.split: ; preds = %cleanup1400.loopexit, %for.cond1055.preheader.cleanup1400.loopexit.split_crit_edge - br label %cleanup1400 - -cleanup1400.loopexit1: ; preds = %for.cond1050 - br label %cleanup1400 - -cleanup1400: ; preds = %cleanup1400.loopexit1, %cleanup1400.loopexit.split - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull undef) - unreachable -} Index: test/Analysis/MemorySSA/pr40754.ll =================================================================== --- test/Analysis/MemorySSA/pr40754.ll +++ test/Analysis/MemorySSA/pr40754.ll @@ -11,44 +11,45 @@ ; Function Attrs: norecurse noreturn nounwind define dso_local void @func_65() local_unnamed_addr { ; CHECK-LABEL: @func_65() - br label %1 +label0: + br label %label1 -;