diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -46,11 +46,12 @@ class IntrinsicInst; class LoadInst; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; class OptimizationRemarkEmitter; class PHINode; class TargetLibraryInfo; class Value; - /// A private "module" namespace for types and utilities used by GVN. These /// are implementation details and should not be used by clients. namespace gvn LLVM_LIBRARY_VISIBILITY { @@ -211,6 +212,7 @@ OptimizationRemarkEmitter *ORE = nullptr; ImplicitControlFlowTracking *ICF = nullptr; LoopInfo *LI = nullptr; + MemorySSAUpdater *MSSAU = nullptr; ValueTable VN; @@ -246,7 +248,7 @@ bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr); /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -36,6 +36,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -653,14 +655,18 @@ auto *MemDep = isMemDepEnabled() ? &AM.getResult(F) : nullptr; auto *LI = AM.getCachedResult(F); + auto *MSSA = AM.getCachedResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE); + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, + MSSA ? &MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); PA.preserve(); PA.preserve(); + if (MSSA) + PA.preserve(); if (LI) PA.preserve(); return PA; @@ -1335,6 +1341,22 @@ LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); + if (MSSAU) { + auto *MSSA = MSSAU->getMemorySSA(); + // Get the defining access of the original load or use the load if it is a + // MemoryDef (e.g. because it is volatile). The inserted loads are + // guaranteed to load from the same definition. + auto *LIAcc = MSSA->getMemoryAccess(LI); + auto *DefiningAcc = + isa(LIAcc) ? LIAcc : LIAcc->getDefiningAccess(); + auto *NewAccess = MSSAU->createMemoryAccessInBB( + NewLoad, DefiningAcc, NewLoad->getParent(), + MemorySSA::BeforeTerminator); + if (auto *NewDef = dyn_cast(NewAccess)) + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + else + MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); + } // Transfer the old load's AA tags to the new load. AAMDNodes Tags; @@ -1551,9 +1573,17 @@ // Insert a new store to null instruction before the load to indicate that // this code is not reachable. FIXME: We could insert unreachable // instruction directly because we can modify the CFG. - new StoreInst(UndefValue::get(Int8Ty), - Constant::getNullValue(Int8Ty->getPointerTo()), - IntrinsicI); + auto *NewS = new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + if (MSSAU) { + // This added store is to null, so it will never executed and we can + // just use the LiveOnEntry def as defining access. + auto *NewDef = MSSAU->createMemoryAccessInBB( + NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), NewS->getParent(), + MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast(NewDef), /*RenameUses=*/true); + } } if (isAssumeWithEmptyBundle(*IntrinsicI)) markInstructionForDeletion(IntrinsicI); @@ -1687,6 +1717,8 @@ // Replace the load! patchAndReplaceAllUsesWith(L, AvailableValue); markInstructionForDeletion(L); + if (MSSAU) + MSSAU->removeMemoryAccess(L); ++NumGVNLoad; reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more @@ -2204,7 +2236,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE) { + OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2217,6 +2249,8 @@ VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; + MemorySSAUpdater Updater(MSSA); + MSSAU = MSSA ? &Updater : nullptr; bool Changed = false; bool ShouldContinue = true; @@ -2227,7 +2261,7 @@ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD); + bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); if (removedBlock) ++NumGVNBlocks; @@ -2263,6 +2297,9 @@ // iteration. DeadBlocks.clear(); + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + return Changed; } @@ -2303,6 +2340,8 @@ salvageKnowledge(I, AC); salvageDebugInfo(*I); if (MD) MD->removeInstruction(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); LLVM_DEBUG(verifyRemoved(I)); ICF->removeInstruction(I); I->eraseFromParent(); @@ -2533,6 +2572,8 @@ LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); + if (MSSAU) + MSSAU->removeMemoryAccess(CurInst); LLVM_DEBUG(verifyRemoved(CurInst)); // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes // some assertion failures. @@ -2577,7 +2618,7 @@ // possible. BasicBlock *BB = SplitCriticalEdge( Pred, Succ, - CriticalEdgeSplittingOptions(DT, LI).unsetPreserveLoopSimplify()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2592,7 +2633,7 @@ do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT, LI)); + CriticalEdgeSplittingOptions(DT, LI, MSSAU)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2791,6 +2832,7 @@ auto *LIWP = getAnalysisIfAvailable(); + auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), @@ -2800,7 +2842,8 @@ ? &getAnalysis().getMemDep() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, - &getAnalysis().getORE()); + &getAnalysis().getORE(), + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -2817,6 +2860,7 @@ AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); } private: diff --git a/llvm/test/Transforms/GVN/preserve-memoryssa.ll b/llvm/test/Transforms/GVN/preserve-memoryssa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GVN/preserve-memoryssa.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -aa-pipeline=basic-aa -passes='require,gvn' -S -verify-memoryssa %s | FileCheck %s + +; REQUIRES: asserts + +declare void @use(i32) readnone + +define i32 @test(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: [[LV_2_PRE:%.*]] = load i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = phi i32 [ [[LV_2_PRE]], [[IF_THEN749]] ], [ [[LV_0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label %for.body.i22 + +for.body.i22: + ret i32 1 +} + +define i32 @test_volatile(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test_volatile( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load volatile i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load volatile i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = load volatile i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load volatile i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label %for.body.i22 + +for.body.i22: + ret i32 1 +}