Index: include/llvm/Transforms/Scalar/SROA.h =================================================================== --- include/llvm/Transforms/Scalar/SROA.h +++ include/llvm/Transforms/Scalar/SROA.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" @@ -54,6 +55,7 @@ class SROA : public PassInfoMixin { LLVMContext *C; DominatorTree *DT; + PostDominatorTree *PDT; AssumptionCache *AC; /// \brief Worklist of alloca instructions to simplify. @@ -110,7 +112,7 @@ /// Helper used by both the public run method and by the legacy pass. PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC); + PostDominatorTree &RunPDT, AssumptionCache &RunAC); bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, Index: include/llvm/Transforms/Utils/PromoteMemToReg.h =================================================================== --- include/llvm/Transforms/Utils/PromoteMemToReg.h +++ include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -20,6 +20,7 @@ template class ArrayRef; class AllocaInst; class DominatorTree; +class PostDominatorTree; class AliasSetTracker; class AssumptionCache; @@ -41,6 +42,7 @@ /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. void PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, + PostDominatorTree &PDT, AliasSetTracker *AST = nullptr, AssumptionCache *AC = nullptr); Index: lib/Transforms/Scalar/RewriteStatepointsForGC.cpp =================================================================== --- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -14,6 +14,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" @@ -102,6 +103,7 @@ // We add and rewrite a bunch of instructions, but don't really do much // else. We could in theory preserve a lot more analyses here. AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -131,6 +133,7 @@ INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) @@ -1595,7 +1598,7 @@ /// Do all the relocation update via allocas and mem2reg static void relocationViaAlloca( - Function &F, DominatorTree &DT, ArrayRef Live, + Function &F, DominatorTree &DT, PostDominatorTree &PDT, ArrayRef Live, ArrayRef Records) { #ifndef NDEBUG // record initial number of (static) allocas; we'll check we have the same @@ -1772,7 +1775,7 @@ "we must have the same allocas with lives"); if (!PromotableAllocas.empty()) { // Apply mem2reg to promote alloca to SSA - PromoteMemToReg(PromotableAllocas, DT); + PromoteMemToReg(PromotableAllocas, DT, PDT); } #ifndef NDEBUG @@ -2079,6 +2082,7 @@ } static bool insertParsePoints(Function &F, DominatorTree &DT, + PostDominatorTree &PDT, TargetTransformInfo &TTI, SmallVectorImpl &ToUpdate) { #ifndef NDEBUG @@ -2284,7 +2288,7 @@ "must be a gc pointer type"); #endif - relocationViaAlloca(F, DT, Live, Records); + relocationViaAlloca(F, DT, PDT, Live, Records); return !Records.empty(); } @@ -2393,6 +2397,7 @@ return false; DominatorTree &DT = getAnalysis(F).getDomTree(); + PostDominatorTree &PDT = getAnalysis(F).getPostDomTree(); TargetTransformInfo &TTI = getAnalysis().getTTI(F); @@ -2471,7 +2476,7 @@ } } - MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded); + MadeChange |= insertParsePoints(F, DT, PDT, TTI, ParsePointNeeded); return MadeChange; } Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2387,6 +2387,21 @@ LI.isVolatile(), LI.getName()); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + + SmallVector, 8> MD; + LI.getAllMetadata(MD); + // Try to preserve as much metadata as possible + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + switch (ID) { + case LLVMContext::MD_nonnull: + if (TargetTy->isPointerTy()) { + NewLI->setMetadata(ID, N); + break; + } + } + } V = NewLI; // If this is an integer load past the end of the slice (which means the @@ -4185,16 +4200,18 @@ NumPromoted += PromotableAllocas.size(); DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); + PromoteMemToReg(PromotableAllocas, *DT, *PDT, nullptr, AC); PromotableAllocas.clear(); return true; } PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, + PostDominatorTree &RunPDT, AssumptionCache &RunAC) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DT = &RunDT; + PDT = &RunPDT; AC = &RunAC; BasicBlock &EntryBB = F.getEntryBlock(); @@ -4244,6 +4261,7 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { return runImpl(F, AM.getResult(F), + AM.getResult(F), AM.getResult(F)); } @@ -4265,12 +4283,14 @@ auto PA = Impl.runImpl( F, getAnalysis().getDomTree(), + getAnalysis().getPostDomTree(), getAnalysis().getAssumptionCache(F)); return !PA.areAllPreserved(); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.setPreservesCFG(); } @@ -4287,5 +4307,6 @@ "Scalar Replacement Of Aggregates", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) Index: lib/Transforms/Utils/Mem2Reg.cpp =================================================================== --- lib/Transforms/Utils/Mem2Reg.cpp +++ lib/Transforms/Utils/Mem2Reg.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -28,7 +29,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, - AssumptionCache &AC) { + PostDominatorTree &PDT, AssumptionCache &AC) { std::vector Allocas; BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; @@ -46,7 +47,7 @@ if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, nullptr, &AC); + PromoteMemToReg(Allocas, DT, PDT, nullptr, &AC); NumPromoted += Allocas.size(); Changed = true; } @@ -55,8 +56,9 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); + auto &PDT = AM.getResult(F); auto &AC = AM.getResult(F); - if (!promoteMemoryToRegister(F, DT, AC)) + if (!promoteMemoryToRegister(F, DT, PDT, AC)) return PreservedAnalyses::all(); // FIXME: This should also 'preserve the CFG'. @@ -78,14 +80,16 @@ return false; DominatorTree &DT = getAnalysis().getDomTree(); + PostDominatorTree &PDT = getAnalysis().getPostDomTree(); AssumptionCache &AC = getAnalysis().getAssumptionCache(F); - return promoteMemoryToRegister(F, DT, AC); + return promoteMemoryToRegister(F, DT, PDT, AC); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.setPreservesCFG(); } }; @@ -97,6 +101,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) Index: lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -223,6 +224,7 @@ /// The alloca instructions being promoted. std::vector Allocas; DominatorTree &DT; + PostDominatorTree &PDT; DIBuilder DIB; /// An AliasSetTracker object to update. If null, don't update it. @@ -269,8 +271,8 @@ public: PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionCache *AC) - : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + PostDominatorTree &PDT, AliasSetTracker *AST, AssumptionCache *AC) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), PDT(PDT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), AST(AST), AC(AC) {} @@ -336,6 +338,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, + PostDominatorTree &PDT, AliasSetTracker *AST) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); @@ -387,6 +390,34 @@ // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); + + // If ReplVal is also a Load and LI post-dominates it, + // then lets preserve the metadata on LI. + if (isa(ReplVal)) { + LoadInst *ReplLoad = cast(ReplVal); + // If LI and ReplVal are in the same BB, then we've already checked above + // that LI comes after OnlyStore which must necessarily come after ReplVal. + // If they're in different BB we use post-dominance. + if (LI->getParent() == ReplLoad->getParent() || + PDT.dominates(LI->getParent(), ReplLoad->getParent())) { + + SmallVector, 8> MD; + LI->getAllMetadata(MD); + // Try to preserve as much metadata as possible + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + switch (ID) { + case LLVMContext::MD_nonnull: + if (LI->getType()->isPointerTy()) { + ReplLoad->setMetadata(ID, N); + break; + } + } + } + } + } + LI->replaceAllUsesWith(ReplVal); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); @@ -553,7 +584,7 @@ // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, PDT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -987,10 +1018,10 @@ } void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionCache *AC) { + PostDominatorTree &PDT, AliasSetTracker *AST, AssumptionCache *AC) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, AST, AC).run(); + PromoteMem2Reg(Allocas, DT, PDT, AST, AC).run(); } Index: test/Transforms/SROA/preserve-nonnull.ll =================================================================== --- /dev/null +++ test/Transforms/SROA/preserve-nonnull.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; +; Make sure that SROA doesn't lose nonnull metadata +; on loads from allocas that optimized out. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: yummy_nonnull +; CHECK: %{{.*}} = load float*, float** %0, align 8, !nonnull !0 + +define float* @yummy_nonnull(float**) { +entry-block: + %buf = alloca float* + + %_0_i8 = bitcast float** %0 to i8* + %_buf_i8 = bitcast float** %buf to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_0_i8, i64 8, i32 8, i1 false) + + %ret = load float*, float** %buf, align 8, !nonnull !0 + ret float* %ret +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) + +!0 = !{}