Index: lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -109,9 +111,12 @@ private: bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + // We require DominatorTree here only to update and thus preserve it. + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } }; } // end anonymous namespace @@ -120,6 +125,7 @@ INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", @@ -133,8 +139,8 @@ class MemOPSizeOpt : public InstVisitor { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) - : Func(Func), BFI(BFI), ORE(ORE), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree &DT) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = llvm::make_unique(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -170,6 +176,7 @@ Function &Func; BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; + DominatorTree &DT; bool Changed; std::vector WorkList; // Start of the previse range. @@ -336,15 +343,16 @@ LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock *DefaultBB = SplitBlock(BB, MI, &DT); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), &DT); MergeBB->setName("MemOP.Merge"); BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); DefaultBB->setName("MemOP.Default"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); @@ -361,6 +369,9 @@ LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + std::vector Updates; + Updates.reserve(2 * SizeIds.size()); + for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); @@ -374,9 +385,14 @@ CaseBB->getInstList().push_back(NewInst); IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); + Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); SI->addCase(CaseSizeId, CaseBB); + Updates.push_back({DominatorTree::Insert, BB, CaseBB}); LLVM_DEBUG(dbgs() << *CaseBB << "\n"); } + DTU.applyUpdates(Updates); + Updates.clear(); + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); LLVM_DEBUG(dbgs() << *BB << "\n"); @@ -397,13 +413,14 @@ } // namespace static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) { + OptimizationRemarkEmitter &ORE, + DominatorTree &DT) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -412,7 +429,8 @@ BlockFrequencyInfo &BFI = getAnalysis().getBFI(); auto &ORE = getAnalysis().getORE(); - return PGOMemOPSizeOptImpl(F, BFI, ORE); + auto &DT = getAnalysis().getDomTree(); + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); } namespace llvm { @@ -422,11 +440,13 @@ FunctionAnalysisManager &FAM) { auto &BFI = FAM.getResult(F); auto &ORE = FAM.getResult(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE); + auto &DT = FAM.getResult(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve(); + PA.preserve(); return PA; } } // namespace llvm Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -80,7 +80,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -84,7 +84,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information