Index: llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -112,6 +114,7 @@ AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } }; } // end anonymous namespace @@ -133,8 +136,8 @@ class MemOPSizeOpt : public InstVisitor { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) - : Func(Func), BFI(BFI), ORE(ORE), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = llvm::make_unique(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -170,6 +173,7 @@ Function &Func; BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; + DominatorTree *DT; bool Changed; std::vector WorkList; // Start of the previse range. @@ -336,15 +340,16 @@ LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); MergeBB->setName("MemOP.Merge"); BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); DefaultBB->setName("MemOP.Default"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); @@ -361,6 +366,10 @@ LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + std::vector Updates; + if (DT) + Updates.reserve(2 * SizeIds.size()); + for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); @@ -375,8 +384,15 @@ IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); + if (DT) { + Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); + Updates.push_back({DominatorTree::Insert, BB, CaseBB}); + } LLVM_DEBUG(dbgs() << *CaseBB << "\n"); } + DTU.applyUpdates(Updates); + Updates.clear(); + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); LLVM_DEBUG(dbgs() << *BB << "\n"); @@ -397,13 +413,14 @@ } // namespace static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) { + OptimizationRemarkEmitter &ORE, + DominatorTree *DT) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -412,7 +429,9 @@ BlockFrequencyInfo &BFI = getAnalysis().getBFI(); auto &ORE = getAnalysis().getORE(); - return PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DTWP = getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); } namespace llvm { @@ -422,11 +441,13 @@ FunctionAnalysisManager &FAM) { auto &BFI = FAM.getResult(F); auto &ORE = FAM.getResult(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DT = FAM.getCachedResult(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve(); + PA.preserve(); return PA; } } // namespace llvm Index: llvm/trunk/test/Other/opt-O2-pipeline.ll =================================================================== --- llvm/trunk/test/Other/opt-O2-pipeline.ll +++ llvm/trunk/test/Other/opt-O2-pipeline.ll @@ -80,7 +80,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information Index: llvm/trunk/test/Other/opt-O3-pipeline.ll =================================================================== --- llvm/trunk/test/Other/opt-O3-pipeline.ll +++ llvm/trunk/test/Other/opt-O3-pipeline.ll @@ -84,7 +84,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: PGOMemOPSize -; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information Index: llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll +++ llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s define i32 @test(i8* %a, i8* %b) !prof !1 { ; CHECK_LABEL: test Index: llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll +++ llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML Index: llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll =================================================================== --- llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll +++ llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll @@ -1,7 +1,7 @@ ; Test to ensure the pgo memop optimization pass doesn't try to scale ; up a value profile with a 0 count, which would lead to divide by 0. -; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT -; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT +; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"