diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -198,7 +198,7 @@ void initializeJMCInstrumenterPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); -void initializeLCSSAWrapperPassPass(PassRegistry&); +void initializeLCSSAWrapperPassPass(PassRegistry &); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&); void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); @@ -306,8 +306,9 @@ void initializeModuleMemProfilerLegacyPassPass(PassRegistry &); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); void initializeModuloScheduleTestPass(PassRegistry&); -void initializeMustExecutePrinterPass(PassRegistry&); +void initializeMoveAutoInitWrapperPassPass(PassRegistry &); void initializeMustBeExecutedContextPrinterPass(PassRegistry&); +void initializeMustExecutePrinterPass(PassRegistry &); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); void initializeObjCARCContractLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -113,7 +113,7 @@ (void) llvm::createInstructionCombiningPass(); (void) llvm::createInternalizePass(); (void) llvm::createJMCInstrumenterPass(); - (void) llvm::createLCSSAPass(); + (void)llvm::createLCSSAPass(); (void) llvm::createLegacyDivergenceAnalysisPass(); (void) llvm::createLICMPass(); (void) llvm::createLoopSinkPass(); @@ -136,6 +136,7 @@ (void) llvm::createLowerGlobalDtorsLegacyPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); + (void)llvm::createMoveAutoInitPass(); (void) llvm::createNaryReassociatePass(); (void) llvm::createObjCARCContractPass(); (void) llvm::createPromoteMemoryToRegisterPass(); diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h --- a/llvm/include/llvm/Transforms/Utils.h +++ b/llvm/include/llvm/Transforms/Utils.h @@ -70,6 +70,14 @@ Pass *createLCSSAPass(); extern char &LCSSAID; +//===----------------------------------------------------------------------===// +// +// MoveAutoInit - This pass moves initialization closer to use sites when +// profitable. +// +Pass *createMoveAutoInitPass(); +extern char &MoveAutoInitID; + //===----------------------------------------------------------------------===// // // AddDiscriminators - Add DWARF path discriminators to the IR. diff --git a/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h @@ -0,0 +1,29 @@ +//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions marked as auto-init closer to their use if +// profitable, generally because it moves them under a guard, potentially +// skipping the overhead of the auto-init under some execution paths. +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H +#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class MoveAutoInitPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -239,6 +239,7 @@ #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" +#include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -121,6 +121,7 @@ #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" @@ -882,6 +883,7 @@ // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. EarlyFPM.addPass(LowerExpectIntrinsicPass()); EarlyFPM.addPass(SimplifyCFGPass()); + EarlyFPM.addPass(MoveAutoInitPass()); EarlyFPM.addPass(SROAPass()); EarlyFPM.addPass(EarlyCSEPass()); if (Level == OptimizationLevel::O3) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -318,6 +318,7 @@ FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass()) +FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -56,6 +56,7 @@ MetaRenamer.cpp MisExpect.cpp ModuleUtils.cpp + MoveAutoInit.cpp NameAnonGlobals.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp diff --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp @@ -0,0 +1,221 @@ +//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves instruction maked as auto-init closer to the basic block that +// use it, eventually removing it from some control path of the function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MoveAutoInit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "move-auto-init" + +STATISTIC(NumMoved, "Number of instructions moved"); + +namespace { + +bool hasAutoInitMetadata(Instruction const &I) { + return I.hasMetadata(LLVMContext::MD_annotation) && + any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(), + [](const MDOperand &Op) { + return cast(Op.get())->getString() == "auto-init"; + }); +} + +Value *getAutoInitPointer(Instruction &I) { + Value *MemOp = nullptr; + if (auto *SI = dyn_cast(&I)) { + MemOp = SI->getPointerOperand(); + } + + else if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy_inline: + case Intrinsic::memcpy: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset_element_unordered_atomic: + MemOp = I.getOperand(0); + break; + default:; + }; + } + assert(MemOp && "auto-init implies some memory operand"); + return MemOp; +} + +BasicBlock *usersDominator(Value *V, Instruction *I, DominatorTree &DT) { + BasicBlock *CurrentDominator = nullptr; + for (User *U : V->users()) { + if (Instruction *UI = dyn_cast(U)) { + if (UI->isLifetimeStartOrEnd() || UI == I) + continue; + if (!CurrentDominator) + CurrentDominator = UI->getParent(); + else { + CurrentDominator = + DT.findNearestCommonDominator(CurrentDominator, UI->getParent()); + } + } + } + return CurrentDominator; +} + +bool runMoveAutoInit(Function &F, DominatorTree &DT) { + BasicBlock &EntryBB = F.getEntryBlock(); + SmallVector> JobList; + + // + // Compute movable instructions. + // + for (Instruction &I : EntryBB) { + + if (!hasAutoInitMetadata(I)) + continue; + + Value *MemOp = getAutoInitPointer(I); + + if (!isa(MemOp)) + // Bail out: We use the assumption that the init statement has been + // generated in the Entry block and has an Alloca as parameter to avoid + // doing complex memory analysis. + continue; + + BasicBlock *UsersDominator = usersDominator(MemOp, &I, DT); + if (!UsersDominator) + continue; + + if (UsersDominator == &EntryBB) + continue; + + // Traverse the cfg to detect cycles involving UsersDominator. We don't want + // a direct path from the entry point to UsersDominator, without back edges. + SmallPtrSet TransitiveSuccessors; + SmallVector WorkList(succ_begin(UsersDominator), + succ_end(UsersDominator)); + bool HasCycle = false; + while (!WorkList.empty()) { + BasicBlock *CurrBB = WorkList.pop_back_val(); + if (CurrBB == UsersDominator) { + // No early exit because we ant to compute the full set of transitive + // successors. + HasCycle = true; + } + for (BasicBlock *Successor : successors(CurrBB)) { + if (!TransitiveSuccessors.insert(Successor).second) { + continue; + } + WorkList.push_back(Successor); + } + } + + // Don't insert if that could create multiple execution of I, + // but we can insert it in the non back-edge predecessors, if it exists. + + if (HasCycle) { + BasicBlock *UsersDominatorHead = UsersDominator->getUniquePredecessor(); + + if (UsersDominatorHead == &EntryBB) + continue; + + BasicBlock *DominatingPredecessor = nullptr; + for (BasicBlock *Pred : predecessors(UsersDominatorHead)) { + // If one of the predecessor also transitively is a successor, that's + // not the path we don't consider that path. + if (TransitiveSuccessors.count(Pred)) + continue; + + if (!DominatingPredecessor) + DominatingPredecessor = Pred; + else + DominatingPredecessor = + DT.findNearestCommonDominator(DominatingPredecessor, Pred); + } + + if (!DominatingPredecessor || DominatingPredecessor == &EntryBB) + continue; + + UsersDominator = DominatingPredecessor; + } + + // We finally found a place where I can be moved while not introducing extra + // execution, and guarded by at least one condition. + JobList.emplace_back(&I, &*UsersDominator->getFirstInsertionPt()); + } + + // + // Perform the actual substitution. + // + if (JobList.empty()) + return false; + + for (auto Job : JobList) + Job.first->moveBefore(Job.second); + + return true; +} + +struct MoveAutoInitWrapperPass : public FunctionPass { + static char ID; + MoveAutoInitWrapperPass() : FunctionPass(ID) { + initializeMoveAutoInitWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + DominatorTree *DT; + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} // namespace + +char MoveAutoInitWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(MoveAutoInitWrapperPass, "move-auto-init", + "Move instruction marked as auto-init Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(MoveAutoInitWrapperPass, "move-auto-init", + "Move instruction marked as auto-init Pass", false, false) + +Pass *llvm::createMoveAutoInitPass() { return new MoveAutoInitWrapperPass(); } +char &llvm::MoveAutoInitID = MoveAutoInitWrapperPass::ID; + +bool MoveAutoInitWrapperPass::runOnFunction(Function &F) { + DT = &getAnalysis().getDomTree(); + return runMoveAutoInit(F, *DT); +} + +PreservedAnalyses MoveAutoInitPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + if (!runMoveAutoInit(F, DT)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -101,8 +101,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis -; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: MoveAutoInitPass ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: SROAPass ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -79,8 +79,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis -; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: MoveAutoInitPass ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: SROAPass ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -37,8 +37,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -39,8 +39,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis -; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: MoveAutoInit ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: SROAPass ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -38,8 +38,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis -; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: MoveAutoInit ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: SROAPass ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -37,8 +37,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis -; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: MoveAutoInitPass ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: SROAPass ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass