Index: llvm/include/llvm/Transforms/Utils/MoveAutoInit.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/MoveAutoInit.h @@ -0,0 +1,29 @@ +//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions marked as auto-init closer to their use if +// profitable, generally because it moves them under a guard, potentially +// skipping the overhead of the auto-init under some execution paths. +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H +#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class MoveAutoInitPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -239,6 +239,7 @@ #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" +#include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -121,6 +121,7 @@ #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" @@ -884,6 +885,7 @@ EarlyFPM.addPass(SimplifyCFGPass()); EarlyFPM.addPass(SROAPass()); EarlyFPM.addPass(EarlyCSEPass()); + EarlyFPM.addPass(MoveAutoInitPass()); if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -318,6 +318,7 @@ FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass()) +FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -56,6 +56,7 @@ MetaRenamer.cpp MisExpect.cpp ModuleUtils.cpp + MoveAutoInit.cpp NameAnonGlobals.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp Index: llvm/lib/Transforms/Utils/MoveAutoInit.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/MoveAutoInit.cpp @@ -0,0 +1,184 @@ +//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves instruction maked as auto-init closer to the basic block that +// use it, eventually removing it from some control path of the function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MoveAutoInit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "move-auto-init" + +STATISTIC(NumMoved, "Number of instructions moved"); + +namespace { + +bool hasAutoInitMetadata(Instruction const &I) { + return I.hasMetadata(LLVMContext::MD_annotation) && + any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(), + [](const MDOperand &Op) { + return cast(Op.get())->getString() == "auto-init"; + }); +} + +BasicBlock *usersDominator(Instruction *I, DominatorTree &DT, MemorySSA &MSSA) { + BasicBlock *CurrentDominator = nullptr; + + MemoryUseOrDef *IMA = MSSA.getMemoryAccess(I); + assert(IMA && "stack init is a memory access"); + + SmallPtrSet Visited; + SmallVector WorkList; + for (User *U : IMA->users()) { + auto *MA = cast(U); + WorkList.push_back(MA); + } + + while (!WorkList.empty()) { + MemoryAccess *MA = WorkList.pop_back_val(); + if (!Visited.insert(MA).second) + continue; + + if (auto *M = dyn_cast(MA)) { + if (auto *MI = M->getMemoryInst()) { + if (MI->isLifetimeStartOrEnd() || MI == I) + continue; + CurrentDominator = CurrentDominator + ? DT.findNearestCommonDominator(CurrentDominator, + MI->getParent()) + : MI->getParent(); + } + } else if (auto *M = dyn_cast(MA)) { + for (unsigned i = 0, n = M->getNumIncomingValues(); i != n; ++i) + WorkList.push_back(M->getIncomingValue(i)); + } + } + + return CurrentDominator; +} + +bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) { + BasicBlock &EntryBB = F.getEntryBlock(); + SmallVector> JobList; + + // + // Compute movable instructions. + // + for (Instruction &I : EntryBB) { + + // FIXME: this guards against generalization of this pass to any statement. + // The remaining logic should be pretty similar. + if (!hasAutoInitMetadata(I)) + continue; + + BasicBlock *UsersDominator = usersDominator(&I, DT, MSSA); + if (!UsersDominator) + continue; + + if (UsersDominator == &EntryBB) + continue; + + // Traverse the cfg to detect cycles involving UsersDominator. We don't want + // a direct path from the entry point to UsersDominator, without back edges. + SmallPtrSet TransitiveSuccessors; + SmallVector WorkList(succ_begin(UsersDominator), + succ_end(UsersDominator)); + bool HasCycle = false; + while (!WorkList.empty()) { + BasicBlock *CurrBB = WorkList.pop_back_val(); + if (CurrBB == UsersDominator) + // No early exit because we ant to compute the full set of transitive + // successors. + HasCycle = true; + for (BasicBlock *Successor : successors(CurrBB)) { + if (!TransitiveSuccessors.insert(Successor).second) + continue; + WorkList.push_back(Successor); + } + } + + // Don't insert if that could create multiple execution of I, + // but we can insert it in the non back-edge predecessors, if it exists. + + if (HasCycle) { + BasicBlock *UsersDominatorHead = UsersDominator; + while (BasicBlock *UniquePredecessor = + UsersDominatorHead->getUniquePredecessor()) + UsersDominatorHead = UniquePredecessor; + + if (UsersDominatorHead == &EntryBB) + continue; + + BasicBlock *DominatingPredecessor = nullptr; + for (BasicBlock *Pred : predecessors(UsersDominatorHead)) { + // If one of the predecessor also transitively is a successor, that's + // not the path we don't consider that path. + if (TransitiveSuccessors.count(Pred)) + continue; + + DominatingPredecessor = + DominatingPredecessor + ? DT.findNearestCommonDominator(DominatingPredecessor, Pred) + : Pred; + } + + if (!DominatingPredecessor || DominatingPredecessor == &EntryBB) + continue; + + UsersDominator = DominatingPredecessor; + } + + // We finally found a place where I can be moved while not introducing extra + // execution, and guarded by at least one condition. + JobList.emplace_back(&I, &*UsersDominator->getFirstInsertionPt()); + } + + // + // Perform the actual substitution. + // + if (JobList.empty()) + return false; + + for (auto Job : JobList) + Job.first->moveBefore(Job.second); + + NumMoved += JobList.size(); + + return true; +} + +} // namespace + +PreservedAnalyses MoveAutoInitPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); + if (!runMoveAutoInit(F, DT, MSSA)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -102,9 +102,16 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass @@ -114,11 +121,6 @@ ; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass @@ -128,6 +130,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Invalidating analysis: MemorySSAAnalysis on foo ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -80,9 +80,16 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass @@ -92,11 +99,6 @@ ; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis @@ -105,6 +107,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Invalidating analysis: MemorySSAAnalysis on foo ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -38,9 +38,16 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass @@ -49,11 +56,6 @@ ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. @@ -68,6 +70,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Invalidating analysis: MemorySSAAnalysis on foo ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -40,17 +40,19 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo -; CHECK-O-NEXT: Running analysis: AAManager on foo -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis @@ -77,6 +79,7 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Invalidating analysis: MemorySSAAnalysis on foo ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -39,9 +39,16 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass @@ -50,11 +57,6 @@ ; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O123SZ-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O123SZ-NEXT: Running analysis: InlineAdvisorAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -38,17 +38,19 @@ ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis ; CHECK-O-NEXT: Running pass: SROAPass -; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: MoveAutoInitPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis on foo +; CHECK-O-NEXT: Running analysis: AAManager on foo +; CHECK-O-NEXT: Running analysis: BasicAA on foo +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA on foo +; CHECK-O-NEXT: Running analysis: TypeBasedAA on foo +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy on foo ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo -; CHECK-O-NEXT: Running analysis: AAManager on foo -; CHECK-O-NEXT: Running analysis: BasicAA -; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA -; CHECK-O-NEXT: Running analysis: TypeBasedAA -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis @@ -72,6 +74,7 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Invalidating analysis: MemorySSAAnalysis on foo ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis Index: llvm/test/Transforms/MoveAutoInit/branch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MoveAutoInit/branch.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes='move-auto-init' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@__const.foo.buffer = private unnamed_addr constant [8 x i32] [i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766], align 16 + +define dso_local void @foo(i32 noundef %x) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUFFER:%.*]] = alloca [8 x i32], align 16 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[BUFFER]], ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], ptr [[BUFFER]], i64 0, i64 0 +; CHECK-NEXT: call void @dump(ptr noundef [[ARRAYIDX]]) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; + +entry: + %buffer = alloca [8 x i32], align 16 + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %buffer, ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0 + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds [8 x i32], ptr %buffer, i64 0, i64 0 + call void @dump(ptr noundef %arrayidx) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) + +declare void @dump(ptr noundef) + +!0 = !{!"auto-init"} + Index: llvm/test/Transforms/MoveAutoInit/fence.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MoveAutoInit/fence.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes='move-auto-init' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo(i32 noundef %x) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: fence acquire +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @dump(ptr noundef [[VAL]]) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + %val = alloca i32, align 4 + store i32 -1431655766, ptr %val, align 4, !annotation !0 + %tobool = icmp ne i32 %x, 0 + fence acquire + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @dump(ptr noundef %val) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + +declare void @dump(ptr noundef) + +!0 = !{!"auto-init"} Index: llvm/test/Transforms/MoveAutoInit/loop.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MoveAutoInit/loop.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes='move-auto-init' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo(i32 noundef %x) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUFFER:%.*]] = alloca [80 x i32], align 16 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0 +; CHECK-NEXT: br label [[DO_BODY:%.*]] +; CHECK: do.body: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0 +; CHECK-NEXT: call void @dump(ptr noundef [[ARRAYIDX]]) +; CHECK-NEXT: br label [[DO_COND]] +; CHECK: do.cond: +; CHECK-NEXT: [[DEC]] = add nsw i32 [[X_ADDR_0]], -1 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X_ADDR_0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[DO_BODY]], label [[DO_END:%.*]] +; CHECK: do.end: +; CHECK-NEXT: ret void +; + +entry: + %buffer = alloca [80 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0 + br label %do.body + +do.body: ; preds = %do.cond, %entry + %x.addr.0 = phi i32 [ %x, %entry ], [ %dec, %do.cond ] + %arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0 + call void @dump(ptr noundef %arrayidx) + br label %do.cond + +do.cond: ; preds = %do.body + %dec = add nsw i32 %x.addr.0, -1 + %tobool = icmp ne i32 %x.addr.0, 0 + br i1 %tobool, label %do.body, label %do.end + +do.end: ; preds = %do.cond + ret void +} + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) + +declare void @dump(ptr noundef) + +define dso_local void @bar(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUFFER:%.*]] = alloca [80 x i32], align 16 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[Y:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], [[Y]] +; CHECK-NEXT: br label [[DO_BODY:%.*]] +; CHECK: do.body: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0 +; CHECK-NEXT: call void @dump(ptr noundef [[ARRAYIDX]]) +; CHECK-NEXT: br label [[DO_COND]] +; CHECK: do.cond: +; CHECK-NEXT: [[DEC]] = add nsw i32 [[X_ADDR_0]], -1 +; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[X_ADDR_0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL1]], label [[DO_BODY]], label [[DO_END:%.*]] +; CHECK: do.end: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; + +entry: + %buffer = alloca [80 x i32], align 16 + call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0 + %tobool = icmp ne i32 %y, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %add = add nsw i32 %x, %y + br label %do.body + +do.body: ; preds = %do.cond, %if.then + %x.addr.0 = phi i32 [ %add, %if.then ], [ %dec, %do.cond ] + %arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0 + call void @dump(ptr noundef %arrayidx) + br label %do.cond + +do.cond: ; preds = %do.body + %dec = add nsw i32 %x.addr.0, -1 + %tobool1 = icmp ne i32 %x.addr.0, 0 + br i1 %tobool1, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %if.end + +if.end: ; preds = %do.end, %entry + ret void +} + +!0 = !{!"auto-init"} Index: llvm/test/Transforms/MoveAutoInit/scalar.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MoveAutoInit/scalar.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes='move-auto-init' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo(i32 noundef %x) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0 +; CHECK-NEXT: call void @dump(ptr noundef [[VAL]]) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; + +entry: + %val = alloca i32, align 4 + store i32 -1431655766, ptr %val, align 4, !annotation !0 + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @dump(ptr noundef %val) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @dump(ptr noundef) + +!0 = !{!"auto-init"}