diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1436,6 +1436,7 @@ Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); Conf.OptLevel = CGOpts.OptimizationLevel; + Conf.SizeLevel = CGOpts.OptimizeSize; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; diff --git a/clang/test/CodeGen/thinlto-debug-pm.c b/clang/test/CodeGen/thinlto-debug-pm.c --- a/clang/test/CodeGen/thinlto-debug-pm.c +++ b/clang/test/CodeGen/thinlto-debug-pm.c @@ -3,15 +3,513 @@ // RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s // RUN: llvm-lto -thinlto -o %t %t.o -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NEWPM -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-NEWPM -// O2-NEWPM: Running pass: LoopVectorizePass -// O0-NEWPM-NOT: Running pass: LoopVectorizePass +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0-NEWPM,O0123sz-NEWPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O1 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0123sz-NEWPM,O123sz-NEWPM,O12-NEWPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager 
-fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0123sz-NEWPM,O123sz-NEWPM,O23sz-NEWPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O3 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0123sz-NEWPM,O123sz-NEWPM,O3-NEWPM,O23sz-NEWPM,O123-NEWPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -Os -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0123sz-NEWPM,O123sz-NEWPM,O23sz-NEWPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -Oz -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefixes=O0123sz-NEWPM,O123sz-NEWPM,O23sz-NEWPM -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O2-OLDPM -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O0-OLDPM -// O2-OLDPM: Loop Vectorization -// O0-OLDPM-NOT: Loop Vectorization +// O0123sz-NEWPM: Running analysis: PassInstrumentationAnalysis +// O0123sz-NEWPM: Starting llvm::Module pass manager run. 
+// O0123sz-NEWPM: Running pass: WholeProgramDevirtPass +// O0123sz-NEWPM: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> +// O0123sz-NEWPM: Running pass: LowerTypeTestsPass +// O0123sz-NEWPM: Invalidating all non-preserved analyses for: +// O0123sz-NEWPM: Invalidating analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> +// O123sz-NEWPM: Running pass: ForceFunctionAttrsPass +// O123sz-NEWPM: Running pass: PassManager +// O123sz-NEWPM: Starting llvm::Module pass manager run. +// O123sz-NEWPM: Running pass: PGOIndirectCallPromotion +// O123sz-NEWPM: Running analysis: ProfileSummaryAnalysis +// O123sz-NEWPM: Running pass: InferFunctionAttrsPass +// O123sz-NEWPM: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> +// O123sz-NEWPM: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +// O123sz-NEWPM: Running analysis: PassInstrumentationAnalysis +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running analysis: TargetIRAnalysis +// O123sz-NEWPM: Running analysis: AssumptionAnalysis +// O123sz-NEWPM: Running pass: SROA +// O123sz-NEWPM: Running analysis: DominatorTreeAnalysis +// O123sz-NEWPM: Running pass: EarlyCSEPass +// O123sz-NEWPM: Running analysis: TargetLibraryAnalysis +// O123sz-NEWPM: Running pass: LowerExpectIntrinsicPass +// O3-NEWPM: Running pass: CallSiteSplittingPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: IPSCCPPass +// O123sz-NEWPM: Running pass: CalledValuePropagationPass +// O123sz-NEWPM: Running pass: GlobalOptPass +// O123sz-NEWPM: Invalidating all non-preserved analyses for: +// O123sz-NEWPM: Invalidating analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> +// O123sz-NEWPM: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +// O123sz-NEWPM: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> +// O123sz-NEWPM: Running analysis: DominatorTreeAnalysis +// O123sz-NEWPM: Running analysis: PassInstrumentationAnalysis +// O123sz-NEWPM: Running analysis: AssumptionAnalysis +// O123sz-NEWPM: Running pass: DeadArgumentEliminationPass +// O123sz-NEWPM: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Running analysis: TargetLibraryAnalysis +// O123sz-NEWPM: Running analysis: OptimizationRemarkEmitterAnalysis +// O123sz-NEWPM: Running analysis: AAManager +// O123sz-NEWPM: Running analysis: BasicAA +// O123sz-NEWPM: Running analysis: ScopedNoAliasAA +// O123sz-NEWPM: Running analysis: TypeBasedAA +// O123sz-NEWPM: Running analysis: OuterAnalysisManagerProxy +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running analysis: TargetIRAnalysis +// O123sz-NEWPM: Finished llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA +// O123sz-NEWPM: Running analysis: GlobalsAA +// O123sz-NEWPM: Running analysis: CallGraphAnalysis +// O123sz-NEWPM: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +// O123sz-NEWPM: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> +// O123sz-NEWPM: Running analysis: InnerAnalysisManagerProxy<{{.*(CGSCCAnalysisManager|AnalysisManager<.*LazyCallGraph::SCC.*>).*}},{{.*}}Module> +// O123sz-NEWPM: Running analysis: LazyCallGraphAnalysis +// O123sz-NEWPM: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo) +// O123sz-NEWPM: Running analysis: PassInstrumentationAnalysis on (foo) +// O123sz-NEWPM: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> on (foo) +// O123sz-NEWPM: Starting CGSCC pass manager run. +// O123sz-NEWPM: Running pass: InlinerPass on (foo) +// O123sz-NEWPM: Running pass: PostOrderFunctionAttrsPass on (foo) +// O3-NEWPM: Running pass: ArgumentPromotionPass on (foo) +// O123sz-NEWPM: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +// O123sz-NEWPM: Starting llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: SROA +// O123sz-NEWPM: Running pass: EarlyCSEPass +// O123sz-NEWPM: Running analysis: MemorySSAAnalysis +// O23sz-NEWPM: Running pass: SpeculativeExecutionPass +// O23sz-NEWPM: Running pass: JumpThreadingPass +// O23sz-NEWPM: Running analysis: LazyValueAnalysis +// O23sz-NEWPM: Running pass: CorrelatedValuePropagationPass +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O3-NEWPM: Running pass: AggressiveInstCombinePass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123-NEWPM: Running pass: LibCallsShrinkWrapPass +// O23sz-NEWPM: Running pass: TailCallElimPass +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running pass: ReassociatePass +// O123sz-NEWPM: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis +// O123sz-NEWPM: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: LoopSimplifyPass +// O123sz-NEWPM: Running analysis: LoopAnalysis +// O123sz-NEWPM: Running pass: LCSSAPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: LoopSimplifyPass +// O123sz-NEWPM: Running pass: LCSSAPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: SROA +// O12-NEWPM: Running pass: MemCpyOptPass +// O23sz-NEWPM: Running pass: MergedLoadStoreMotionPass +// O23sz-NEWPM: Running pass: GVN +// O123sz-NEWPM: Running analysis: MemoryDependenceAnalysis +// O123sz-NEWPM: Running analysis: PhiValuesAnalysis +// O23sz-NEWPM: Running pass: MemCpyOptPass +// O123sz-NEWPM: Running pass: SCCPPass +// O123sz-NEWPM: Running pass: BDCEPass +// O123sz-NEWPM: Running analysis: DemandedBitsAnalysis +// O123sz-NEWPM: Running pass: InstCombinePass +// O23sz-NEWPM: Running pass: JumpThreadingPass +// O23sz-NEWPM: Running pass: CorrelatedValuePropagationPass +// O23sz-NEWPM: Running pass: DSEPass +// O23sz-NEWPM: Running pass: FunctionToLoopPassAdaptor +// O23sz-NEWPM: Starting llvm::Function pass manager run. +// O23sz-NEWPM: Running pass: LoopSimplifyPass +// O23sz-NEWPM: Running pass: LCSSAPass +// O23sz-NEWPM: Finished llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: ADCEPass +// O123sz-NEWPM: Running analysis: PostDominatorTreeAnalysis +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Finished llvm::Function pass manager run. +// O123sz-NEWPM: Finished CGSCC pass manager run. +// O0123sz-NEWPM: Finished llvm::Module pass manager run. +// O123sz-NEWPM: Running pass: PassManager +// O123sz-NEWPM: Starting llvm::Module pass manager run. +// O123sz-NEWPM: Running pass: GlobalOptPass +// O123sz-NEWPM: Running pass: GlobalDCEPass +// O123sz-NEWPM: Running pass: EliminateAvailableExternallyPass +// O123sz-NEWPM: Running pass: ReversePostOrderFunctionAttrsPass +// O123sz-NEWPM: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA +// O123sz-NEWPM: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +// O123sz-NEWPM: Starting llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: Float2IntPass +// O123sz-NEWPM: Running pass: LowerConstantIntrinsicsPass +// O123sz-NEWPM: Running pass: FunctionToLoopPassAdaptor +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: LoopSimplifyPass +// O123sz-NEWPM: Running pass: LCSSAPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: LoopDistributePass +// O123sz-NEWPM: Running analysis: ScalarEvolutionAnalysis +// O123sz-NEWPM: Running analysis: InnerAnalysisManagerProxy +// O123sz-NEWPM: Running pass: LoopVectorizePass +// O123sz-NEWPM: Running analysis: BlockFrequencyAnalysis +// O123sz-NEWPM: Running analysis: BranchProbabilityAnalysis +// O123sz-NEWPM: Running pass: LoopLoadEliminationPass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Running pass: LoopUnrollPass +// O123sz-NEWPM: Running pass: WarnMissedTransformationsPass +// O123sz-NEWPM: Running pass: InstCombinePass +// O123sz-NEWPM: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis +// O123sz-NEWPM: Running pass: FunctionToLoopPassAdaptor +// O123sz-NEWPM: Starting llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: LoopSimplifyPass +// O123sz-NEWPM: Running pass: LCSSAPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. +// O123sz-NEWPM: Running pass: AlignmentFromAssumptionsPass +// O123sz-NEWPM: Running pass: LoopSinkPass +// O123sz-NEWPM: Running pass: InstSimplifyPass +// O123sz-NEWPM: Running pass: DivRemPairsPass +// O123sz-NEWPM: Running pass: SimplifyCFGPass +// O123sz-NEWPM: Running pass: SpeculateAroundPHIsPass +// O123sz-NEWPM: Finished llvm::Function pass manager run. 
+// O123sz-NEWPM: Running pass: CGProfilePass +// O123sz-NEWPM: Running pass: GlobalDCEPass +// O123sz-NEWPM: Running pass: ConstantMergePass +// O123sz-NEWPM: Finished llvm::Module pass manager run. +// O123sz-NEWPM: Finished llvm::Module pass manager run. + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O023sz-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O1 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O123sz-OLDPM,O123-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O123sz-OLDPM,O23sz-OLDPM,O123-OLDPM,O23-OLDPM,O023sz-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O3 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O123sz-OLDPM,O3-OLDPM,O23sz-OLDPM,O123-OLDPM,O23-OLDPM,O023sz-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -Os -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O123sz-OLDPM,O23sz-OLDPM,O023sz-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -Oz -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefixes=O0123sz-OLDPM,O123sz-OLDPM,O23sz-OLDPM,O023sz-OLDPM + +// O0123sz-OLDPM: Pass Arguments: +// O0123sz-OLDPM: Target Transform Information +// O0123sz-OLDPM: 
Target Library Information +// O0123sz-OLDPM: Assumption Cache Tracker +// O123sz-OLDPM: Type-Based Alias Analysis +// O123sz-OLDPM: Scoped NoAlias Alias Analysis +// O0123sz-OLDPM: Profile summary info +// O0123sz-OLDPM: ModulePass Manager +// O0123sz-OLDPM: FunctionPass Manager +// O0123sz-OLDPM: Module Verifier +// O0123sz-OLDPM: Whole program devirtualization +// O0123sz-OLDPM: FunctionPass Manager +// O0123sz-OLDPM: Dominator Tree Construction +// O0123sz-OLDPM: Lower type metadata +// O0123sz-OLDPM: Force set function attributes +// O123sz-OLDPM: PGOIndirectCallPromotion +// O123sz-OLDPM: Infer set function attributes +// O3-OLDPM: FunctionPass Manager +// O3-OLDPM: Dominator Tree Construction +// O3-OLDPM: Call-site splitting +// O123sz-OLDPM: Interprocedural Sparse Conditional Constant Propagation +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Called Value Propagation +// O123sz-OLDPM: Deduce and propagate attributes +// O123sz-OLDPM: Global Variable Optimizer +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Promote Memory to Register +// O123sz-OLDPM: Dead Argument Elimination +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: Simplify the CFG +// O0123sz-OLDPM: CallGraph Construction +// O123sz-OLDPM: Globals Alias Analysis +// O0123sz-OLDPM: 
Call Graph SCC Pass Manager +// O123sz-OLDPM: Remove unused exception handling info +// O0123sz-OLDPM: Function Integration/Inlining +// O123sz-OLDPM: Deduce function attributes +// O3-OLDPM: Promote 'by reference' arguments to scalars +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: SROA +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Memory SSA +// O123sz-OLDPM: Early CSE w/ MemorySSA +// O23sz-OLDPM: Speculatively execute instructions if target has divergent branches +// O23sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23sz-OLDPM: Function Alias Analysis Results +// O23sz-OLDPM: Lazy Value Information Analysis +// O23sz-OLDPM: Jump Threading +// O23sz-OLDPM: Value Propagation +// O123sz-OLDPM: Simplify the CFG +// O123sz-OLDPM: Dominator Tree Construction +// O3-OLDPM: Combine pattern based expressions +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123-OLDPM: Conditionally eliminate dead library calls +// O123-OLDPM: Natural Loop Information +// O123-OLDPM: Branch Probability Analysis +// O123-OLDPM: Block Frequency Analysis +// O123-OLDPM: Lazy Branch Probability Analysis +// O123-OLDPM: Lazy Block Frequency Analysis +// O123-OLDPM: Optimization Remark Emitter +// O123-OLDPM: PGOMemOPSize +// O23-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23-OLDPM: Function Alias Analysis Results +// O23-OLDPM: Natural Loop Information +// O23-OLDPM: Lazy Branch Probability Analysis +// O23-OLDPM: Lazy Block Frequency Analysis +// O23sz-OLDPM: Optimization Remark Emitter +// O23sz-OLDPM: Tail Call Elimination +// O123sz-OLDPM: 
Simplify the CFG +// O123sz-OLDPM: Reassociate expressions +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Memory SSA +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Rotate Loops +// O123sz-OLDPM: Loop Invariant Code Motion +// O123sz-OLDPM: Unswitch loops +// O123sz-OLDPM: Simplify the CFG +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Induction Variable Simplification +// O123sz-OLDPM: Recognize loop idioms +// O123sz-OLDPM: Delete dead loops +// O123sz-OLDPM: Unroll loops +// O23sz-OLDPM: MergedLoadStoreMotion +// O123sz-OLDPM: Phi Values Analysis +// O23sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23sz-OLDPM: Function Alias Analysis Results +// O23sz-OLDPM: Memory Dependence Analysis +// O23sz-OLDPM: Lazy Branch Probability Analysis +// O23sz-OLDPM: Lazy Block Frequency Analysis +// O23sz-OLDPM: Optimization Remark Emitter +// O23sz-OLDPM: Global Value Numbering +// O23sz-OLDPM: Phi Values Analysis +// O23sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Memory Dependence Analysis +// 
O123sz-OLDPM: MemCpy Optimization +// O123sz-OLDPM: Sparse Conditional Constant Propagation +// O123sz-OLDPM: Demanded bits analysis +// O123sz-OLDPM: Bit-Tracking Dead Code Elimination +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O23sz-OLDPM: Lazy Value Information Analysis +// O23sz-OLDPM: Jump Threading +// O23sz-OLDPM: Value Propagation +// O23sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23sz-OLDPM: Function Alias Analysis Results +// O23sz-OLDPM: Phi Values Analysis +// O23sz-OLDPM: Memory Dependence Analysis +// O23sz-OLDPM: Dead Store Elimination +// O23sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O23sz-OLDPM: Function Alias Analysis Results +// O23sz-OLDPM: Memory SSA +// O23sz-OLDPM: Natural Loop Information +// O23sz-OLDPM: Canonicalize natural loops +// O23sz-OLDPM: LCSSA Verifier +// O23sz-OLDPM: Loop-Closed SSA Form Pass +// O23sz-OLDPM: Scalar Evolution Analysis +// O23sz-OLDPM: Loop Pass Manager +// O23sz-OLDPM: Loop Invariant Code Motion +// O123sz-OLDPM: Post-Dominator Tree Construction +// O123sz-OLDPM: Aggressive Dead Code Elimination +// O123sz-OLDPM: Simplify the CFG +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: A No-Op Barrier Pass +// O023sz-OLDPM: Eliminate Available Externally Globals +// O123sz-OLDPM: CallGraph Construction +// O123sz-OLDPM: Deduce function attributes in RPO +// O123sz-OLDPM: Global 
Variable Optimizer +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O0123sz-OLDPM: Dead Global Elimination +// O123sz-OLDPM: Global Variable Optimizer +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: CallGraph Construction +// O123sz-OLDPM: Globals Alias Analysis +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Float to int +// O123sz-OLDPM: Lower constant intrinsics +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Memory SSA +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Rotate Loops +// O123sz-OLDPM: Loop Access Analysis +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Loop Distribution +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Loop Access Analysis +// O123sz-OLDPM: Demanded bits analysis +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Loop Vectorization +// O123sz-OLDPM: Canonicalize 
natural loops +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Loop Access Analysis +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Loop Load Elimination +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: Simplify the CFG +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Demanded bits analysis +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: SLP Vectorizer +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Unroll loops +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Combine redundant instructions +// O123sz-OLDPM: Memory SSA +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Loop Invariant Code Motion +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: 
Optimization Remark Emitter +// O123sz-OLDPM: Warn about non-applied transformations +// O123sz-OLDPM: Alignment from assumptions +// O123sz-OLDPM: Strip Unused Function Prototypes +// O23sz-OLDPM: Dead Global Elimination +// O23sz-OLDPM: Merge Duplicate Global Constants +// O0123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: Canonicalize natural loops +// O123sz-OLDPM: LCSSA Verifier +// O123sz-OLDPM: Loop-Closed SSA Form Pass +// O123sz-OLDPM: Basic Alias Analysis (stateless AA impl) +// O123sz-OLDPM: Function Alias Analysis Results +// O123sz-OLDPM: Scalar Evolution Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: Loop Pass Manager +// O123sz-OLDPM: Loop Sink +// O123sz-OLDPM: Lazy Branch Probability Analysis +// O123sz-OLDPM: Lazy Block Frequency Analysis +// O123sz-OLDPM: Optimization Remark Emitter +// O123sz-OLDPM: Remove redundant instructions +// O123sz-OLDPM: Hoist/decompose integer division and remainder +// O123sz-OLDPM: Simplify the CFG +// O0123sz-OLDPM: Module Verifier +// O0123sz-OLDPM: Pass Arguments: -domtree +// O0123sz-OLDPM: FunctionPass Manager +// O0123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Pass Arguments: -domtree +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq +// O123sz-OLDPM: Target Library Information +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq +// O123sz-OLDPM: Target Library Information +// O123sz-OLDPM: FunctionPass Manager +// 
O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis +// O123sz-OLDPM: Pass Arguments: -targetlibinfo -domtree -loops -branch-prob -block-freq +// O123sz-OLDPM: Target Library Information +// O123sz-OLDPM: FunctionPass Manager +// O123sz-OLDPM: Dominator Tree Construction +// O123sz-OLDPM: Natural Loop Information +// O123sz-OLDPM: Branch Probability Analysis +// O123sz-OLDPM: Block Frequency Analysis void foo() { } diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -142,6 +142,7 @@ // Used for /opt:lldlto=N unsigned ltoo = 2; + unsigned ltoos = 0; // Used for /opt:lldltojobs=N unsigned thinLTOJobs = 0; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1406,8 +1406,8 @@ tailMerge = 0; } else if (s.startswith("lldlto=")) { StringRef optLevel = s.substr(7); - if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) - error("/opt:lldlto: invalid optimization level: " + optLevel); + config->ltoo = CHECK(llvm::lto::getOptLevel(optLevel), "/opt:lldlto: "); + config->ltoos = check(llvm::lto::getSizeLevel(optLevel)); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (jobs.getAsInteger(10, config->thinLTOJobs) || diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -79,6 +79,7 @@ c.DisableVerify = true; c.DiagHandler = diagnosticHandler; c.OptLevel = config->ltoo; + c.SizeLevel = config->ltoos; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -239,6 +239,7 @@ uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + unsigned ltoos; unsigned optimize; unsigned thinLTOJobs; int32_t splitStackAdjustSize; diff --git 
a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -892,7 +892,8 @@ config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager); config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); - config->ltoo = args::getInteger(args, OPT_lto_O, 2); + config->ltoo = check(llvm::lto::getOptLevel(args.getLastArg(OPT_lto_O), 2)); + config->ltoos = check(llvm::lto::getSizeLevel(args.getLastArg(OPT_lto_O), 0)); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); @@ -1000,8 +1001,6 @@ for (auto *arg : args.filtered(OPT_mllvm)) parseClangOption(arg->getValue(), arg->getSpelling()); - if (config->ltoo > 3) - error("invalid optimization level for LTO: " + Twine(config->ltoo)); if (config->ltoPartitions == 0) error("--lto-partitions: number of threads must be > 0"); if (config->thinLTOJobs == 0) diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -89,6 +89,7 @@ c.DisableVerify = config->disableVerify; c.DiagHandler = diagnosticHandler; c.OptLevel = config->ltoo; + c.SizeLevel = config->ltoos; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); diff --git a/lld/test/COFF/lto-opt-level.ll b/lld/test/COFF/lto-opt-level.ll --- a/lld/test/COFF/lto-opt-level.ll +++ b/lld/test/COFF/lto-opt-level.ll @@ -4,6 +4,10 @@ ; RUN: FileCheck --check-prefix=CHECK-O0 %s < %t0.map ; RUN: lld-link /out:%t2.exe /entry:main /subsystem:console /opt:lldlto=2 /lldmap:%t2.map %t.obj ; RUN: FileCheck --check-prefix=CHECK-O2 %s < %t2.map +; RUN: lld-link /out:%ts.exe /entry:main /subsystem:console /opt:lldlto=s /lldmap:%ts.map %t.obj +; RUN: FileCheck --check-prefix=CHECK-O2 %s < %ts.map +; RUN: lld-link /out:%tz.exe /entry:main 
/subsystem:console /opt:lldlto=z /lldmap:%tz.map %t.obj +; RUN: FileCheck --check-prefix=CHECK-O2 %s < %tz.map ; RUN: lld-link /out:%t2a.exe /entry:main /subsystem:console /lldmap:%t2a.map %t.obj ; RUN: FileCheck --check-prefix=CHECK-O2 %s < %t2a.map diff --git a/lld/test/ELF/lto/opt-level.ll b/lld/test/ELF/lto/opt-level.ll --- a/lld/test/ELF/lto/opt-level.ll +++ b/lld/test/ELF/lto/opt-level.ll @@ -6,6 +6,10 @@ ; RUN: llvm-nm %t0 | FileCheck --check-prefix=CHECK-O0 %s ; RUN: ld.lld -o %t2 -e main --lto-O2 %t.o ; RUN: llvm-nm %t2 | FileCheck --check-prefix=CHECK-O2 %s +; RUN: ld.lld -o %ts -e main --lto-Os %t.o +; RUN: llvm-nm %ts | FileCheck --check-prefix=CHECK-O2 %s +; RUN: ld.lld -o %tz -e main --lto-Oz %t.o +; RUN: llvm-nm %tz | FileCheck --check-prefix=CHECK-O2 %s ; RUN: ld.lld -o %t2a -e main %t.o ; RUN: llvm-nm %t2a | FileCheck --check-prefix=CHECK-O2 %s ; RUN: ld.lld -o %t2 -e main %t.o --plugin-opt O2 @@ -19,14 +23,14 @@ ; RUN: FileCheck --check-prefix=INVALID1 %s ; RUN: not ld.lld -o %t3 -e main --plugin-opt=Ofoo %t.o 2>&1 | \ ; RUN: FileCheck --check-prefix=INVALID2 %s -; INVALID2: --plugin-opt=Ofoo: number expected, but got 'foo' +; INVALID2: invalid optimization level for LTO: foo ; RUN: not ld.lld -o %t3 -e main --lto-O-1 %t.o 2>&1 | \ ; RUN: FileCheck --check-prefix=INVALIDNEGATIVE1 %s -; INVALIDNEGATIVE1: invalid optimization level for LTO: 4294967295 +; INVALIDNEGATIVE1: invalid optimization level for LTO: -1 ; RUN: not ld.lld -o %t3 -e main --plugin-opt=O-1 %t.o 2>&1 | \ ; RUN: FileCheck --check-prefix=INVALIDNEGATIVE2 %s -; INVALIDNEGATIVE2: invalid optimization level for LTO: 4294967295 +; INVALIDNEGATIVE2: invalid optimization level for LTO: -1 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/lld/test/wasm/lto/opt-level.ll b/lld/test/wasm/lto/opt-level.ll --- a/lld/test/wasm/lto/opt-level.ll +++ b/lld/test/wasm/lto/opt-level.ll @@ -3,6 +3,10 @@ ; 
RUN: obj2yaml %t0 | FileCheck --check-prefix=CHECK-O0 %s ; RUN: wasm-ld -o %t2 -e main --lto-O2 %t.o ; RUN: obj2yaml %t2 | FileCheck --check-prefix=CHECK-O2 %s +; RUN: wasm-ld -o %ts -e main --lto-Os %t.o +; RUN: obj2yaml %ts | FileCheck --check-prefix=CHECK-O2 %s +; RUN: wasm-ld -o %tz -e main --lto-Oz %t.o +; RUN: obj2yaml %tz | FileCheck --check-prefix=CHECK-O2 %s ; RUN: wasm-ld -o %t2a -e main %t.o ; RUN: obj2yaml %t2a | FileCheck --check-prefix=CHECK-O2 %s @@ -13,7 +17,7 @@ ; RUN: not wasm-ld -o %t3 -m elf_x86_64 -e main --lto-O-1 %t.o 2>&1 | \ ; RUN: FileCheck --check-prefix=INVALIDNEGATIVE %s -; INVALIDNEGATIVE: invalid optimization level for LTO: 4294967295 +; INVALIDNEGATIVE: invalid optimization level for LTO: -1 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown-wasm" diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -52,6 +52,7 @@ uint32_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + unsigned ltoos; unsigned optimize; unsigned thinLTOJobs; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -318,7 +318,8 @@ config->importMemory = args.hasArg(OPT_import_memory); config->sharedMemory = args.hasArg(OPT_shared_memory); config->importTable = args.hasArg(OPT_import_table); - config->ltoo = args::getInteger(args, OPT_lto_O, 2); + config->ltoo = check(llvm::lto::getOptLevel(args.getLastArg(OPT_lto_O), 2)); + config->ltoos = check(llvm::lto::getSizeLevel(args.getLastArg(OPT_lto_O), 0)); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->optimize = args::getInteger(args, OPT_O, 0); config->outputFile = args.getLastArgValue(OPT_o); @@ -391,8 +392,6 @@ error("--compress-relocations is incompatible with output debug" " information. 
Please pass --strip-debug or --strip-all"); - if (config->ltoo > 3) - error("invalid optimization level for LTO: " + Twine(config->ltoo)); if (config->ltoPartitions == 0) error("--lto-partitions: number of threads must be > 0"); if (config->thinLTOJobs == 0) diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -50,6 +50,7 @@ c.DisableVerify = config->disableVerify; c.DiagHandler = diagnosticHandler; c.OptLevel = config->ltoo; + c.SizeLevel = config->ltoos; c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -46,6 +46,7 @@ CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; CodeGenFileType CGFileType = CGFT_ObjectFile; unsigned OptLevel = 2; + unsigned SizeLevel = 0; bool DisableVerify = false; /// Use the new pass manager diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -39,6 +39,9 @@ class Module; class Target; class raw_pwrite_stream; +namespace opt { +class Arg; +} /// Resolve linkage for prevailing symbols in the \p Index. Linkage changes /// recorded in the index and the ThinLTO backends must apply the changes to @@ -302,6 +305,8 @@ /// by LTO but might not be visible from bitcode symbol table. 
static ArrayRef getRuntimeLibcallSymbols(); + static void addFnAttrForOptSize(unsigned SizeLevel, Module &M); + private: Config Conf; @@ -451,6 +456,27 @@ unsigned LinkerRedefined : 1; }; +class LTOOLevelError : public ErrorInfo { +public: + static char ID; + StringRef Level; + + LTOOLevelError(StringRef Level) : Level(Level) {} + + void log(raw_ostream &OS) const override { + OS << "invalid optimization level for LTO: " << Level; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +Expected getOptLevel(StringRef OArg); +Expected getOptLevel(llvm::opt::Arg *OArg, unsigned Default); +Expected getSizeLevel(StringRef OArg); +Expected getSizeLevel(llvm::opt::Arg *OArg, unsigned Default); + } // namespace lto } // namespace llvm diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -95,6 +95,7 @@ void setCpu(StringRef MCpu) { this->MCpu = MCpu; } void setAttr(StringRef MAttr) { this->MAttr = MAttr; } void setOptLevel(unsigned OptLevel); + void setSizeLevel(unsigned SizeLevel) { this->SizeLevel = SizeLevel; } void setShouldInternalize(bool Value) { ShouldInternalize = Value; } void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; } @@ -235,6 +236,7 @@ const Target *MArch = nullptr; std::string TripleStr; unsigned OptLevel = 2; + unsigned SizeLevel = 0; lto_diagnostic_handler_t DiagHandler = nullptr; void *DiagContext = nullptr; bool ShouldInternalize = EnableLTOInternalization; diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h --- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -226,6 +226,11 @@ OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel; } + /// IR optimization level for size: from 0 to 2. 
+ void setSizeLevel(unsigned NewOptLevel) { + SizeLevel = (NewOptLevel > 2) ? 2 : NewOptLevel; + } + /// Disable CodeGen, only run the stages till codegen and stop. The output /// will be bitcode. void disableCodeGen(bool Disable) { DisableCodeGen = Disable; } @@ -341,6 +346,9 @@ /// IR Optimization Level [0-3]. unsigned OptLevel = 3; + + /// IR Optimization Level for size [0-2]. + unsigned SizeLevel = 0; }; } #endif diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -29,8 +29,10 @@ #include "llvm/LTO/SummaryBasedOptimizations.h" #include "llvm/Linker/IRMover.h" #include "llvm/Object/IRObjectFile.h" +#include "llvm/Option/Arg.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -136,6 +138,7 @@ AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); + AddUnsigned(Conf.SizeLevel); AddUnsigned(Conf.UseNewPM); AddUnsigned(Conf.Freestanding); AddString(Conf.OptPipeline); @@ -953,6 +956,8 @@ } Error LTO::runRegularLTO(AddStreamFn AddStream) { + LTO::addFnAttrForOptSize(Conf.SizeLevel, *RegularLTO.CombinedModule); + // Make sure commons have the right size/alignment: we kept the largest from // all the prevailing when adding the inputs, and we apply it here. 
const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
@@ -1377,6 +1382,26 @@
   return BackendProc->wait();
 }
 
+void LTO::addFnAttrForOptSize(unsigned SizeLevel, Module &M) {
+  if (!SizeLevel)
+    return;
+
+  if (SizeLevel == 1) {
+    for (auto &F : M.functions())
+      if (!F.hasOptNone()) {
+        F.addFnAttr(llvm::Attribute::OptimizeForSize);
+        F.removeFnAttr(llvm::Attribute::MinSize);
+      }
+  } else if (SizeLevel == 2) {
+    for (auto &F : M.functions())
+      if (!F.hasOptNone()) {
+        F.addFnAttr(llvm::Attribute::MinSize);
+        F.removeFnAttr(llvm::Attribute::OptimizeForSize);
+      }
+  } else
+    llvm_unreachable("unknown opt size level");
+}
+
 Expected<std::unique_ptr<ToolOutputFile>>
 lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
                               StringRef RemarksPasses, StringRef RemarksFormat,
@@ -1416,3 +1441,49 @@
   StatsFile->keep();
   return std::move(StatsFile);
 }
+
+char LTOOLevelError::ID;
+
+Expected<unsigned> lto::getOptLevel(StringRef S) {
+  if (S == "s" || S == "z" || S.empty())
+    return 2;
+
+  if (S == "g")
+    return 1;
+
+  int Res;
+  if (S.getAsInteger(10, Res) || Res < 0 || Res > 3)
+    return make_error<LTOOLevelError>(S);
+  return Res;
+}
+
+Expected<unsigned> lto::getOptLevel(opt::Arg *OArg, unsigned Default) {
+  if (!OArg)
+    return Default;
+  return lto::getOptLevel(OArg->getValue());
+}
+
+/// Map the value of an -O<level> flag to an IR size level: 1 for "s"
+/// (optsize), 2 for "z" (minsize), 0 for every other accepted level.
+/// Returns LTOOLevelError for anything getOptLevel(StringRef) rejects.
+Expected<unsigned> lto::getSizeLevel(StringRef S) {
+  if (S == "s")
+    return 1;
+  if (S == "z")
+    return 2;
+
+  // No size component requested. Reuse getOptLevel's validation so both
+  // parsers accept exactly the same spellings (including "" and "g"); a
+  // hand-written `S[0] >= '0' || S[0] <= '3'` range test is always true and
+  // indexes into an empty string.
+  Expected<unsigned> OptLevelOrErr = lto::getOptLevel(S);
+  if (!OptLevelOrErr)
+    return OptLevelOrErr.takeError();
+  return 0;
+}
+
+Expected<unsigned> lto::getSizeLevel(opt::Arg *OArg, unsigned Default) {
+  if (!OArg)
+    return Default;
+  return lto::getSizeLevel(OArg->getValue());
+}
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -153,9 +153,39 @@
       CodeModel, Conf.CGOptLevel));
 }
 
+static PassBuilder::OptimizationLevel mapToLevel(const Config &Conf) {
+  switch (Conf.OptLevel) {
+  default:
+    llvm_unreachable("Invalid optimization level!");
+
+  case 0:
+    return PassBuilder::O0;
+
+  case 1:
+    return
PassBuilder::O1; + + case 2: + switch (Conf.SizeLevel) { + default: + llvm_unreachable("Invalid optimization level for size!"); + + case 0: + return PassBuilder::O2; + + case 1: + return PassBuilder::Os; + + case 2: + return PassBuilder::Oz; + } + + case 3: + return PassBuilder::O3; + } +} + static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, - unsigned OptLevel, bool IsThinLTO, - ModuleSummaryIndex *ExportSummary, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { Optional PGOOpt; if (!Conf.SampleProfile.empty()) @@ -197,24 +227,7 @@ ModulePassManager MPM(Conf.DebugPassManager); // FIXME (davide): verify the input. - PassBuilder::OptimizationLevel OL; - - switch (OptLevel) { - default: - llvm_unreachable("Invalid optimization level"); - case 0: - OL = PassBuilder::O0; - break; - case 1: - OL = PassBuilder::O1; - break; - case 2: - OL = PassBuilder::O2; - break; - case 3: - OL = PassBuilder::O3; - break; - } + PassBuilder::OptimizationLevel OL = mapToLevel(Conf); if (IsThinLTO) MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager, @@ -287,6 +300,7 @@ PMB.LoopVectorize = true; PMB.SLPVectorize = true; PMB.OptLevel = Conf.OptLevel; + PMB.SizeLevel = Conf.SizeLevel; PMB.PGOSampleUse = Conf.SampleProfile; PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr; if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) { @@ -300,18 +314,17 @@ passes.run(Mod); } -bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, - bool IsThinLTO, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { +bool optimize(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary) { // FIXME: Plumb the combined index into the new pass manager. 
if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); - else if (Conf.UseNewPM) - runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary, - ImportSummary); - else - runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); + else { + auto runPasses = Conf.UseNewPM ? runNewPMPasses : runOldPMPasses; + runPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); + } return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); } @@ -464,8 +477,8 @@ auto DiagnosticOutputFile = std::move(*DiagFileOrErr); if (!C.CodeGenOnly) { - if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, - /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) + if (!optimize(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, + /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); } @@ -518,6 +531,8 @@ return DiagFileOrErr.takeError(); auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + LTO::addFnAttrForOptSize(Conf.SizeLevel, Mod); + if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); @@ -559,8 +574,8 @@ if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); - if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, - /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) + if (!optimize(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); codegen(Conf, TM.get(), AddStream, Task, Mod); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -554,6 +554,7 @@ // Add an appropriate DataLayout instance for 
this module... MergedModule->setDataLayout(TargetMach->createDataLayout()); + lto::LTO::addFnAttrForOptSize(SizeLevel, *MergedModule); passes.add( createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis())); @@ -569,6 +570,7 @@ if (Freestanding) PMB.LibraryInfo->disableAllFunctions(); PMB.OptLevel = OptLevel; + PMB.SizeLevel = SizeLevel; PMB.VerifyInput = !DisableVerify; PMB.VerifyOutput = !DisableVerify; diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -227,8 +227,8 @@ } static void optimizeModule(Module &TheModule, TargetMachine &TM, - unsigned OptLevel, bool Freestanding, - ModuleSummaryIndex *Index) { + unsigned OptLevel, unsigned SizeLevel, + bool Freestanding, ModuleSummaryIndex *Index) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); @@ -237,6 +237,7 @@ PMB.Inliner = createFunctionInliningPass(); // FIXME: should get it from the bitcode? PMB.OptLevel = OptLevel; + PMB.SizeLevel = SizeLevel; PMB.LoopVectorize = true; PMB.SLPVectorize = true; // Already did this in verifyLoadedModule(). 
@@ -317,7 +318,8 @@ const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel, - bool Freestanding, const TargetMachineBuilder &TMBuilder) { + unsigned SizeLevel, bool Freestanding, + const TargetMachineBuilder &TMBuilder) { if (CachePath.empty()) return; @@ -332,6 +334,7 @@ llvm::lto::Config Conf; Conf.OptLevel = OptLevel; + Conf.SizeLevel = SizeLevel; Conf.Options = TMBuilder.Options; Conf.CPU = TMBuilder.MCpu; Conf.MAttrs.push_back(TMBuilder.MAttr); @@ -405,7 +408,9 @@ const GVSummaryMapTy &DefinedGlobals, const ThinLTOCodeGenerator::CachingOptions &CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, - bool Freestanding, unsigned OptLevel, unsigned count) { + bool Freestanding, unsigned OptLevel, unsigned SizeLevel, + unsigned count) { + lto::LTO::addFnAttrForOptSize(SizeLevel, TheModule); // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -437,7 +442,7 @@ saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); + optimizeModule(TheModule, TM, OptLevel, SizeLevel, Freestanding, &Index); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -848,8 +853,8 @@ initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now - optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, - nullptr); + optimizeModule(TheModule, *TMBuilder.create(), OptLevel, SizeLevel, + Freestanding, nullptr); } /// Write out the generated object file, either from CacheEntryPath or from @@ -1050,8 +1055,8 @@ ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, ImportLists[ModuleIdentifier], ExportList, ResolvedODR[ModuleIdentifier], - DefinedGVSummaries, OptLevel, Freestanding, - TMBuilder); + DefinedGVSummaries, OptLevel, SizeLevel, + Freestanding, TMBuilder); auto CacheEntryPath = CacheEntry.getEntryPath(); { @@ 
-1096,7 +1101,8 @@ *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, ExportList, GUIDPreservedSymbols, ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, - DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count); + DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, SizeLevel, + count); // Commit to the cache (if enabled) CacheEntry.write(*OutputBuffer); diff --git a/llvm/test/LTO/X86/Inputs/opt-level-size.ll b/llvm/test/LTO/X86/Inputs/opt-level-size.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/Inputs/opt-level-size.ll @@ -0,0 +1,23 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-O: define void @foo_optnone({{.*}}) {{.*}}#[[ATTR0:.*]] { +define void @foo_optnone() noinline optnone { + ret void +} + +; CHECK-Os: define void @foo({{.*}}) {{.*}}#[[ATTR1:.*]] { +; CHECK-Oz: define void @foo({{.*}}) {{.*}}#[[ATTR2:.*]] { +define void @foo() { + ret void +} + +; CHECK-O: attributes #[[ATTR0]] = {{{.*}} optnone {{.*}}} +; CHECK-Os-NOT: attributes #[[ATTR0]] = {{{.*}} optsize {{.*}}} +; CHECK-Oz-NOT: attributes #[[ATTR0]] = {{{.*}} minsize {{.*}}} + +; CHECK-Os: attributes #[[ATTR1]] = {{{.*}} optsize {{.*}}} +; CHECK-Oz: attributes #[[ATTR2]] = {{{.*}} minsize {{.*}}} + +!0 = !{i32 1, !"ThinLTO", i32 0} +!llvm.module.flags = !{ !0 } diff --git a/llvm/test/LTO/X86/opt-level-size.ll b/llvm/test/LTO/X86/opt-level-size.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/opt-level-size.ll @@ -0,0 +1,72 @@ +; Test that specifying -Os adds 'optsize' to the function attributes set and +; test that specifying -Oz adds 'minsize' to the function attributes set. +; If 'optnone' is already in the function attributes set, the set is not changed. 
+ +; Legacy LTO API (Full) +; RUN: llvm-as -o %t1.bc %s +; RUN: llvm-lto -exported-symbol=foo_optnone -exported-symbol=foo \ +; RUN: -save-merged-module -Os -o %t2 %t1.bc +; RUN: llvm-dis < %t2.merged.bc -o - | FileCheck --check-prefixes=CHECK-O,CHECK-Os %s +; RUN: llvm-lto -exported-symbol=foo_optnone -exported-symbol=foo \ +; RUN: -save-merged-module -Oz -o %t2 %t1.bc +; RUN: llvm-dis < %t2.merged.bc -o - | FileCheck --check-prefixes=CHECK-O,CHECK-Oz %s + +; Legacy LTO API (Thin) +; RUN: opt -module-summary -thinlto-bc %s -o %t3.bc +; RUN: llvm-lto -exported-symbol=foo_optnone -exported-symbol=foo \ +; RUN: -thinlto-action=run -thinlto-save-temps=%t3. -Os %t3.bc +; RUN: llvm-dis < %t3.0.2.internalized.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Os %s +; RUN: llvm-lto -exported-symbol=foo_optnone -exported-symbol=foo \ +; RUN: -thinlto-action=run -thinlto-save-temps=%t3. -Oz %t3.bc +; RUN: llvm-dis < %t3.0.2.internalized.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Oz %s + +; LTO API (Full) +; TODO: opt has trouble creating a full LTO bitcode with summaries because +; it could not write "ThinLTO" module flags for the moment (Fixing this +; requires a lot of test fixing). Hence this extra file in Inputs. 
+; RUN: opt -module-summary %S/Inputs/opt-level-size.ll -o %t4.bc +; RUN: llvm-lto2 run -save-temps %t4.bc -Os -o %t4 \ +; RUN: -r=%t4.bc,foo_optnone,px \ +; RUN: -r=%t4.bc,foo,px +; RUN: llvm-dis < %t4.0.2.internalize.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Os %s +; RUN: llvm-lto2 run -save-temps %t4.bc -Oz -o %t4 \ +; RUN: -r=%t4.bc,foo_optnone,px \ +; RUN: -r=%t4.bc,foo,px +; RUN: llvm-dis < %t4.0.2.internalize.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Oz %s + +; LTO API (Thin) +; RUN: llvm-lto2 run -save-temps %t3.bc -Os -o %t3 \ +; RUN: -r=%t3.bc,foo_optnone,px \ +; RUN: -r=%t3.bc,foo,px +; RUN: llvm-dis < %t3.1.2.internalize.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Os %s +; RUN: llvm-lto2 run -save-temps %t3.bc -Oz -o %t3 \ +; RUN: -r=%t3.bc,foo_optnone,px \ +; RUN: -r=%t3.bc,foo,px +; RUN: llvm-dis < %t3.1.2.internalize.bc -o - | \ +; RUN: FileCheck --check-prefixes=CHECK-O,CHECK-Oz %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-O: define void @foo_optnone({{.*}}) {{.*}}#[[ATTR0:.*]] { +define void @foo_optnone() noinline optnone { + ret void +} + +; CHECK-Os: define void @foo({{.*}}) {{.*}}#[[ATTR1:.*]] { +; CHECK-Oz: define void @foo({{.*}}) {{.*}}#[[ATTR2:.*]] { +define void @foo() { + ret void +} + +; CHECK-O: attributes #[[ATTR0]] = {{{.*}} optnone {{.*}}} +; CHECK-Os-NOT: attributes #[[ATTR0]] = {{{.*}} optsize {{.*}}} +; CHECK-Oz-NOT: attributes #[[ATTR0]] = {{{.*}} minsize {{.*}}} + +; CHECK-Os: attributes #[[ATTR1]] = {{{.*}} optsize {{.*}}} +; CHECK-Oz: attributes #[[ATTR2]] = {{{.*}} minsize {{.*}}} diff --git a/llvm/test/tools/lto/opt-level.ll b/llvm/test/tools/lto/opt-level.ll --- a/llvm/test/tools/lto/opt-level.ll +++ b/llvm/test/tools/lto/opt-level.ll @@ -3,6 +3,10 @@ ; RUN: llvm-nm --no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O0 %s ; RUN: %ld64 -lto_library %llvmshlibdir/libLTO.dylib -arch x86_64 
-dylib -mllvm -O2 -o %t.dylib %t.o
 ; RUN: llvm-nm --no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O2 %s
+; RUN: %ld64 -lto_library %llvmshlibdir/libLTO.dylib -arch x86_64 -dylib -mllvm -Os -o %t.dylib %t.o
+; RUN: llvm-nm --no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O2 %s
+; RUN: %ld64 -lto_library %llvmshlibdir/libLTO.dylib -arch x86_64 -dylib -mllvm -Oz -o %t.dylib %t.o
+; RUN: llvm-nm --no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O2 %s
 
 target triple = "x86_64-apple-macosx10.8.0"
 
diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
--- a/llvm/tools/gold/gold-plugin.cpp
+++ b/llvm/tools/gold/gold-plugin.cpp
@@ -132,6 +132,7 @@
   };
   static OutputType TheOutputType = OT_NORMAL;
   static unsigned OptLevel = 2;
+  static unsigned SizeLevel = 0;
   // Default parallelism of 0 used to indicate that user did not specify.
   // Actual parallelism default value depends on implementation.
   // Currently only affects ThinLTO, where the default is
@@ -261,9 +262,17 @@
     } else if (opt.startswith("cache-policy=")) {
       cache_policy = opt.substr(strlen("cache-policy="));
     } else if (opt.size() == 2 && opt[0] == 'O') {
-      if (opt[1] < '0' || opt[1] > '3')
-        message(LDPL_FATAL, "Optimization level must be between 0 and 3");
-      OptLevel = opt[1] - '0';
+      // The parsed error text echoes user input, so it is passed as a "%s"
+      // argument rather than as message()'s printf-style format string.
+      if (auto LevelOrErr = llvm::lto::getOptLevel(opt.substr(1, 1)))
+        OptLevel = *LevelOrErr;
+      else
+        message(LDPL_FATAL, "%s", toString(LevelOrErr.takeError()).c_str());
+
+      if (auto LevelOrErr = llvm::lto::getSizeLevel(opt.substr(1, 1)))
+        SizeLevel = *LevelOrErr;
+      else
+        message(LDPL_FATAL, "%s", toString(LevelOrErr.takeError()).c_str());
     } else if (opt.startswith("jobs=")) {
       if (StringRef(opt_ + 5).getAsInteger(10, Parallelism))
         message(LDPL_FATAL, "Invalid parallelism level: %s", opt_ + 5);
@@ -860,6 +869,7 @@
   Conf.CGOptLevel = getCGOptLevel();
   Conf.DisableVerify = options::DisableVerify;
   Conf.OptLevel = options::OptLevel;
+  Conf.SizeLevel = options::SizeLevel;
   Conf.PTO.LoopVectorization = options::OptLevel >
1; Conf.PTO.SLPVectorization = options::OptLevel > 1; diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -62,10 +62,11 @@ using namespace llvm; -static cl::opt - OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), - cl::Prefix, cl::ZeroOrMore, cl::init('2')); +static cl::opt + OptLevel("O", + cl::desc("Optimization level. [-O0, -O1, -O2, -Os, -Oz or " + "-O3] (default = '-O2')"), + cl::Prefix, cl::ZeroOrMore, cl::init("2")); static cl::opt IndexStats("thinlto-index-stats", @@ -530,6 +531,14 @@ ThinGenerator.setCacheMaxSizeBytes(ThinLTOCacheMaxSizeBytes); ThinGenerator.setFreestanding(EnableFreestanding); + LLVMContext Ctx; + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getOptLevel(OptLevel))) + ThinGenerator.setOptLevel(*Result); + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getSizeLevel(OptLevel))) + ThinGenerator.setSizeLevel(*Result); + // Add all the exported symbols to the table of symbols to preserve. for (unsigned i = 0; i < ExportedSymbols.size(); ++i) ThinGenerator.preserveSymbol(ExportedSymbols[i]); @@ -863,9 +872,6 @@ InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv, "llvm LTO linker\n"); - if (OptLevel < '0' || OptLevel > '3') - error("optimization level must be between 0 and 3"); - // Initialize the configured targets. InitializeAllTargets(); InitializeAllTargetMCs(); @@ -982,7 +988,12 @@ // Set cpu and attrs strings for the default target/subtarget. 
CodeGen.setCpu(MCPU.c_str()); - CodeGen.setOptLevel(OptLevel - '0'); + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Context, llvm::lto::getOptLevel(OptLevel))) + CodeGen.setOptLevel(*Result); + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Context, llvm::lto::getSizeLevel(OptLevel))) + CodeGen.setSizeLevel(*Result); std::string attrs; for (unsigned i = 0; i < MAttrs.size(); ++i) { diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -29,10 +29,11 @@ using namespace llvm; using namespace lto; -static cl::opt - OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), - cl::Prefix, cl::ZeroOrMore, cl::init('2')); +static cl::opt + OptLevel("O", + cl::desc("Optimization level. [-O0, -O1, -O2, -Os, -Oz, or " + "-O3] (default = '-O2')"), + cl::Prefix, cl::ZeroOrMore, cl::init("2")); static cl::opt CGOptLevel( "cg-opt-level", @@ -244,7 +245,10 @@ Conf.OptPipeline = OptPipeline; Conf.AAPipeline = AAPipeline; - Conf.OptLevel = OptLevel - '0'; + Conf.OptLevel = + check(llvm::lto::getOptLevel(OptLevel), "invalid optimization level"); + Conf.SizeLevel = + check(llvm::lto::getSizeLevel(OptLevel), "invalid optimization level"); Conf.UseNewPM = UseNewPM; switch (CGOptLevel) { case '0': diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp --- a/llvm/tools/lto/lto.cpp +++ b/llvm/tools/lto/lto.cpp @@ -28,14 +28,13 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" -// extra command-line flags needed for LTOCodeGenerator -static cl::opt +static cl::opt OptLevel("O", - cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " + cl::desc("Optimization level. 
[-O0, -O1, -O2, -Os, -Oz or -O3] " "(default = '-O2')"), cl::Prefix, cl::ZeroOrMore, - cl::init('2')); + cl::init("2")); static cl::opt DisableInline("disable-inlining", cl::init(false), @@ -165,9 +164,15 @@ CG->setAttr(attrs); } - if (OptLevel < '0' || OptLevel > '3') - report_fatal_error("Optimization level must be between 0 and 3"); - CG->setOptLevel(OptLevel - '0'); + LLVMContext &Ctx = CG->getContext(); + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getOptLevel(OptLevel))) + CG->setOptLevel(*Result); + + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getSizeLevel(OptLevel))) + CG->setSizeLevel(*Result); + CG->setFreestanding(EnableFreestanding); } @@ -487,25 +492,19 @@ CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags()); CodeGen->setFreestanding(EnableFreestanding); - if (OptLevel.getNumOccurrences()) { - if (OptLevel < '0' || OptLevel > '3') - report_fatal_error("Optimization level must be between 0 and 3"); - CodeGen->setOptLevel(OptLevel - '0'); - switch (OptLevel) { - case '0': - CodeGen->setCodeGenOptLevel(CodeGenOpt::None); - break; - case '1': - CodeGen->setCodeGenOptLevel(CodeGenOpt::Less); - break; - case '2': - CodeGen->setCodeGenOptLevel(CodeGenOpt::Default); - break; - case '3': - CodeGen->setCodeGenOptLevel(CodeGenOpt::Aggressive); - break; - } - } + LLVMContext Ctx; + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getOptLevel(OptLevel))) + CodeGen->setOptLevel(*Result); + else + return nullptr; + + if (ErrorOr Result = expectedToErrorOrAndEmitErrors( + Ctx, llvm::lto::getSizeLevel(OptLevel))) + CodeGen->setSizeLevel(*Result); + else + return nullptr; + return wrap(CodeGen); }