Index: include/llvm/Passes/PassBuilder.h
===================================================================
--- include/llvm/Passes/PassBuilder.h
+++ include/llvm/Passes/PassBuilder.h
@@ -188,9 +188,14 @@
   /// only intended for use when attempting to optimize code. If frontends
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
+  ///
+  /// \p PrepareForThinLTO indicates whether this is invoked in the
+  /// PrepareForThinLTO phase. Special handling is needed for sample PGO to
+  /// keep the profile accurate in the backend profile annotation phase.
   FunctionPassManager
   buildFunctionSimplificationPipeline(OptimizationLevel Level,
-                                      bool DebugLogging = false);
+                                      bool DebugLogging = false,
+                                      bool PrepareForThinLTO = false);
 
   /// Construct the core LLVM module canonicalization and simplification
   /// pipeline.
@@ -205,9 +210,14 @@
   /// only intended for use when attempting to optimize code. If frontends
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
+  ///
+  /// \p PrepareForThinLTO indicates whether this is invoked in the
+  /// PrepareForThinLTO phase. Special handling is needed for sample PGO to
+  /// keep the profile accurate in the backend profile annotation phase.
   ModulePassManager
   buildModuleSimplificationPipeline(OptimizationLevel Level,
-                                    bool DebugLogging = false);
+                                    bool DebugLogging = false,
+                                    bool PrepareForThinLTO = false);
 
   /// Construct the core LLVM module optimization pipeline.
   ///
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -307,7 +307,8 @@
 
 FunctionPassManager
 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
-                                                 bool DebugLogging) {
+                                                 bool DebugLogging,
+                                                 bool PrepareForThinLTO) {
   assert(Level != O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
@@ -364,10 +365,11 @@
   LPM2.addPass(IndVarSimplifyPass());
   LPM2.addPass(LoopIdiomRecognizePass());
   LPM2.addPass(LoopDeletionPass());
-  // FIXME: The old pass manager has a hack to disable loop unrolling during
-  // ThinLTO when using sample PGO. Need to either fix it or port some
-  // workaround.
-  LPM2.addPass(LoopUnrollPass::createFull(Level));
+  // Do not unroll in the PrepareForThinLTO phase during sample PGO because it
+  // changes the IR and makes backend profile annotation inaccurate. PGOOpt may
+  // be unset, in which case sample PGO is off and unrolling stays enabled.
+  if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty())
+    LPM2.addPass(LoopUnrollPass::createFull(Level));
 
   // We provide the opt remark emitter pass for LICM to use. We only need to do
   // this once as it is immutable.
@@ -490,7 +492,8 @@
 
 ModulePassManager
 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
-                                               bool DebugLogging) {
+                                               bool DebugLogging,
+                                               bool PrepareForThinLTO) {
   ModulePassManager MPM(DebugLogging);
 
   // Do basic inference of function attributes from known properties of system
@@ -544,8 +547,11 @@
       MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
 
     // Indirect call promotion that promotes intra-module targes only.
-    MPM.addPass(PGOIndirectCallPromotion(
-        false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
+    // Skip it in the PrepareForThinLTO phase during sample PGO because it
+    // changes the IR and makes backend profile annotation inaccurate.
+    if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty())
+      MPM.addPass(PGOIndirectCallPromotion(
+          false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
   }
 
   // Require the GlobalsAA analysis for the module so we can query it within
@@ -570,7 +576,12 @@
   // Run the inliner first. The theory is that we are walking bottom-up and so
   // the callees have already been fully optimized, and we want to inline them
   // into the callers so that our optimizations can reflect that.
-  MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level)));
+  // For PrepareForThinLTO pass, we disable hot-caller heuristic for sample PGO
+  // because it makes profile annotation in the backend inaccurate.
+  InlineParams IP = getInlineParamsFromOptLevel(Level);
+  if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty())
+    IP.HotCallSiteThreshold = 0;
+  MainCGPipeline.addPass(InlinerPass(IP));
 
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -583,7 +594,8 @@
   // Lastly, add the core function simplification pipeline nested inside the
   // CGSCC walk.
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
-      buildFunctionSimplificationPipeline(Level, DebugLogging)));
+      buildFunctionSimplificationPipeline(Level, DebugLogging,
+                                          PrepareForThinLTO)));
 
   // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
   // to detect when we devirtualize indirect calls and iterate the SCC passes
@@ -726,7 +738,8 @@
   MPM.addPass(ForceFunctionAttrsPass());
 
   // Add the core simplification pipeline.
-  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+                                                /*PrepareForThinLTO=*/false));
 
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
@@ -747,7 +760,8 @@
   // If we are planning to perform ThinLTO later, we don't bloat the code with
   // unrolling/vectorization/... now. Just simplify the module as much as we
   // can.
-  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+                                                /*PrepareForThinLTO=*/true));
 
   // Run partial inlining pass to partially inline functions that have
   // large bodies.
@@ -785,7 +799,8 @@
                                        !PGOOpt->ProfileUseFile.empty()));
 
   // Add the core simplification pipeline.
-  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+  MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+                                                /*PrepareForThinLTO=*/false));
 
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));