Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1839,6 +1839,17 @@ MainFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); + + LoopPassManager LateLPM(DebugLogging); + LateLPM.addPass( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + + // LICM should always be run after the final InstCombine because InstCombine + // sinks instructions without regard to loop-invariance. + MainFPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LateLPM), /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/true, DebugLogging)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); // Create a function that performs CFI checks for cross-DSO calls with Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1189,6 +1189,10 @@ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); } + // LICM should always be run after the final InstCombine because InstCombine + // sinks instructions without regard to loop-invariance. + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. PM.add(createCrossDSOCFIPass()); Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -133,6 +133,13 @@ ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo +; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo +; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Starting Loop pass manager run. +; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop +; CHECK-O23SZ-NEXT: Finished Loop pass manager run. ; CHECK-O23SZ-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass Index: llvm/test/Other/opt-LTO-pipeline.ll =================================================================== --- llvm/test/Other/opt-LTO-pipeline.ll +++ llvm/test/Other/opt-LTO-pipeline.ll @@ -176,6 +176,18 @@ ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion ; CHECK-NEXT: Cross-DSO CFI ; CHECK-NEXT: Lower type metadata ; CHECK-NEXT: Lower type metadata Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/lto-licm.ll +++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll @@ -5,6 +5,7 @@ define void @hoist_fdiv(float* %a, float %b) { ; CHECK-LABEL: @hoist_fdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -13,9 +14,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]] -; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]] +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: