Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1243,19 +1243,24 @@ // or SimplifyCFG passes scheduled after us, that would cleanup // the CFG mess this may created if allowed to modify CFG, so forbid that. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); - FPM.addPass(InstCombinePass()); - FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true), - /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); } + FPM.addPass(InstCombinePass()); + + // This is needed for two reasons: + // 1. It works around problems that instcombine introduces, such as sinking + // expensive FP divides into loops containing multiplications using the + // divide result. + // 2. It helps to clean up some loop-invariant code created by the loop + // unroll pass when IsFullLTO=false. + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), + /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); + // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. FPM.addPass(AlignmentFromAssumptionsPass()); - - if (IsFullLTO) - FPM.addPass(InstCombinePass()); } ModulePassManager Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -129,8 +129,11 @@ ; CHECK-O3-NEXT: Running pass: SLPVectorizerPass on foo ; CHECK-OS-NEXT: Running pass: SLPVectorizerPass on foo ; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo -; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Running pass: LICMPass +; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo ; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/lto-licm.ll +++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll @@ -4,6 +4,7 @@ define void @hoist_fdiv(ptr %a, float %b) { ; CHECK-LABEL: @hoist_fdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -12,9 +13,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]] -; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]] +; CHECK-NEXT: store float [[TMP2]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: