Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1254,8 +1254,19 @@ // alignment information, try to re-derive it here. FPM.addPass(AlignmentFromAssumptionsPass()); - if (IsFullLTO) + if (IsFullLTO) { FPM.addPass(InstCombinePass()); + + // This is needed to work around problems that instcombine introduces, such + // as sinking expensive FP divides into loops containing multiplications + // using the divide result. + // For normal compilation without LTO we never run the InstCombine pass + // after LICM so we avoid this problem. + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), + /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); + } } ModulePassManager Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -131,6 +131,9 @@ ; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo ; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo ; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/lto-licm.ll +++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll @@ -4,6 +4,7 @@ define void @hoist_fdiv(ptr %a, float %b) { ; CHECK-LABEL: @hoist_fdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -12,9 +13,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]] -; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]] +; CHECK-NEXT: store float [[TMP2]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: