diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1182,23 +1182,6 @@ addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); - // Split out cold code. Splitting is done late to avoid hiding context from - // other optimizations and inadvertently regressing performance. The tradeoff - // is that this has a higher code size cost than splitting early. - if (EnableHotColdSplit && !LTOPreLink) - MPM.addPass(HotColdSplittingPass()); - - // Search the code for similar regions of code. If enough similar regions can - // be found where extracting the regions into their own function will decrease - // the size of the program, we extract the regions, a deduplicate the - // structurally similar regions. - if (EnableIROutliner) - MPM.addPass(IROutlinerPass()); - - // Merge functions if requested. - if (PTO.MergeFunctions) - MPM.addPass(MergeFunctionsPass()); - // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -1226,6 +1209,23 @@ for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); + // Split out cold code. Splitting is done late to avoid hiding context from + // other optimizations and inadvertently regressing performance. The tradeoff + // is that this has a higher code size cost than splitting early. + if (EnableHotColdSplit && !LTOPreLink) + MPM.addPass(HotColdSplittingPass()); + + // Search the code for similar regions of code. If enough similar regions can + // be found where extracting the regions into their own function will decrease + // the size of the program, we extract the regions, a deduplicate the + // structurally similar regions. + if (EnableIROutliner) + MPM.addPass(IROutlinerPass()); + + // Merge functions if requested. + if (PTO.MergeFunctions) + MPM.addPass(MergeFunctionsPass()); + if (PTO.CallGraphProfile) MPM.addPass(CGProfilePass()); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll @@ -90,15 +90,8 @@ define i1 @test2(i32 %c) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = add i32 [[C:%.*]], -100 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 20 -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i20 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i20 -490991, [[SWITCH_CAST]] -; CHECK-NEXT: [[TMP1:%.*]] = and i20 [[SWITCH_DOWNSHIFT]], 1 -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = icmp ne i20 [[TMP1]], 0 -; CHECK-NEXT: [[I_0:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false -; CHECK-NEXT: ret i1 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @test1(i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: ret i1 [[TMP2]] ; entry: %i = alloca i8, align 1