diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,7 +59,9 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -368,6 +370,18 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineModuleInfoWrapperPass>();
     AU.addPreserved<MachineModuleInfoWrapperPass>();
+    // Profile analyses are only requested when not outlining from every
+    // function; populateMapper() queries them under the same condition.
+    if (!RunOnAllFunctions) {
+      // FIXME: This breaks some tests.
+      // TODO: As it is now, this code turns on the machine outliner for
+      // everyone (even those that don't target AArch64) that provides profile
+      // data unless they supply `-enable-machine-outliner=never`. The
+      // alternative is to add another option like
+      // `-enable-machine-outliner=profile_guided`.
+      AU.addRequired<BlockFrequencyInfoWrapperPass>();
+      AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    }
     AU.setPreservesAll();
     ModulePass::getAnalysisUsage(AU);
   }
@@ -871,6 +885,12 @@
 
 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
                                      MachineModuleInfo &MMI) {
+  // ProfileSummaryInfo is optional: without it, no profile-based filtering
+  // of hot blocks is performed.
+  ProfileSummaryInfo *PSI = nullptr;
+  if (auto *Wrapper = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>())
+    PSI = &Wrapper->getPSI();
+
   // Build instruction mappings for each function in the module. Start by
   // iterating over each Function in M.
   for (Function &F : M) {
@@ -891,7 +913,17 @@
 
     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
-    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
+    // Only query BlockFrequencyInfo when getAnalysisUsage() required it (i.e.
+    // when not outlining from every function); requesting an analysis that was
+    // not required is invalid. A non-null BFI means F has instrumentation
+    // profile data that can be used to skip hot blocks.
+    BlockFrequencyInfo *BFI = nullptr;
+    if (!RunOnAllFunctions && PSI && PSI->hasInstrumentationProfile() &&
+        F.hasProfileData())
+      BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+
+    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF) &&
+        !BFI)
       continue;
 
     // We have a MachineFunction. Ask the target if it's suitable for outlining.
@@ -917,6 +949,13 @@
       if (MBB.hasAddressTaken())
         continue;
 
+      // TODO: We could either avoid outlining hot blocks, or only consider
+      // outlining cold blocks.
+      // If profile data is available to the machine outliner, then do not
+      // outline hot blocks.
+      if (BFI && PSI->isHotBlock(MBB.getBasicBlock(), BFI))
+        continue;
+
       // MBB is suitable for outlining. Map it to a list of unsigneds.
       Mapper.convertToUnsignedVec(MBB, *TII);
     }
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll b/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+declare void @z(i32, i32, i32, i32)
+
+; CHECK-LABEL: cold:
+define void @cold() !prof !20 {
+entry:
+; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: hot:
+define void @hot() !prof !21 {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: minsize_cold:
+define void @minsize_cold() optsize minsize !prof !20 {
+entry:
+; CHECK: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: minsize_hot:
+define void @minsize_hot() optsize minsize !prof !21 {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile:
+define void @no_profile() {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: minsize_no_profile:
+define void @minsize_no_profile() optsize minsize {
+entry:
+; CHECK: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK: [[OUTLINED]]:
+; CHECK-SAME: // @{{.*}} Tail Call
+; CHECK: mov w0, #1
+; CHECK-NEXT: mov w1, #2
+; CHECK-NEXT: mov w2, #3
+; CHECK-NEXT: mov w3, #4
+; CHECK-NEXT: b z
+
+!llvm.module.flags = !{!1}
+!20 = !{!"function_entry_count", i64 0}
+!21 = !{!"function_entry_count", i64 100}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 200}
+!5 = !{!"MaxCount", i64 100}
+!6 = !{!"MaxInternalCount", i64 0}
+!7 = !{!"MaxFunctionCount", i64 100}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 4}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 10, i32 4}
+!13 = !{i32 999999, i64 10, i32 4}