diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,7 +59,9 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -113,6 +115,11 @@
     cl::desc(
         "Number of times to rerun the outliner after the initial outline"));
 
+static cl::opt<bool>
+    UseProfileData("machine-outliner-use-profile-data", cl::init(false),
+                   cl::Hidden,
+                   cl::desc("Use profile data to avoid outlining hot blocks."));
+
 namespace {
 
 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -368,6 +375,10 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineModuleInfoWrapperPass>();
     AU.addPreserved<MachineModuleInfoWrapperPass>();
+    if (UseProfileData) {
+      AU.addRequired<BlockFrequencyInfoWrapperPass>();
+      AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    }
     AU.setPreservesAll();
     ModulePass::getAnalysisUsage(AU);
   }
@@ -871,6 +882,10 @@
 
 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
                                      MachineModuleInfo &MMI) {
+  ProfileSummaryInfo *PSI = nullptr;
+  if (auto *Wrapper = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>())
+    PSI = &Wrapper->getPSI();
+
   // Build instruction mappings for each function in the module. Start by
   // iterating over each Function in M.
   for (Function &F : M) {
@@ -891,7 +906,12 @@
 
     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
-    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
+    BlockFrequencyInfo *BFI = nullptr;
+    if (UseProfileData && PSI && F.hasProfileData())
+      BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+
+    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF) &&
+        !BFI)
       continue;
 
     // We have a MachineFunction. Ask the target if it's suitable for outlining.
@@ -917,6 +937,11 @@
       if (MBB.hasAddressTaken())
         continue;
 
+      // If we have profile data then avoid outlining hot blocks.
+      if (BFI && PSI->isHotBlock(MBB.getBasicBlock(), BFI))
+        continue;
+      // TODO: Should we use isHotBlockNthPercentile() instead?
+
       // MBB is suitable for outlining. Map it to a list of unsigneds.
       Mapper.convertToUnsignedVec(MBB, *TII);
     }
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll b/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-profile.ll
@@ -0,0 +1,84 @@
+; RUN: llc -machine-outliner-use-profile-data -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+declare void @z(i32, i32, i32, i32)
+
+; CHECK-LABEL: cold:
+define void @cold() !prof !20 {
+entry:
+; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: hot:
+define void @hot() !prof !21 {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: small_cold:
+define void @small_cold() optsize minsize !prof !20 {
+entry:
+; CHECK: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: small_hot:
+define void @small_hot() optsize minsize !prof !21 {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile:
+define void @no_profile() {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: small_no_profile:
+define void @small_no_profile() optsize minsize {
+entry:
+; CHECK: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK: [[OUTLINED]]:
+; CHECK-SAME: // @{{.*}} Tail Call
+; CHECK: mov w0, #1
+; CHECK-NEXT: mov w1, #2
+; CHECK-NEXT: mov w2, #3
+; CHECK-NEXT: mov w3, #4
+; CHECK-NEXT: b z
+
+!llvm.module.flags = !{!1}
+!20 = !{!"function_entry_count", i64 0}
+!21 = !{!"function_entry_count", i64 100}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 200}
+!5 = !{!"MaxCount", i64 100}
+!6 = !{!"MaxInternalCount", i64 0}
+!7 = !{!"MaxFunctionCount", i64 100}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 4}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 0, i32 2}
+!13 = !{i32 990000, i64 100, i32 4}
+!14 = !{i32 999999, i64 100, i32 4}