diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -60,7 +60,7 @@
 
   /// createMachineFunctionSplitterPass - This pass splits machine functions
   /// using profile information.
-  MachineFunctionPass *createMachineFunctionSplitterPass();
+  MachineFunctionPass *createMachineFunctionSplitterPass(bool IsFSAFDOFlavor);
 
   /// MachineFunctionPrinter pass - This pass prints out the machine function to
   /// the given stream as a debugging tool.
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -25,6 +25,9 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/EHUtils.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -69,7 +72,8 @@
 class MachineFunctionSplitter : public MachineFunctionPass {
 public:
   static char ID;
-  MachineFunctionSplitter() : MachineFunctionPass(ID) {
+  explicit MachineFunctionSplitter(bool IsFSAFDOFlavor)
+      : MachineFunctionPass(ID), IsFSAFDOFlavor(IsFSAFDOFlavor) {
     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
   }
 
@@ -80,6 +84,10 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
   bool runOnMachineFunction(MachineFunction &F) override;
+
+private:
+  // Whether this pass is using FSAFDO profile or not.
+  bool IsFSAFDOFlavor = false;
 };
 } // end anonymous namespace
 
@@ -96,8 +104,12 @@
 
 static bool isColdBlock(const MachineBasicBlock &MBB,
                         const MachineBlockFrequencyInfo *MBFI,
-                        ProfileSummaryInfo *PSI) {
+                        ProfileSummaryInfo *PSI,
+                        bool IsFSAFDOFlavor) {
   std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+  // With an FSAFDO profile, only an explicit zero count marks a block cold.
+  if (IsFSAFDOFlavor)
+    return Count.has_value() && Count.value() == 0;
   if (!Count)
     return true;
 
@@ -140,9 +152,19 @@
 
   MachineBlockFrequencyInfo *MBFI = nullptr;
   ProfileSummaryInfo *PSI = nullptr;
+  bool IsCandidateForSplit = true;
   if (UseProfileData) {
     MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
     PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    if (IsFSAFDOFlavor) {
+      // Build the IR-level frequency info on the stack: it is only needed for
+      // this hotness query, and heap-allocating it here would leak it.
+      const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+      const BranchProbabilityInfo BPI(MF.getFunction(), LI);
+      BlockFrequencyInfo BFI(MF.getFunction(), BPI, LI);
+      IsCandidateForSplit =
+          PSI && PSI->isFunctionHotInCallGraph(&MF.getFunction(), BFI);
+    }
   }
 
   SmallVector<MachineBasicBlock *, 2> LandingPads;
@@ -152,7 +174,8 @@
 
     if (MBB.isEHPad())
       LandingPads.push_back(&MBB);
-    else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
+    else if (UseProfileData && IsCandidateForSplit &&
+             isColdBlock(MBB, MBFI, PSI, IsFSAFDOFlavor) && !SplitAllEHCode)
       MBB.setSectionID(MBBSectionID::ColdSectionID);
   }
 
@@ -163,7 +186,7 @@
   else {
     bool HasHotLandingPads = false;
     for (const MachineBasicBlock *LP : LandingPads) {
-      if (!isColdBlock(*LP, MBFI, PSI))
+      if (!(IsCandidateForSplit && isColdBlock(*LP, MBFI, PSI, IsFSAFDOFlavor)))
         HasHotLandingPads = true;
     }
     if (!HasHotLandingPads) {
@@ -180,6 +203,7 @@
 }
 
 void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<MachineModuleInfoWrapperPass>();
   AU.addRequired<MachineBlockFrequencyInfo>();
   AU.addRequired<ProfileSummaryInfoWrapperPass>();
@@ -190,6 +214,7 @@
                 "Split machine functions using profile information", false,
                 false)
 
-MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
-  return new MachineFunctionSplitter();
+MachineFunctionPass *
+llvm::createMachineFunctionSplitterPass(bool IsFSAFDOFlavor) {
+  return new MachineFunctionSplitter(IsFSAFDOFlavor);
 }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1225,14 +1225,6 @@
   addPass(&XRayInstrumentationID);
   addPass(&PatchableFunctionID);
 
-  if (EnableFSDiscriminator && !FSNoFinalDiscrim)
-    // Add FS discriminators here so that all the instruction duplicates
-    // in different BBs get their own discriminators. With this, we can "sum"
-    // the SampleFDO counters instead of using MAX. This will improve the
-    // SampleFDO profile quality.
-    addPass(createMIRAddFSDiscriminatorsPass(
-        sampleprof::FSDiscriminatorPass::PassLast));
-
   addPreEmitPass();
 
   if (TM->Options.EnableIPRA)
@@ -1258,6 +1250,10 @@
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }
 
+  if (EnableFSDiscriminator)
+    addPass(createMIRAddFSDiscriminatorsPass(
+        sampleprof::FSDiscriminatorPass::Pass3));
+
   // Machine function splitter uses the basic block sections feature. Both
   // cannot be enabled at the same time. Basic block sections takes precedence.
   // FIXME: In principle, BasicBlockSection::Labels and splitting can used
@@ -1270,9 +1266,27 @@
     addPass(llvm::createBasicBlockSectionsPass());
   } else if (TM->Options.EnableMachineFunctionSplitter ||
              EnableMachineFunctionSplitter) {
-    addPass(createMachineFunctionSplitterPass());
+    bool IsUsingFSAFDOProfile = false;
+    if (EnableFSDiscriminator) {
+      const std::string ProfileFile = getFSProfileFile(TM);
+      if (!ProfileFile.empty()) {
+        IsUsingFSAFDOProfile = true;
+        addPass(createMIRProfileLoaderPass(
+            ProfileFile, getFSRemappingFile(TM),
+            sampleprof::FSDiscriminatorPass::Pass3, nullptr));
+      }
+    }
+    addPass(createMachineFunctionSplitterPass(IsUsingFSAFDOProfile));
   }
 
+  if (EnableFSDiscriminator)
+    // Add FS discriminators here so that all the instruction duplicates
+    // in different BBs get their own discriminators. With this, we can "sum"
+    // the SampleFDO counters instead of using MAX. This will improve the
+    // SampleFDO profile quality.
+    addPass(createMIRAddFSDiscriminatorsPass(
+        sampleprof::FSDiscriminatorPass::PassLast));
+
   if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
     addPass(createCFIFixup());
 
diff --git a/llvm/test/CodeGen/X86/fsafdo_mfs_test.ll b/llvm/test/CodeGen/X86/fsafdo_mfs_test.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fsafdo_mfs_test.ll
@@ -0,0 +1,133 @@
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -split-machine-functions < %s 2>&1 | FileCheck %s --check-prefixes=MFS
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s 2>&1 | FileCheck %s --check-prefixes=NOMFS
+
+; MFS: .section .text.hot.main,"ax"
+; MFS: .section .text.split.main,"ax"
+; NOMFS: .section .text.hot.,"ax"
+; NOMFS-NOT: .section .text.split
+
+source_filename = "sample_1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [10 x i8] c"going %d\0A\00", align 1
+@.str.1 = private unnamed_addr constant [19 x i8] c"getting %d from %d\00", align 1
+@stderr = external local_unnamed_addr global ptr, align 8
+@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@__llvm_fs_discriminator__ = weak_odr constant i1 true
+@llvm.used = appending global [1 x ptr] [ptr @__llvm_fs_discriminator__], section "llvm.metadata"
+
+define dso_local i32 @main(i32 noundef %0, ptr nocapture noundef readnone %1) local_unnamed_addr #0 !dbg !42 !prof !45 {
+  %3 = icmp sgt i32 %0, 10
+  br label %4, !dbg !46
+
+4:                                                ; preds = %2, %11
+  %5 = phi i32 [ 100, %2 ], [ %12, %11 ]
+  br i1 %3, label %6, label %8, !dbg !48, !prof !49
+
+6:                                                ; preds = %4
+  %7 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef %5), !dbg !50
+  br label %11, !dbg !51
+
+8:                                                ; preds = %4
+  %9 = add nuw nsw i32 %5, 10, !dbg !52
+  %10 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.1, i32 noundef %5, i32 noundef %9), !dbg !53, !prof !54
+  br label %11
+
+11:                                               ; preds = %6, %8
+  %12 = add nuw nsw i32 %5, 1, !dbg !55
+  %13 = icmp eq i32 %12, 8000, !dbg !57
+  br i1 %13, label %14, label %4, !dbg !46, !prof !58, !llvm.loop !59
+
+14:                                               ; preds = %11
+  %15 = load ptr, ptr @stderr, align 8, !dbg !63, !tbaa !64
+  %16 = tail call i32 (ptr, ptr, ...) @fprintf(ptr noundef %15, ptr noundef nonnull @.str.2, i32 noundef 0) #2, !dbg !68, !prof !69
+  ret i32 0, !dbg !70
+}
+
+declare !dbg !71 noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #1
+
+declare !dbg !73 noundef i32 @fprintf(ptr nocapture noundef, ptr nocapture noundef readonly, ...) local_unnamed_addr #1
+
+attributes #0 = { nofree nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nofree nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { cold }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !37}
+!llvm.ident = !{!41}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0 (git@github-personal:llvm/llvm-project.git 921d8ee516d421c9b2b9569411c49e042525edea)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "sample_1.c", directory: "/goe/mfs", checksumkind: CSK_MD5, checksum: "3964e1c99d5cecff5873b5e3360baa56")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 1, !"ProfileSummary", !9}
+!9 = !{!10, !11, !12, !13, !14, !15, !16, !17, !18, !19}
+!10 = !{!"ProfileFormat", !"SampleProfile"}
+!11 = !{!"TotalCount", i64 17}
+!12 = !{!"MaxCount", i64 7}
+!13 = !{!"MaxInternalCount", i64 0}
+!14 = !{!"MaxFunctionCount", i64 0}
+!15 = !{!"NumCounts", i64 7}
+!16 = !{!"NumFunctions", i64 1}
+!17 = !{!"IsPartialProfile", i64 0}
+!18 = !{!"PartialProfileRatio", double 0.000000e+00}
+!19 = !{!"DetailedSummary", !20}
+!20 = !{!21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36}
+!21 = !{i32 10000, i64 0, i32 0}
+!22 = !{i32 100000, i64 7, i32 1}
+!23 = !{i32 200000, i64 7, i32 1}
+!24 = !{i32 300000, i64 7, i32 1}
+!25 = !{i32 400000, i64 7, i32 1}
+!26 = !{i32 500000, i64 5, i32 3}
+!27 = !{i32 600000, i64 5, i32 3}
+!28 = !{i32 700000, i64 5, i32 3}
+!29 = !{i32 800000, i64 5, i32 3}
+!30 = !{i32 900000, i64 5, i32 3}
+!31 = !{i32 950000, i64 5, i32 3}
+!32 = !{i32 990000, i64 5, i32 3}
+!33 = !{i32 999000, i64 5, i32 3}
+!34 = !{i32 999900, i64 5, i32 3}
+!35 = !{i32 999990, i64 5, i32 3}
+!36 = !{i32 999999, i64 5, i32 3}
+!37 = !{i32 5, !"CG Profile", !38}
+!38 = distinct !{!39, !40}
+!39 = !{ptr @main, ptr @printf, i64 4}
+!40 = !{ptr @main, ptr @fprintf, i64 1}
+!41 = !{!"clang version 17.0.0 (git@github-personal:llvm/llvm-project.git 921d8ee516d421c9b2b9569411c49e042525edea)"}
+!42 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !43, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !44)
+!43 = !DISubroutineType(types: !44)
+!44 = !{}
+!45 = !{!"function_entry_count", i64 1}
+!46 = !DILocation(line: 6, column: 3, scope: !47)
+!47 = !DILexicalBlockFile(scope: !42, file: !1, discriminator: 1)
+!48 = !DILocation(line: 7, column: 9, scope: !42)
+!49 = !{!"branch_weights", i32 1, i32 8}
+!50 = !DILocation(line: 8, column: 7, scope: !42)
+!51 = !DILocation(line: 9, column: 5, scope: !42)
+!52 = !DILocation(line: 10, column: 41, scope: !42)
+!53 = !DILocation(line: 10, column: 7, scope: !42)
+!54 = !{!"branch_weights", i32 7}
+!55 = !DILocation(line: 6, column: 28, scope: !56)
+!56 = !DILexicalBlockFile(scope: !42, file: !1, discriminator: 2)
+!57 = !DILocation(line: 6, column: 19, scope: !47)
+!58 = !{!"branch_weights", i32 2, i32 6}
+!59 = distinct !{!59, !60, !61, !62}
+!60 = !DILocation(line: 6, column: 3, scope: !42)
+!61 = !DILocation(line: 12, column: 3, scope: !42)
+!62 = !{!"llvm.loop.mustprogress"}
+!63 = !DILocation(line: 13, column: 11, scope: !42)
+!64 = !{!65, !65, i64 0}
+!65 = !{!"any pointer", !66, i64 0}
+!66 = !{!"omnipotent char", !67, i64 0}
+!67 = !{!"Simple C/C++ TBAA"}
+!68 = !DILocation(line: 13, column: 3, scope: !42)
+!69 = !{!"branch_weights", i32 1}
+!70 = !DILocation(line: 14, column: 3, scope: !42)
+!71 = !DISubprogram(name: "printf", scope: !72, file: !72, line: 356, type: !43, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !44)
+!72 = !DIFile(filename: "include/stdio.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "b5a90985555f47bfb88eff5a8f0f5b72")
+!73 = !DISubprogram(name: "fprintf", scope: !72, file: !72, line: 350, type: !43, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !44)