diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1225,14 +1225,6 @@ addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); - if (EnableFSDiscriminator && !FSNoFinalDiscrim) - // Add FS discriminators here so that all the instruction duplicates - // in different BBs get their own discriminators. With this, we can "sum" - // the SampleFDO counters instead of using MAX. This will improve the - // SampleFDO profile quality. - addPass(createMIRAddFSDiscriminatorsPass( - sampleprof::FSDiscriminatorPass::PassLast)); - addPreEmitPass(); if (TM->Options.EnableIPRA) @@ -1258,6 +1250,10 @@ addPass(createMachineOutlinerPass(RunOnAllFunctions)); } + if (EnableFSDiscriminator) + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::Pass3)); + // Machine function splitter uses the basic block sections feature. Both // cannot be enabled at the same time. Basic block sections takes precedence. 
// FIXME: In principle, BasicBlockSection::Labels and splitting can used @@ -1270,7 +1266,19 @@ addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { - addPass(createMachineFunctionSplitterPass()); + // When FS discriminators are on and an FS profile file is given, load it + // right before splitting and tell the splitter a profile is available. + bool HasAccurateSampleProfile = false; + if (EnableFSDiscriminator) { + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty()) { + HasAccurateSampleProfile = true; + addPass(createMIRProfileLoaderPass( + ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass3, nullptr)); + } + } + addPass(createMachineFunctionSplitterPass(HasAccurateSampleProfile)); } if (!DisableCFIFixup && TM->Options.EnableCFIFixup) diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@&1 | FileCheck %s --check-prefix=NOPROFILE +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-NOMFS +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -split-machine-functions -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-MFS + +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR + +; PROFILE-NOMFS: Add FS discriminators in MIR +; PROFILE-NOMFS: SampleFDO loader in MIR +; PROFILE-NOMFS: Add FS discriminators in MIR +; PROFILE-NOMFS: SampleFDO loader in MIR +; PROFILE-NOMFS: Add FS discriminators in MIR 
+;;;; mfs is not specified, so no "SampleFDO loader in MIR" should be created after the last discriminator pass +; PROFILE-NOMFS-NOT: SampleFDO loader in MIR + +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Machine Function Splitter Transformation + +define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 { + br i1 %0, label %3, label %7, !prof !18 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !19 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() + +attributes #0 = { "implicit-section-name"="nosplit" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"SampleProfile"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 5} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999900, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 7000} +!15 = !{!"function_section_prefix", !"hot"} +!16 = !{!"function_section_prefix", !"unlikely"} +!17 = !{!"branch_weights", i32 7000, i32 0} +!18 = !{!"branch_weights", i32 0, i32 7000} +!19 = !{!"branch_weights", i32 1000, i32 6000} +!20 = !{!"function_entry_count", i64 10000} +!21 = !{!"branch_weights", i32 6000, i32 4000} +!22 = !{!"branch_weights", i32 80, i32 9920} +!23 = !{!"function_entry_count", i64 7} diff --git 
a/llvm/test/CodeGen/X86/machine-function-splitter-fsafdo.ll b/llvm/test/CodeGen/X86/machine-function-splitter-fsafdo.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-function-splitter-fsafdo.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -enable-fs-discriminator -improved-fs-discriminator=true -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS +;; No -fs-profile-file is passed; the checks expect foo4 to still get a .text.split section and a foo4.cold label. +; FSAFDO-MFS: .section .text.split.foo4,"ax" +; FSAFDO-MFS: foo4.cold: + +define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 { + br i1 %0, label %3, label %7, !prof !18 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !19 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() + +attributes #0 = { "implicit-section-name"="nosplit" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"SampleProfile"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 5} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999900, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 7000} +!15 = !{!"function_section_prefix", !"hot"} +!16 = !{!"function_section_prefix", !"unlikely"} +!17 = !{!"branch_weights", i32 7000, i32 0} +!18 = !{!"branch_weights", i32 0, i32 7000} +!19 = !{!"branch_weights", i32 1000, i32 6000} +!20 = !{!"function_entry_count", i64 10000} +!21 = !{!"branch_weights", i32 6000, i32 4000} +!22 = !{!"branch_weights", i32 80, i32 9920} +!23 = !{!"function_entry_count", i64 7}