diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -43,6 +43,7 @@ #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/WithColor.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" @@ -172,12 +173,6 @@ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", "Disable the abort but emit a diagnostic on failure"))); -// An option that disables inserting FS-AFDO discriminators before emit. -// This is mainly for debugging and tuning purpose. -static cl::opt - FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, - cl::desc("Do not insert FS-AFDO discriminators before " - "emit.")); // Disable MIRProfileLoader before RegAlloc. This is for for debugging and // tuning purpose. static cl::opt DisableRAFSProfileLoader( @@ -1225,14 +1220,6 @@ addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); - if (EnableFSDiscriminator && !FSNoFinalDiscrim) - // Add FS discriminators here so that all the instruction duplicates - // in different BBs get their own discriminators. With this, we can "sum" - // the SampleFDO counters instead of using MAX. This will improve the - // SampleFDO profile quality. - addPass(createMIRAddFSDiscriminatorsPass( - sampleprof::FSDiscriminatorPass::PassLast)); - addPreEmitPass(); if (TM->Options.EnableIPRA) @@ -1258,6 +1245,10 @@ addPass(createMachineOutlinerPass(RunOnAllFunctions)); } + if (EnableFSDiscriminator) + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::PassLast)); + // Machine function splitter uses the basic block sections feature. Both // cannot be enabled at the same time. Basic block sections takes precedence. 
// FIXME: In principle, BasicBlockSection::Labels and splitting can used @@ -1270,6 +1261,20 @@ addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty()) { + if (EnableFSDiscriminator) { + addPass(createMIRProfileLoaderPass( + ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::PassLast, nullptr)); + } else { + // Sample profile is given, but FSDiscriminator is not + // enabled, this may result in performance regression. + WithColor::warning() + << "Using AutoFDO without FSDiscriminator for MFS may regress " + "performance.\n"; + } + } addPass(createMachineFunctionSplitterPass()); } diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@&1 | FileCheck %s --check-prefix=NODISCRIMINATOR +; NODISCRIMINATOR: warning: Using AutoFDO without FSDiscriminator for MFS may regress performance. 
+ +define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind { + br i1 %0, label %3, label %7 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() diff --git a/llvm/test/CodeGen/X86/load-sample-profile.ll b/llvm/test/CodeGen/X86/load-sample-profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/load-sample-profile.ll @@ -0,0 +1,61 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=NOPROFILE +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-NOMFS +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -split-machine-functions -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-MFS + +;; No profile is specified, no load passes. +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR +; NOPROFILE: Add FS discriminators in MIR +; NOPROFILE-NOT: SampleFDO loader in MIR + +;; Profile is specified, so we have first 2 load passes. 
+; PROFILE-NOMFS: Add FS discriminators in MIR +; PROFILE-NOMFS: SampleFDO loader in MIR +; PROFILE-NOMFS: Add FS discriminators in MIR +; PROFILE-NOMFS: SampleFDO loader in MIR +; PROFILE-NOMFS: Add FS discriminators in MIR +;; But mfs is not specified, so no third "SampleFDO loader" should be created. +; PROFILE-NOMFS-NOT: SampleFDO loader in MIR + +;; Profile is specified with mfs, so we have 3 load passes. +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Add FS discriminators in MIR +; PROFILE-MFS: SampleFDO loader in MIR +; PROFILE-MFS: Machine Function Splitter Transformation + +define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind { + br i1 %0, label %3, label %7 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux()