diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -578,6 +578,11 @@
   /// otherwise this does nothing
   FunctionPass *createRegAllocScoringPass();
 
+  /// Export machine basic block profile information to a file, for use by
+  /// downstream cost modelling applications, if the necessary command line
+  /// flags are set.
+  FunctionPass *createMBBProfileDumpPass();
+
   /// JMC instrument pass.
   ModulePass *createJMCInstrumenterPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -337,6 +337,7 @@
 void initializeRegAllocFastPass(PassRegistry&);
 void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &);
 void initializeRegAllocScoringPass(PassRegistry &);
+void initializeMBBProfileDumpPass(PassRegistry &);
 void initializeRegBankSelectPass(PassRegistry&);
 void initializeRegToMemLegacyPass(PassRegistry&);
 void initializeRegUsageInfoCollectorPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -144,6 +144,7 @@
   MachineTraceMetrics.cpp
   MachineUniformityAnalysis.cpp
   MachineVerifier.cpp
+  MBBProfileDump.cpp
   MIRFSDiscriminator.cpp
   MIRSampleProfile.cpp
   MIRYamlMapping.cpp
diff --git a/llvm/lib/CodeGen/MBBProfileDump.cpp b/llvm/lib/CodeGen/MBBProfileDump.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/MBBProfileDump.cpp
@@ -0,0 +1,93 @@
+//===- MBBProfileDump.cpp - MBB Profile Dump Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MBB Profile Dump Pass. When -mbb-profile-dump names a
+// file, one "function,MBB number,relative frequency" record is written to it
+// for every machine basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+#include <system_error>
+
+using namespace llvm;
+
+static cl::opt<std::string> BasicBlockProfileDump(
+    "mbb-profile-dump", cl::Hidden,
+    cl::desc("Basic block profile dump for external cost modelling. If "
+             "matching up BBs afterwards, the compilation must be "
+             "performed with -fbasic-block-sections=labels. Enabling this "
+             "flag with in-process ThinLTO in the same compiler invocation "
+             "is not supported."));
+
+namespace {
+/// Dumps the entry-relative frequency of every machine basic block to the
+/// file named by -mbb-profile-dump. The function itself is never modified.
+class MBBProfileDump : public MachineFunctionPass {
+public:
+  static char ID;
+  // Dump destination; null when -mbb-profile-dump was not given. The stream
+  // flushes and closes the file when it is destroyed along with the pass.
+  std::unique_ptr<raw_fd_ostream> FileOutput;
+
+  MBBProfileDump() : MachineFunctionPass(ID) {
+    initializeMBBProfileDumpPass(*PassRegistry::getPassRegistry());
+    if (BasicBlockProfileDump.empty())
+      return;
+    std::error_code PossibleFileError;
+    FileOutput = std::make_unique<raw_fd_ostream>(BasicBlockProfileDump,
+                                                  PossibleFileError);
+    // Fail loudly instead of later writing to a stream that never opened.
+    if (PossibleFileError)
+      report_fatal_error(Twine("Failed to open file for MBB Profile Dump: ") +
+                             PossibleFileError.message(),
+                         /*gen_crash_diag=*/false);
+  }
+
+  StringRef getPassName() const override { return "MBB Profile Dump"; }
+
+  // The pass only reads block frequencies, so all analyses are preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // Write one record per block of the function; always returns false.
+  bool runOnMachineFunction(MachineFunction &) override;
+};
+
+} // anonymous namespace
+
+char MBBProfileDump::ID = 0;
+FunctionPass *llvm::createMBBProfileDumpPass() { return new MBBProfileDump(); }
+
+INITIALIZE_PASS(MBBProfileDump, "mbbprofiledump", "MBB Profile Dump", false,
+                false)
+
+bool MBBProfileDump::runOnMachineFunction(MachineFunction &MF) {
+  // No stream means the flag was not given, so there is nothing to dump.
+  if (!FileOutput)
+    return false;
+
+  const MachineBlockFrequencyInfo &MBFI =
+      getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+  for (const MachineBasicBlock &MBB : MF)
+    *FileOutput << MF.getName() << "," << MBB.getNumber() << ","
+                << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n";
+  return false;
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1273,6 +1273,11 @@
   // Add passes that directly emit MI after all other MI passes.
   addPreEmitPass2();
 
+  // Export machine basic block profile information to be used in downstream
+  // cost modelling applications. This needs to be right before the AsmPrinter
+  // so that nothing following it will mutate MBB numbers or frequencies.
+  addPass(createMBBProfileDumpPass());
+
   AddingMachinePasses = false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -75,6 +75,7 @@
 ; CHECK-NEXT: Insert CFI remember/restore state instructions
 ; CHECK-NEXT: Unpack machine instruction bundles
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
+; CHECK-NEXT: MBB Profile Dump
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: AArch64 Assembly Printer
 ; CHECK-NEXT: Free MachineFunction
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -226,6 +226,7 @@
 ; CHECK-NEXT: Insert CFI remember/restore state instructions
 ; CHECK-NEXT: Unpack machine instruction bundles
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
+; CHECK-NEXT: MBB Profile Dump
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: AArch64 Assembly Printer
 ; CHECK-NEXT: Free
MachineFunction diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -43,6 +43,10 @@ ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' +; HOTNESS-NEXT: Executing Pass 'MBB Profile Dump' +; HOTNESS-NEXT: Freeing Pass 'MBB Profile Dump' +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' ; HOTNESS-NEXT: Building MachineBlockFrequencyInfo on the fly ; HOTNESS-NEXT: Building LoopInfo on the fly @@ -68,6 +72,10 @@ ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' +; NO_HOTNESS-NEXT: Executing Pass 'MBB Profile Dump' +; NO_HOTNESS-NEXT: Freeing Pass 'MBB Profile Dump' +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer' diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -143,6 +143,8 @@ ; GCN-O0-NEXT: Register Usage Information Collector Pass ; GCN-O0-NEXT: Live DEBUG_VALUE analysis ; GCN-O0-NEXT: Machine Sanitizer Binary Metadata +; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O0-NEXT: MBB Profile Dump ; GCN-O0-NEXT: Function register usage analysis ; GCN-O0-NEXT: FunctionPass 
Manager ; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis @@ -411,6 +413,8 @@ ; GCN-O1-NEXT: Register Usage Information Collector Pass ; GCN-O1-NEXT: Live DEBUG_VALUE analysis ; GCN-O1-NEXT: Machine Sanitizer Binary Metadata +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: MBB Profile Dump ; GCN-O1-NEXT: Function register usage analysis ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis @@ -711,6 +715,8 @@ ; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass ; GCN-O1-OPTS-NEXT: Live DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT: Machine Sanitizer Binary Metadata +; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: MBB Profile Dump ; GCN-O1-OPTS-NEXT: Function register usage analysis ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis @@ -1014,6 +1020,8 @@ ; GCN-O2-NEXT: Register Usage Information Collector Pass ; GCN-O2-NEXT: Live DEBUG_VALUE analysis ; GCN-O2-NEXT: Machine Sanitizer Binary Metadata +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: MBB Profile Dump ; GCN-O2-NEXT: Function register usage analysis ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis @@ -1327,6 +1335,8 @@ ; GCN-O3-NEXT: Register Usage Information Collector Pass ; GCN-O3-NEXT: Live DEBUG_VALUE analysis ; GCN-O3-NEXT: Machine Sanitizer Binary Metadata +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: MBB Profile Dump ; GCN-O3-NEXT: Function register usage analysis ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -204,6 +204,7 @@ ; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; 
CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: ARM Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: Machine Sanitizer Binary Metadata ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: LoongArch Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -162,6 +162,7 @@ ; CHECK-NEXT: Machine Sanitizer Binary Metadata ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: LoongArch Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll @@ -0,0 +1,21 @@ +; REQUIRES: have_tflite +; REQUIRES: default_triple +; +; Check that the basic block profile dump outputs data and in the correct +; format. 
+; +; RUN: llc -o /dev/null -mbb-profile-dump=%t %s +; RUN: FileCheck --input-file %t %s + +define i64 @f2(i64 %a, i64 %b) { + %sum = add i64 %a, %b + ret i64 %sum +} + +define i64 @f1() { + %sum = call i64 @f2(i64 2, i64 2) + ret i64 %sum +} + +; CHECK: f2,0,1.000000e+00 +; CHECK-NEXT: f1,0,1.000000e+00 diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: PowerPC Expand Atomic ; CHECK-NEXT: PowerPC Branch Selector ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Linux PPC Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -212,6 +212,7 @@ ; CHECK-NEXT: PowerPC Expand Atomic ; CHECK-NEXT: PowerPC Branch Selector ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Linux PPC Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -61,6 +61,7 @@ ; CHECK-NEXT: RISCV pseudo instruction expansion pass ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: RISCV Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll 
@@ -172,6 +172,7 @@ ; CHECK-NEXT: RISCV pseudo instruction expansion pass ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: RISCV Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -81,6 +81,7 @@ ; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: X86 Assembly Printer ; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -216,6 +216,7 @@ ; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: MBB Profile Dump ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: X86 Assembly Printer ; CHECK-NEXT: Free MachineFunction