diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// #include "../Target.h" -#include "../Latency.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp --- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp +++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp @@ -244,9 +244,9 @@ return Entries; } -// Uops repeat the same opcode over again. Just show this opcode and show the -// whole snippet only on hover. -static void writeUopsSnippetHtml(raw_ostream &OS, +// Parallel benchmarks repeat the same opcode multiple times. Just show this +// opcode and show the whole snippet only on hover. +static void writeParallelSnippetHtml(raw_ostream &OS, const std::vector &Instructions, const MCInstrInfo &InstrInfo) { if (Instructions.empty()) @@ -282,7 +282,7 @@ break; case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); break; default: llvm_unreachable("invalid mode"); diff --git a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt --- a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt @@ -27,18 +27,20 @@ BenchmarkRunner.cpp Clustering.cpp CodeTemplate.cpp - Latency.cpp + LatencyBenchmarkRunner.cpp LlvmState.cpp MCInstrDescView.cpp + ParallelSnippetGenerator.cpp PerfHelper.cpp RegisterAliasing.cpp RegisterValue.cpp SchedClassResolution.cpp + SerialSnippetGenerator.cpp SnippetFile.cpp SnippetGenerator.cpp SnippetRepetitor.cpp Target.cpp - Uops.cpp + UopsBenchmarkRunner.cpp ) 
llvm_update_compile_flags(LLVMExegesis) diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h rename from llvm/tools/llvm-exegesis/lib/Latency.h rename to llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h --- a/llvm/tools/llvm-exegesis/lib/Latency.h +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h @@ -1,4 +1,4 @@ -//===-- Latency.h -----------------------------------------------*- C++ -*-===// +//===-- LatencyBenchmarkRunner.h --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,23 +15,10 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H #include "BenchmarkRunner.h" -#include "Error.h" -#include "MCInstrDescView.h" -#include "SnippetGenerator.h" namespace llvm { namespace exegesis { -class LatencySnippetGenerator : public SnippetGenerator { -public: - using SnippetGenerator::SnippetGenerator; - ~LatencySnippetGenerator() override; - - Expected> - generateCodeTemplates(const Instruction &Instr, - const BitVector &ForbiddenRegisters) const override; -}; - class LatencyBenchmarkRunner : public BenchmarkRunner { public: LatencyBenchmarkRunner(const LLVMState &State, diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp @@ -0,0 +1,58 @@ +//===-- LatencyBenchmarkRunner.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LatencyBenchmarkRunner.h" + +#include "Target.h" +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +static constexpr size_t kMaxAliasingInstructions = 10; + +LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, + InstructionBenchmark::ModeE Mode) + : BenchmarkRunner(State, Mode) { + assert((Mode == InstructionBenchmark::Latency || + Mode == InstructionBenchmark::InverseThroughput) && + "invalid mode"); +} + +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; + +Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements( + const FunctionExecutor &Executor) const { + // Cycle measurements include some overhead from the kernel. Repeat the + // measure several times and take the minimum value. + constexpr const int NumMeasurements = 30; + int64_t MinValue = std::numeric_limits<int64_t>::max(); + const char *CounterName = State.getPfmCounters().CycleCounter; + for (size_t I = 0; I < NumMeasurements; ++I) { + auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + if (*ExpectedCounterValue < MinValue) + MinValue = *ExpectedCounterValue; + } + std::vector<BenchmarkMeasure> Result; + switch (Mode) { + case InstructionBenchmark::Latency: + Result = {BenchmarkMeasure::Create("latency", MinValue)}; + break; + case InstructionBenchmark::InverseThroughput: + Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; + break; + default: + break; + } + return std::move(Result); +} + +} // namespace exegesis +} // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/Mips/Target.cpp b/llvm/tools/llvm-exegesis/lib/Mips/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/Mips/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Mips/Target.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // 
//===----------------------------------------------------------------------===// +#include "../Error.h" #include "../Target.h" -#include "../Latency.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" #include "MipsRegisterInfo.h" diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h rename from llvm/tools/llvm-exegesis/lib/Uops.h rename to llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/Uops.h +++ b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h @@ -1,4 +1,4 @@ -//===-- Uops.h --------------------------------------------------*- C++ -*-===// +//===-- ParallelSnippetGenerator.h ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,23 +7,22 @@ //===----------------------------------------------------------------------===// /// /// \file -/// A BenchmarkRunner implementation to measure uop decomposition. +/// A SnippetGenerator implementation to create parallel instruction snippets. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H -#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H -#include "BenchmarkRunner.h" #include "SnippetGenerator.h" namespace llvm { namespace exegesis { -class UopsSnippetGenerator : public SnippetGenerator { +class ParallelSnippetGenerator : public SnippetGenerator { public: using SnippetGenerator::SnippetGenerator; - ~UopsSnippetGenerator() override; + ~ParallelSnippetGenerator() override; Expected> generateCodeTemplates(const Instruction &Instr, @@ -60,20 +59,7 @@ std::vector &SnippetTemplate) const; }; -class UopsBenchmarkRunner : public BenchmarkRunner { -public: - UopsBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, InstructionBenchmark::Uops) {} - ~UopsBenchmarkRunner() override; - - static constexpr const size_t kMinNumDifferentAddresses = 6; - -private: - Expected> - runMeasurements(const FunctionExecutor &Executor) const override; -}; - } // namespace exegesis } // namespace llvm -#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H +#endif // LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp rename from llvm/tools/llvm-exegesis/lib/Uops.cpp rename to llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/Uops.cpp +++ b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp @@ -1,4 +1,4 @@ -//===-- Uops.cpp ------------------------------------------------*- C++ -*-===// +//===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "Uops.h" +#include "ParallelSnippetGenerator.h" -#include "Assembler.h" #include "BenchmarkRunner.h" #include "MCInstrDescView.h" #include "Target.h" @@ -16,9 +15,9 @@ // FIXME: Load constants into registers (e.g. with fld1) to not break // instructions like x87. -// Ideally we would like the only limitation on executing uops to be the issue -// ports. Maximizing port pressure increases the likelihood that the load is -// distributed evenly across possible ports. +// Ideally we would like the only limitation on executing instructions to be the +// availability of the CPU resources (e.g. execution ports) needed to execute +// them, instead of the availability of their data dependencies. // To achieve that, one approach is to generate instructions that do not have // data dependencies between them. @@ -89,11 +88,9 @@ return Result; } -UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; +ParallelSnippetGenerator::~ParallelSnippetGenerator() = default; -UopsSnippetGenerator::~UopsSnippetGenerator() = default; - -void UopsSnippetGenerator::instantiateMemoryOperands( +void ParallelSnippetGenerator::instantiateMemoryOperands( const unsigned ScratchSpacePointerInReg, std::vector &Instructions) const { if (ScratchSpacePointerInReg == 0) @@ -157,7 +154,7 @@ } } -Expected> UopsSnippetGenerator::generateCodeTemplates( +Expected> ParallelSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { CodeTemplate CT; CT.ScratchSpacePointerInReg = @@ -219,34 +216,7 @@ return getSingleton(std::move(CT)); } -Expected> -UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const { - std::vector Result; - const PfmCountersInfo &PCI = State.getPfmCounters(); - // Uops per port. 
- for (const auto *IssueCounter = PCI.IssueCounters, - *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; - IssueCounter != IssueCounterEnd; ++IssueCounter) { - if (!IssueCounter->Counter) - continue; - auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName, - *ExpectedCounterValue)); - } - // NumMicroOps. - if (const char *const UopsCounter = PCI.UopsCounter) { - auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - Result.push_back( - BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue)); - } - return std::move(Result); -} - -constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses; +constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses; } // namespace exegesis } // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/PowerPC/Target.cpp b/llvm/tools/llvm-exegesis/lib/PowerPC/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/PowerPC/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/PowerPC/Target.cpp @@ -7,7 +7,6 @@ // The PowerPC ExegesisTarget. 
//===----------------------------------------------------------------------===// #include "../Target.h" -#include "../Latency.h" #include "PPC.h" #include "PPCRegisterInfo.h" diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h rename from llvm/tools/llvm-exegesis/lib/Latency.h rename to llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/Latency.h +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h @@ -1,4 +1,4 @@ -//===-- Latency.h -----------------------------------------------*- C++ -*-===// +//===-- SerialSnippetGenerator.h --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,14 +7,13 @@ //===----------------------------------------------------------------------===// /// /// \file -/// A BenchmarkRunner implementation to measure instruction latencies. +/// A SnippetGenerator implementation to create serial instruction snippets. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H -#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H -#include "BenchmarkRunner.h" #include "Error.h" #include "MCInstrDescView.h" #include "SnippetGenerator.h" @@ -22,27 +21,17 @@ namespace llvm { namespace exegesis { -class LatencySnippetGenerator : public SnippetGenerator { +class SerialSnippetGenerator : public SnippetGenerator { public: using SnippetGenerator::SnippetGenerator; - ~LatencySnippetGenerator() override; + ~SerialSnippetGenerator() override; Expected> generateCodeTemplates(const Instruction &Instr, const BitVector &ForbiddenRegisters) const override; }; -class LatencyBenchmarkRunner : public BenchmarkRunner { -public: - LatencyBenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode); - ~LatencyBenchmarkRunner() override; - -private: - Expected> - runMeasurements(const FunctionExecutor &Executor) const override; -}; } // namespace exegesis } // namespace llvm -#endif // LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp rename from llvm/tools/llvm-exegesis/lib/Latency.cpp rename to llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/Latency.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -1,4 +1,4 @@ -//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// +//===-- SerialSnippetGenerator.cpp ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,17 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "Latency.h" +#include "SerialSnippetGenerator.h" -#include "Assembler.h" -#include "BenchmarkRunner.h" #include "MCInstrDescView.h" -#include "PerfHelper.h" -#include "Target.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstBuilder.h" -#include "llvm/Support/FormatVariadic.h" +#include "CodeTemplate.h" +#include +#include +#include namespace llvm { namespace exegesis { @@ -149,10 +145,10 @@ } } -LatencySnippetGenerator::~LatencySnippetGenerator() = default; +SerialSnippetGenerator::~SerialSnippetGenerator() = default; Expected> -LatencySnippetGenerator::generateCodeTemplates( +SerialSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { std::vector Results; const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters); @@ -169,43 +165,5 @@ return std::move(Results); } -LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode) - : BenchmarkRunner(State, Mode) { - assert((Mode == InstructionBenchmark::Latency || - Mode == InstructionBenchmark::InverseThroughput) && - "invalid mode"); -} - -LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; - -Expected> LatencyBenchmarkRunner::runMeasurements( - const FunctionExecutor &Executor) const { - // Cycle measurements include some overhead from the kernel. Repeat the - // measure several times and take the minimum value. 
- constexpr const int NumMeasurements = 30; - int64_t MinValue = std::numeric_limits<int64_t>::max(); - const char *CounterName = State.getPfmCounters().CycleCounter; - for (size_t I = 0; I < NumMeasurements; ++I) { - auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - if (*ExpectedCounterValue < MinValue) - MinValue = *ExpectedCounterValue; - } - std::vector<BenchmarkMeasure> Result; - switch (Mode) { - case InstructionBenchmark::Latency: - Result = {BenchmarkMeasure::Create("latency", MinValue)}; - break; - case InstructionBenchmark::InverseThroughput: - Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; - break; - default: - break; - } - return std::move(Result); -} - } // namespace exegesis } // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -144,9 +144,9 @@ // Targets can implement their own snippet generators/benchmarks runners by // implementing these. 
- std::unique_ptr virtual createLatencySnippetGenerator( + std::unique_ptr virtual createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const; - std::unique_ptr virtual createUopsSnippetGenerator( + std::unique_ptr virtual createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode) const; diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "Target.h" -#include "Latency.h" -#include "Uops.h" +#include "LatencyBenchmarkRunner.h" +#include "ParallelSnippetGenerator.h" +#include "SerialSnippetGenerator.h" +#include "UopsBenchmarkRunner.h" namespace llvm { namespace exegesis { @@ -43,10 +45,10 @@ case InstructionBenchmark::Unknown: return nullptr; case InstructionBenchmark::Latency: - return createLatencySnippetGenerator(State, Opts); + return createSerialSnippetGenerator(State, Opts); case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - return createUopsSnippetGenerator(State, Opts); + return createParallelSnippetGenerator(State, Opts); } return nullptr; } @@ -77,14 +79,14 @@ return nullptr; } -std::unique_ptr ExegesisTarget::createLatencySnippetGenerator( +std::unique_ptr ExegesisTarget::createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } -std::unique_ptr ExegesisTarget::createUopsSnippetGenerator( +std::unique_ptr ExegesisTarget::createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const { - return std::make_unique(State, Opts); + return 
std::make_unique(State, Opts); } std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( diff --git a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h @@ -0,0 +1,38 @@ +//===-- UopsBenchmarkRunner.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure uop decomposition. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H + +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +class UopsBenchmarkRunner : public BenchmarkRunner { +public: + UopsBenchmarkRunner(const LLVMState &State) + : BenchmarkRunner(State, InstructionBenchmark::Uops) {} + ~UopsBenchmarkRunner() override; + + static constexpr const size_t kMinNumDifferentAddresses = 6; + +private: + Expected> + runMeasurements(const FunctionExecutor &Executor) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H diff --git a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp @@ -0,0 +1,46 @@ +//===-- UopsBenchmarkRunner.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UopsBenchmarkRunner.h" + +#include "Target.h" + +namespace llvm { +namespace exegesis { + +UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; + +Expected<std::vector<BenchmarkMeasure>> +UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const { + std::vector<BenchmarkMeasure> Result; + const PfmCountersInfo &PCI = State.getPfmCounters(); + // Uops per port. + for (const auto *IssueCounter = PCI.IssueCounters, + *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; + IssueCounter != IssueCounterEnd; ++IssueCounter) { + if (!IssueCounter->Counter) + continue; + auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName, + *ExpectedCounterValue)); + } + // NumMicroOps. + if (const char *const UopsCounter = PCI.UopsCounter) { + auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back( + BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue)); + } + return std::move(Result); +} + +} // namespace exegesis +} // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -8,9 +8,9 @@ #include "../Target.h" #include "../Error.h" -#include "../Latency.h" +#include "../SerialSnippetGenerator.h" #include "../SnippetGenerator.h" -#include "../Uops.h" +#include "../ParallelSnippetGenerator.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" @@ -242,9 +242,9 @@ } namespace { -class X86LatencySnippetGenerator : public LatencySnippetGenerator { +class X86SerialSnippetGenerator 
: public SerialSnippetGenerator { public: - using LatencySnippetGenerator::LatencySnippetGenerator; + using SerialSnippetGenerator::SerialSnippetGenerator; Expected> generateCodeTemplates(const Instruction &Instr, @@ -253,7 +253,7 @@ } // namespace Expected> -X86LatencySnippetGenerator::generateCodeTemplates( +X86SerialSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); @@ -271,7 +271,7 @@ switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return LatencySnippetGenerator::generateCodeTemplates(Instr, + return SerialSnippetGenerator::generateCodeTemplates(Instr, ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: @@ -292,9 +292,9 @@ } namespace { -class X86UopsSnippetGenerator : public UopsSnippetGenerator { +class X86ParallelSnippetGenerator : public ParallelSnippetGenerator { public: - using UopsSnippetGenerator::UopsSnippetGenerator; + using ParallelSnippetGenerator::ParallelSnippetGenerator; Expected> generateCodeTemplates(const Instruction &Instr, @@ -304,7 +304,7 @@ } // namespace Expected> -X86UopsSnippetGenerator::generateCodeTemplates( +X86ParallelSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); @@ -333,7 +333,7 @@ switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return UopsSnippetGenerator::generateCodeTemplates(Instr, + return ParallelSnippetGenerator::generateCodeTemplates(Instr, ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: @@ -577,16 +577,16 @@ sizeof(kUnavailableRegisters[0])); } - std::unique_ptr createLatencySnippetGenerator( + std::unique_ptr createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const override { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } - std::unique_ptr createUopsSnippetGenerator( 
+ std::unique_ptr createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const override { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } bool matchesArch(Triple::ArchType Arch) const override { diff --git a/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" #include "MipsInstrInfo.h" +#include "ParallelSnippetGenerator.h" #include "RegisterAliasing.h" +#include "SerialSnippetGenerator.h" #include "TestBase.h" -#include "Uops.h" #include @@ -48,12 +48,12 @@ SnippetGeneratorT Generator; }; -using LatencySnippetGeneratorTest = - SnippetGeneratorTest; +using SerialSnippetGeneratorTest = SnippetGeneratorTest; -using UopsSnippetGeneratorTest = SnippetGeneratorTest; +using ParallelSnippetGeneratorTest = + SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { // - ADD // - Op0 Explicit Def RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32) @@ -77,8 +77,8 @@ << "Op0 is either set to Op1 or to Op2"; } -TEST_F(LatencySnippetGeneratorTest, - ImplicitSelfDependencyThroughExplicitRegsForbidAll) { +TEST_F(SerialSnippetGeneratorTest, + ImplicitSelfDependencyThroughExplicitRegsForbidAll) { // - XOR // - Op0 Explicit Def RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32) @@ -96,7 +96,7 @@ consumeError(std::move(Error)); } -TEST_F(UopsSnippetGeneratorTest, MemoryUse) { +TEST_F(ParallelSnippetGeneratorTest, MemoryUse) { // LB reads from memory. 
// - LB // - Op0 Explicit Def RegClass(GPR32) @@ -110,10 +110,11 @@ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; - EXPECT_THAT(CT.Info, HasSubstr("instruction is parallel, repeating a random one.")); + EXPECT_THAT(CT.Info, + HasSubstr("instruction is parallel, repeating a random one.")); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, - SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); + SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" +#include "ParallelSnippetGenerator.h" #include "RegisterAliasing.h" +#include "SerialSnippetGenerator.h" #include "TestBase.h" -#include "Uops.h" #include "X86InstrInfo.h" #include @@ -59,12 +59,12 @@ SnippetGeneratorT Generator; }; -using LatencySnippetGeneratorTest = - SnippetGeneratorTest; +using SerialSnippetGeneratorTest = SnippetGeneratorTest; -using UopsSnippetGeneratorTest = SnippetGeneratorTest; +using ParallelSnippetGeneratorTest = + SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { // - ADC16i16 // - Op0 Explicit Use Immediate // - Op1 Implicit Def Reg(AX) @@ -90,7 +90,7 @@ EXPECT_THAT(IT.getVariableValues()[0], IsInvalid()) << 
"Immediate is not set"; } -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { // - ADD16ri // - Op0 Explicit Def RegClass(GR16) // - Op1 Explicit Use RegClass(GR16) TiedToOp0 @@ -114,7 +114,7 @@ EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()) << "Operand 2 is not set"; } -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { // - VXORPSrr // - Op0 Explicit Def RegClass(VR128) // - Op1 Explicit Use RegClass(VR128) @@ -138,7 +138,7 @@ << "Op0 is either set to Op1 or to Op2"; } -TEST_F(LatencySnippetGeneratorTest, +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegsForbidAll) { // - VXORPSrr // - Op0 Explicit Def RegClass(VR128) @@ -158,7 +158,7 @@ consumeError(std::move(Error)); } -TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { +TEST_F(SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) { // - CMP64rr // - Op0 Explicit Use RegClass(GR64) // - Op1 Explicit Use RegClass(GR64) @@ -182,7 +182,7 @@ } } -TEST_F(LatencySnippetGeneratorTest, LAHF) { +TEST_F(SerialSnippetGeneratorTest, LAHF) { // - LAHF // - Op0 Implicit Def Reg(AH) // - Op1 Implicit Use Reg(EFLAGS) @@ -198,7 +198,7 @@ } } -TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { +TEST_F(ParallelSnippetGeneratorTest, ParallelInstruction) { // - BNDCL32rr // - Op0 Explicit Use RegClass(BNDR) // - Op1 Explicit Use RegClass(GR32) @@ -218,7 +218,7 @@ EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()); } -TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { +TEST_F(ParallelSnippetGeneratorTest, SerialInstruction) { // - CDQ // - Op0 Implicit Def Reg(EAX) // - Op1 Implicit Def Reg(EDX) @@ -237,7 +237,7 @@ ASSERT_THAT(IT.getVariableValues(), SizeIs(0)); } -TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { +TEST_F(ParallelSnippetGeneratorTest, 
StaticRenaming) { // CMOV32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. @@ -268,7 +268,7 @@ << "Each instruction writes to a different register"; } -TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) { +TEST_F(ParallelSnippetGeneratorTest, NoTiedVariables) { // CMOV_GR32 has no tied variables, we make sure def and use are different // from each other. @@ -302,7 +302,7 @@ EXPECT_THAT(IT.getVariableValues()[3], IsInvalid()); } -TEST_F(UopsSnippetGeneratorTest, MemoryUse) { +TEST_F(ParallelSnippetGeneratorTest, MemoryUse) { // Mov32rm reads from memory. // - MOV32rm // - Op0 Explicit Def RegClass(GR32) @@ -326,7 +326,7 @@ EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, - SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); + SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.getVariableValues(), SizeIs(6)); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -7,12 +7,10 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" #include "RegisterAliasing.h" #include "TestBase.h" -#include "Uops.h" #include "X86InstrInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -34,8 +32,7 @@ void SetUp() { TM = State.createTargetMachine(); Context = std::make_unique(); - Mod = - std::make_unique("X86SnippetRepetitorTest", *Context); + Mod = std::make_unique("X86SnippetRepetitorTest", *Context); Mod->setDataLayout(TM->createDataLayout()); MMI = 
std::make_unique(TM.get()); MF = &createVoidVoidPtrMachineFunction("TestFn", Mod.get(), MMI.get()); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TestBase.h b/llvm/unittests/tools/llvm-exegesis/X86/TestBase.h --- a/llvm/unittests/tools/llvm-exegesis/X86/TestBase.h +++ b/llvm/unittests/tools/llvm-exegesis/X86/TestBase.h @@ -1,4 +1,4 @@ -//===-- TestBase.cpp --------------------------------------------*- C++ -*-===// +//===-- TestBase.h ----------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.