diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.h b/llvm/tools/llvm-exegesis/lib/Analysis.h --- a/llvm/tools/llvm-exegesis/lib/Analysis.h +++ b/llvm/tools/llvm-exegesis/lib/Analysis.h @@ -36,12 +36,10 @@ // A helper class to analyze benchmark results for a target. class Analysis { public: - Analysis(const Target &Target, std::unique_ptr SubtargetInfo, - std::unique_ptr InstrInfo, + Analysis(const LLVMState &State, const InstructionBenchmarkClustering &Clustering, double AnalysisInconsistencyEpsilon, - bool AnalysisDisplayUnstableOpcodes, - const std::string &ForceCpuName = ""); + bool AnalysisDisplayUnstableOpcodes); // Prints a csv of instructions for each cluster. struct PrintClusters {}; @@ -113,10 +111,8 @@ const char *Separator) const; const InstructionBenchmarkClustering &Clustering_; + const LLVMState &State_; std::unique_ptr Context_; - std::unique_ptr SubtargetInfo_; - std::unique_ptr InstrInfo_; - std::unique_ptr RegInfo_; std::unique_ptr AsmInfo_; std::unique_ptr InstPrinter_; std::unique_ptr Disasm_; diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp --- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp +++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp @@ -102,6 +102,7 @@ void Analysis::writeSnippet(raw_ostream &OS, ArrayRef Bytes, const char *Separator) const { SmallVector Lines; + const auto &SI = State_.getSubtargetInfo(); // Parse the asm snippet and print it. while (!Bytes.empty()) { MCInst MI; @@ -114,7 +115,7 @@ } SmallString<128> InstPrinterStr; // FIXME: magic number. 
raw_svector_ostream OSS(InstPrinterStr); - InstPrinter_->printInst(&MI, 0, "", *SubtargetInfo_, OSS); + InstPrinter_->printInst(&MI, 0, "", SI, OSS); Bytes = Bytes.drop_front(MISize); Lines.emplace_back(InstPrinterStr.str().trim()); } @@ -136,10 +137,10 @@ const MCInst &MCI = Point.keyInstruction(); unsigned SchedClassId; std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( - *SubtargetInfo_, *InstrInfo_, MCI); + State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const MCSchedClassDesc *const SCDesc = - SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId); + State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); writeEscaped(OS, SCDesc->Name); #else OS << SchedClassId; @@ -151,38 +152,30 @@ OS << "\n"; } -Analysis::Analysis(const Target &Target, - std::unique_ptr SubtargetInfo, - std::unique_ptr InstrInfo, +Analysis::Analysis(const LLVMState &State, const InstructionBenchmarkClustering &Clustering, double AnalysisInconsistencyEpsilon, - bool AnalysisDisplayUnstableOpcodes, - const std::string &ForceCpuName) - : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)), - InstrInfo_(std::move(InstrInfo)), + bool AnalysisDisplayUnstableOpcodes) + : Clustering_(Clustering), State_(State), AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * AnalysisInconsistencyEpsilon), AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { if (Clustering.getPoints().empty()) return; - const InstructionBenchmark &FirstPoint = Clustering.getPoints().front(); - const std::string CpuName = - ForceCpuName.empty() ? 
FirstPoint.CpuName : ForceCpuName; - RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple)); MCTargetOptions MCOptions; - AsmInfo_.reset( - Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple, MCOptions)); - SubtargetInfo_.reset( - Target.createMCSubtargetInfo(FirstPoint.LLVMTriple, CpuName, "")); - InstPrinter_.reset(Target.createMCInstPrinter( - Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_, - *InstrInfo_, *RegInfo_)); - - Context_ = - std::make_unique(Triple(FirstPoint.LLVMTriple), AsmInfo_.get(), - RegInfo_.get(), SubtargetInfo_.get()); - Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_)); + const auto &TM = State.getTargetMachine(); + const auto &Triple = TM.getTargetTriple(); + AsmInfo_.reset(TM.getTarget().createMCAsmInfo(State_.getRegInfo(), + Triple.str(), MCOptions)); + InstPrinter_.reset(TM.getTarget().createMCInstPrinter( + Triple, 0 /*default variant*/, *AsmInfo_, State_.getInstrInfo(), + State_.getRegInfo())); + + Context_ = std::make_unique( + Triple, AsmInfo_.get(), &State_.getRegInfo(), &State_.getSubtargetInfo()); + Disasm_.reset(TM.getTarget().createMCDisassembler(State_.getSubtargetInfo(), + *Context_)); assert(Disasm_ && "cannot create MCDisassembler. missing call to " "InitializeXXXTargetDisassembler ?"); } @@ -232,14 +225,14 @@ unsigned SchedClassId; bool WasVariant; std::tie(SchedClassId, WasVariant) = - ResolvedSchedClass::resolveSchedClassId(*SubtargetInfo_, *InstrInfo_, - MCI); + ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(), + State_.getInstrInfo(), MCI); const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); if (IndexIt == SchedClassIdToIndex.end()) { // Create a new entry. 
SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); - ResolvedSchedClassAndPoints Entry( - ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant)); + ResolvedSchedClassAndPoints Entry(ResolvedSchedClass( + State_.getSubtargetInfo(), SchedClassId, WasVariant)); Entry.PointIds.push_back(PointId); Entries.push_back(std::move(Entry)); } else { @@ -284,11 +277,11 @@ OS << "\">"; switch (Point.Mode) { case InstructionBenchmark::Latency: - writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); break; case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); break; default: llvm_unreachable("invalid mode"); @@ -314,7 +307,8 @@ OS << ""; for (const SchedClassCluster &Cluster : Clusters) { OS << "Idealized Resource Pressure"; if (RSC.SCDesc->isValid()) { - const auto &SM = SubtargetInfo_->getSchedModel(); + const auto &SI = State_.getSubtargetInfo(); + const auto &SM = SI.getSchedModel(); OS << "✔"; OS << "" << (RSC.WasVariant ? "✔" : "✕") << ""; OS << "" << RSC.SCDesc->NumMicroOps << ""; // Latencies. OS << "
    "; for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { - const auto *const Entry = - SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I); + const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I); OS << "
  • " << Entry->Cycles; if (RSC.SCDesc->NumWriteLatencyEntries > 1) { // Disambiguate if more than 1 latency. @@ -403,8 +397,7 @@ // inverse throughput. OS << ""; writeMeasurementValue( - OS, - MCSchedModel::getReciprocalThroughput(*SubtargetInfo_, *RSC.SCDesc)); + OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc)); OS << ""; // WriteProcRes. OS << "
      "; @@ -419,9 +412,8 @@ OS << "
        "; for (const auto &Pressure : RSC.IdealizedProcResPressure) { OS << "
      • "; - writeEscaped(OS, SubtargetInfo_->getSchedModel() - .getProcResource(Pressure.first) - ->Name); + writeEscaped( + OS, SI.getSchedModel().getProcResource(Pressure.first)->Name); OS << ": "; writeMeasurementValue(OS, Pressure.second); OS << "
      • "; @@ -550,6 +542,7 @@ writeEscaped(OS, FirstPoint.CpuName); OS << ""; + const auto &SI = State_.getSubtargetInfo(); for (const auto &RSCAndPoints : makePointsPerSchedClass()) { if (!RSCAndPoints.RSC.SCDesc) continue; @@ -574,10 +567,9 @@ // Print any scheduling class that has at least one cluster that does not // match the checked-in data. - if (all_of(SchedClassClusters, [this, - &RSCAndPoints](const SchedClassCluster &C) { - return C.measurementsMatch(*SubtargetInfo_, RSCAndPoints.RSC, - Clustering_, + if (all_of(SchedClassClusters, [this, &RSCAndPoints, + &SI](const SchedClassCluster &C) { + return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_, AnalysisInconsistencyEpsilonSquared_); })) continue; // Nothing weird. diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -19,10 +19,12 @@ #include "RegisterValue.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/YAMLTraits.h" #include +#include #include #include #include @@ -78,10 +80,22 @@ enum ResultAggregationModeE { Min, Max, Mean, MinVariance }; // Read functions. static Expected readYaml(const LLVMState &State, - StringRef Filename); + MemoryBufferRef Buffer); static Expected> - readYamls(const LLVMState &State, StringRef Filename); + readYamls(const LLVMState &State, MemoryBufferRef Buffer); + + // Given a set of serialized instruction benchmarks, returns the set of + // triples and CPUs that appear in the list of benchmarks. 
+ struct TripleAndCpu { + std::string LLVMTriple; + std::string CpuName; + bool operator<(const TripleAndCpu &O) const { + return std::tie(LLVMTriple, CpuName) < std::tie(O.LLVMTriple, O.CpuName); + } + }; + static Expected> + readTriplesAndCpusFromYamls(MemoryBufferRef Buffer); class Error readYamlFrom(const LLVMState &State, StringRef InputContent); diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp @@ -327,47 +327,69 @@ } }; +template <> struct MappingTraits { + static void mapping(IO &Io, + exegesis::InstructionBenchmark::TripleAndCpu &Obj) { + assert(!Io.outputting() && "can only read TripleAndCpu"); + // Read triple. + Io.mapRequired("llvm_triple", Obj.LLVMTriple); + Io.mapRequired("cpu_name", Obj.CpuName); + // Drop everything else. + } +}; + } // namespace yaml namespace exegesis { -Expected -InstructionBenchmark::readYaml(const LLVMState &State, StringRef Filename) { - if (auto ExpectedMemoryBuffer = - errorOrToExpected(MemoryBuffer::getFile(Filename, /*IsText=*/true))) { - yaml::Input Yin(*ExpectedMemoryBuffer.get()); - YamlContext Context(State); - InstructionBenchmark Benchmark; - if (Yin.setCurrentDocument()) - yaml::yamlize(Yin, Benchmark, /*unused*/ true, Context); - if (!Context.getLastError().empty()) - return make_error(Context.getLastError()); - return Benchmark; - } else { - return ExpectedMemoryBuffer.takeError(); +Expected> +InstructionBenchmark::readTriplesAndCpusFromYamls(MemoryBufferRef Buffer) { + // We're only mapping a field, drop other fields and silence the corresponding + // warnings. 
+ yaml::Input Yin( + Buffer, nullptr, +[](const SMDiagnostic &, void *Context) {}); + Yin.setAllowUnknownKeys(true); + std::set Result; + yaml::EmptyContext Context; + while (Yin.setCurrentDocument()) { + TripleAndCpu TC; + yamlize(Yin, TC, /*unused*/ true, Context); + if (Yin.error()) + return errorCodeToError(Yin.error()); + Result.insert(TC); + Yin.nextDocument(); } + return Result; +} + +Expected +InstructionBenchmark::readYaml(const LLVMState &State, MemoryBufferRef Buffer) { + yaml::Input Yin(Buffer); + YamlContext Context(State); + InstructionBenchmark Benchmark; + if (Yin.setCurrentDocument()) + yaml::yamlize(Yin, Benchmark, /*unused*/ true, Context); + if (!Context.getLastError().empty()) + return make_error(Context.getLastError()); + return Benchmark; } Expected> -InstructionBenchmark::readYamls(const LLVMState &State, StringRef Filename) { - if (auto ExpectedMemoryBuffer = - errorOrToExpected(MemoryBuffer::getFile(Filename, /*IsText=*/true))) { - yaml::Input Yin(*ExpectedMemoryBuffer.get()); - YamlContext Context(State); - std::vector Benchmarks; - while (Yin.setCurrentDocument()) { - Benchmarks.emplace_back(); - yamlize(Yin, Benchmarks.back(), /*unused*/ true, Context); - if (Yin.error()) - return errorCodeToError(Yin.error()); - if (!Context.getLastError().empty()) - return make_error(Context.getLastError()); - Yin.nextDocument(); - } - return Benchmarks; - } else { - return ExpectedMemoryBuffer.takeError(); +InstructionBenchmark::readYamls(const LLVMState &State, + MemoryBufferRef Buffer) { + yaml::Input Yin(Buffer); + YamlContext Context(State); + std::vector Benchmarks; + while (Yin.setCurrentDocument()) { + Benchmarks.emplace_back(); + yamlize(Yin, Benchmarks.back(), /*unused*/ true, Context); + if (Yin.error()) + return errorCodeToError(Yin.error()); + if (!Context.getLastError().empty()) + return make_error(Context.getLastError()); + Yin.nextDocument(); } + return Benchmarks; } Error InstructionBenchmark::writeYamlTo(const LLVMState &State, diff 
--git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -416,40 +416,50 @@ InitializeNativeTargetDisassembler(); InitializeNativeExegesisTarget(); + auto MemoryBuffer = ExitOnFileError( + BenchmarkFile, + errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true))); + + const auto TriplesAndCpus = ExitOnFileError( + BenchmarkFile, + InstructionBenchmark::readTriplesAndCpusFromYamls(*MemoryBuffer)); + if (TriplesAndCpus.empty()) { + errs() << "no benchmarks to analyze\n"; + return; + } + if (TriplesAndCpus.size() > 1) { + ExitWithError("analysis file contains benchmarks from several CPUs. This " + "is unsupported."); + } + auto TripleAndCpu = *TriplesAndCpus.begin(); + if (!CpuName.empty()) { + llvm::errs() << "overridding file CPU name (" << TripleAndCpu.CpuName + << ") with provided CPU name (" << CpuName << ")\n"; + TripleAndCpu.CpuName = CpuName; + } + llvm::errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '" + << TripleAndCpu.CpuName << "'\n"; + // Read benchmarks. - const LLVMState State = ExitOnErr(LLVMState::Create("", "")); + const LLVMState State = ExitOnErr( + LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName)); const std::vector Points = ExitOnFileError( - BenchmarkFile, InstructionBenchmark::readYamls(State, BenchmarkFile)); + BenchmarkFile, InstructionBenchmark::readYamls(State, *MemoryBuffer)); outs() << "Parsed " << Points.size() << " benchmark points\n"; if (Points.empty()) { errs() << "no benchmarks to analyze\n"; return; } - // FIXME: Check that all points have the same triple/cpu. // FIXME: Merge points from several runs (latency and uops). 
- std::string Error; - const auto *TheTarget = - TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error); - if (!TheTarget) { - errs() << "unknown target '" << Points[0].LLVMTriple << "'\n"; - return; - } - - std::unique_ptr SubtargetInfo( - TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, "")); - - std::unique_ptr InstrInfo(TheTarget->createMCInstrInfo()); - assert(InstrInfo && "Unable to create instruction info!"); - const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create( Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints, - AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get())); + AnalysisClusteringEpsilon, &State.getSubtargetInfo(), + &State.getInstrInfo())); - const Analysis Analyzer( - *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering, - AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName); + const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon, + AnalysisDisplayUnstableOpcodes); maybeRunAnalysis(Analyzer, "analysis clusters", AnalysisClustersOutputFile); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp @@ -20,7 +20,9 @@ #include "gtest/gtest.h" using ::testing::AllOf; +using ::testing::ElementsAre; using ::testing::Eq; +using ::testing::Field; using ::testing::get; using ::testing::Pointwise; using ::testing::Property; @@ -89,10 +91,25 @@ errs() << Filename << "-------\n"; ExitOnErr(ToDisk.writeYaml(State, Filename)); + const std::unique_ptr Buffer = + std::move(*MemoryBuffer::getFile(Filename)); + + { + // Read Triples/Cpu only. 
+ const auto TriplesAndCpus = + ExitOnErr(InstructionBenchmark::readTriplesAndCpusFromYamls(*Buffer)); + + ASSERT_THAT(TriplesAndCpus, + testing::ElementsAre( + AllOf(Field(&InstructionBenchmark::TripleAndCpu::LLVMTriple, + Eq("llvm_triple")), + Field(&InstructionBenchmark::TripleAndCpu::CpuName, + Eq("cpu_name"))))); + } { // One-element version. const auto FromDisk = - ExitOnErr(InstructionBenchmark::readYaml(State, Filename)); + ExitOnErr(InstructionBenchmark::readYaml(State, *Buffer)); EXPECT_THAT(FromDisk.Key.Instructions, Pointwise(EqMCInst(), ToDisk.Key.Instructions)); @@ -108,7 +125,7 @@ { // Vector version. const auto FromDiskVector = - ExitOnErr(InstructionBenchmark::readYamls(State, Filename)); + ExitOnErr(InstructionBenchmark::readYamls(State, *Buffer)); ASSERT_EQ(FromDiskVector.size(), size_t{1}); const auto FromDisk = FromDiskVector[0]; EXPECT_THAT(FromDisk.Key.Instructions,