Index: tools/llvm-exegesis/lib/Analysis.h =================================================================== --- tools/llvm-exegesis/lib/Analysis.h +++ tools/llvm-exegesis/lib/Analysis.h @@ -16,11 +16,16 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H #include "Clustering.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Error.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -48,7 +53,7 @@ SchedClass(const llvm::MCSchedClassDesc &SD, const llvm::MCSubtargetInfo &STI); - const llvm::MCSchedClassDesc &SCDesc; + const llvm::MCSchedClassDesc *const SCDesc; const llvm::SmallVector NonRedundantWriteProcRes; const std::vector> IdealizedProcResPressure; @@ -97,9 +102,19 @@ std::unordered_map> makePointsPerSchedClass() const; + template + void writeSnippet(llvm::raw_ostream &OS, llvm::ArrayRef Bytes, + const char *Separator) const; + const InstructionBenchmarkClustering &Clustering_; + llvm::MCObjectFileInfo ObjectFileInfo_; + std::unique_ptr Context_; std::unique_ptr SubtargetInfo_; std::unique_ptr InstrInfo_; + std::unique_ptr RegInfo_; + std::unique_ptr AsmInfo_; + std::unique_ptr InstPrinter_; + std::unique_ptr Disasm_; std::unordered_map MnemonicToOpcode_; }; Index: tools/llvm-exegesis/lib/Analysis.cpp =================================================================== --- tools/llvm-exegesis/lib/Analysis.cpp +++ tools/llvm-exegesis/lib/Analysis.cpp @@ -10,6 +10,7 @@ #include "Analysis.h" #include "BenchmarkResult.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/FormatVariadic.h" #include #include @@ -57,7 +58,8 @@ } template <> -void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S) { +void writeEscaped(llvm::raw_ostream &OS, + const 
llvm::StringRef S) { for (const char C : S) { if (C == '"') OS << "\\\""; @@ -85,17 +87,31 @@ writeEscaped(OS, llvm::formatv("{0:F}", Value).str()); } -template -static void writeSnippet(llvm::raw_ostream &OS, - const std::vector &Instructions, - const llvm::MCInstrInfo &InstrInfo, - const char* Separator) { - // FIXME: Print operands. - llvm::SmallVector Opcodes; - for (const llvm::MCInst &Instr : Instructions) { - Opcodes.push_back(InstrInfo.getName(Instr.getOpcode())); +template +void Analysis::writeSnippet(llvm::raw_ostream &OS, + llvm::ArrayRef Bytes, + const char *Separator) const { + llvm::SmallVector Lines; + // Parse the asm snippet and print it. + while (!Bytes.empty()) { + llvm::MCInst MI; + uint64_t MISize = 0; + if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(), + llvm::nulls())) { + writeEscaped(OS, llvm::join(Lines, Separator)); + writeEscaped(OS, Separator); + writeEscaped(OS, "[error decoding asm snippet]"); + return; + } + Lines.emplace_back(); + std::string &Line = Lines.back(); + llvm::raw_string_ostream OSS(Line); + InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_); + Bytes = Bytes.drop_front(MISize); + OSS.flush(); + Line = llvm::StringRef(Line).trim().str(); } - writeEscaped(OS, llvm::join(Opcodes, Separator)); + writeEscaped(OS, llvm::join(Lines, Separator)); } // Prints a row representing an instruction, along with scheduling info and @@ -105,7 +121,7 @@ const InstructionBenchmark &Point = Clustering_.getPoints()[PointId]; writeClusterId(OS, Clustering_.getClusterIdForPoint(PointId)); OS << kCsvSep; - writeSnippet(OS, Point.Key.Instructions, *InstrInfo_, "; "); + writeSnippet(OS, Point.AssembledSnippet, "; "); OS << kCsvSep; writeEscaped(OS, Point.Key.Config); OS << kCsvSep; @@ -134,10 +150,21 @@ if (Clustering.getPoints().empty()) return; - InstrInfo_.reset(Target.createMCInstrInfo()); const InstructionBenchmark &FirstPoint = Clustering.getPoints().front(); + InstrInfo_.reset(Target.createMCInstrInfo()); + 
RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple)); + AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple)); SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple, FirstPoint.CpuName, "")); + InstPrinter_.reset(Target.createMCInstPrinter( + llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_, + *InstrInfo_, *RegInfo_)); + + Context_ = llvm::make_unique(AsmInfo_.get(), RegInfo_.get(), + &ObjectFileInfo_); + Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_)); + assert(Disasm_ && "cannot create MCDisassembler. missing call to " + "InitializeXXXTargetDisassembler ?"); } template <> @@ -197,9 +224,10 @@ // Latency tries to find a serial path. Just show the opcode path and show the // whole snippet only on hover. -static void writeLatencySnippetHtml(llvm::raw_ostream &OS, - const std::vector &Instructions, - const llvm::MCInstrInfo &InstrInfo) { +static void +writeLatencySnippetHtml(llvm::raw_ostream &OS, + const std::vector &Instructions, + const llvm::MCInstrInfo &InstrInfo) { bool First = true; for (const llvm::MCInst &Instr : Instructions) { if (First) @@ -238,17 +266,18 @@ for (const size_t PointId : Cluster.getPointIds()) { const auto &Point = Points[PointId]; OS << "
  • (OS, Point.Key.Instructions, *InstrInfo_, "\n"); + writeSnippet(OS, Point.AssembledSnippet, + "\n"); OS << "\">"; switch (Point.Mode) { - case InstructionBenchmark::Latency: - writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); - break; - case InstructionBenchmark::Uops: - writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); - break; - default: - llvm_unreachable("invalid mode"); + case InstructionBenchmark::Latency: + writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + case InstructionBenchmark::Uops: + writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + default: + llvm_unreachable("invalid mode"); } OS << " "; writeEscaped(OS, Point.Key.Config); @@ -345,7 +374,7 @@ Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD, const llvm::MCSubtargetInfo &STI) - : SCDesc(SD), + : SCDesc(&SD), NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)), IdealizedProcResPressure(computeIdealizedProcResPressure( STI.getSchedModel(), NonRedundantWriteProcRes)) {} @@ -382,9 +411,9 @@ } // Find the latency. SchedClassPoint[0].Value = 0.0; - for (unsigned I = 0; I < SC.SCDesc.NumWriteLatencyEntries; ++I) { + for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) { const llvm::MCWriteLatencyEntry *const WLE = - STI.getWriteLatencyEntry(&SC.SCDesc, I); + STI.getWriteLatencyEntry(SC.SCDesc, I); SchedClassPoint[0].Value = std::max(SchedClassPoint[0].Value, WLE->Cycles); } @@ -425,19 +454,19 @@ "th>WriteProcResIdealized " "Resource Pressure"; - if (SC.SCDesc.isValid()) { + if (SC.SCDesc->isValid()) { const auto &SM = SubtargetInfo_->getSchedModel(); OS << "✔"; - OS << "" << (SC.SCDesc.isVariant() ? "✔" : "✕") + OS << "" << (SC.SCDesc->isVariant() ? "✔" : "✕") << ""; - OS << "" << SC.SCDesc.NumMicroOps << ""; + OS << "" << SC.SCDesc->NumMicroOps << ""; // Latencies. OS << "
      "; - for (int I = 0, E = SC.SCDesc.NumWriteLatencyEntries; I < E; ++I) { + for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { const auto *const Entry = - SubtargetInfo_->getWriteLatencyEntry(&SC.SCDesc, I); + SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I); OS << "
    • " << Entry->Cycles; - if (SC.SCDesc.NumWriteLatencyEntries > 1) { + if (SC.SCDesc->NumWriteLatencyEntries > 1) { // Dismabiguate if more than 1 latency. OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; } Index: tools/llvm-exegesis/lib/BenchmarkResult.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkResult.h +++ tools/llvm-exegesis/lib/BenchmarkResult.h @@ -55,6 +55,7 @@ std::vector Measurements; std::string Error; std::string Info; + std::vector AssembledSnippet; // Read functions. static llvm::Expected Index: tools/llvm-exegesis/lib/BenchmarkResult.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkResult.cpp +++ tools/llvm-exegesis/lib/BenchmarkResult.cpp @@ -10,6 +10,7 @@ #include "BenchmarkResult.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" @@ -146,6 +147,23 @@ }; template <> struct MappingTraits { + class NormalizedBinary { + public: + NormalizedBinary(IO &io) {} + NormalizedBinary(IO &, std::vector &Data) : Binary(Data) {} + std::vector denormalize(IO &) { + std::vector Data; + std::string Str; + raw_string_ostream OSS(Str); + Binary.writeAsBinary(OSS); + OSS.flush(); + Data.assign(Str.begin(), Str.end()); + return Data; + } + + BinaryRef Binary; + }; + static void mapping(IO &Io, exegesis::InstructionBenchmark &Obj) { Io.mapRequired("mode", Obj.Mode); Io.mapRequired("key", Obj.Key); @@ -155,6 +173,10 @@ Io.mapRequired("measurements", Obj.Measurements); Io.mapRequired("error", Obj.Error); Io.mapOptional("info", Obj.Info); + // AssembledSnippet + MappingNormalization> BinaryString( + Io, Obj.AssembledSnippet); + Io.mapOptional("assembled_snippet", BinaryString->Binary); } }; Index: tools/llvm-exegesis/lib/BenchmarkRunner.h 
=================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.h +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -91,6 +91,8 @@ llvm::Expected writeObjectFile(llvm::ArrayRef Code) const; + llvm::Expected + createExecutableFunction(llvm::ArrayRef Code) const; }; } // namespace exegesis Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -74,24 +74,41 @@ return InstrBenchmark; } - for (const auto &MCInst : Snippet) - InstrBenchmark.Key.Instructions.push_back(MCInst); - - std::vector Code; - for (int I = 0; I < InstrBenchmark.NumRepetitions; ++I) - Code.push_back(Snippet[I % Snippet.size()]); + InstrBenchmark.Key.Instructions = Snippet; + + // Repeat the snippet until there are at least MinInstructions instructions + // in the resulting code. The snippet is always repeated at least once. + const auto GenerateInstructions = [&Snippet](const int MinInstructions) { + std::vector Code = Snippet; + for (int I = 0; I < MinInstructions; ++I) + Code.push_back(Snippet[I % Snippet.size()]); + return Code; + }; + + // Assemble at least kMinInstructionsForSnippet instructions by repeating the + // snippet for debug/analysis. This is so that the user clearly understands + // that the inside instructions are repeated. 
+ constexpr const int kMinInstructionsForSnippet = 16; + { + auto EF = createExecutableFunction( + GenerateInstructions(kMinInstructionsForSnippet)); + if (llvm::Error E = EF.takeError()) { + InstrBenchmark.Error = llvm::toString(std::move(E)); + return InstrBenchmark; + } + const auto FnBytes = EF->getFunctionBytes(); + InstrBenchmark.AssembledSnippet.assign(FnBytes.begin(), FnBytes.end()); + } - auto ExpectedObjectPath = writeObjectFile(Code); - if (llvm::Error E = ExpectedObjectPath.takeError()) { + // Assemble at least NumRepetitions instructions by repeating the snippet, + // for measurements. + auto EF = createExecutableFunction( + GenerateInstructions(InstrBenchmark.NumRepetitions)); + if (llvm::Error E = EF.takeError()) { InstrBenchmark.Error = llvm::toString(std::move(E)); return InstrBenchmark; } - - // FIXME: Check if TargetMachine or ExecutionEngine can be reused instead of - // creating one everytime. - const ExecutableFunction EF(State.createTargetMachine(), - getObjectFromFile(*ExpectedObjectPath)); - InstrBenchmark.Measurements = runMeasurements(EF, NumRepetitions); + InstrBenchmark.Measurements = runMeasurements(*EF, NumRepetitions); return InstrBenchmark; } @@ -110,4 +127,17 @@ return ResultPath.str(); } +llvm::Expected BenchmarkRunner::createExecutableFunction( + llvm::ArrayRef Code) const { + auto ExpectedObjectPath = writeObjectFile(Code); + if (llvm::Error E = ExpectedObjectPath.takeError()) { + return std::move(E); + } + + // FIXME: Check if TargetMachine or ExecutionEngine can be reused instead of + // creating one every time. 
+ return ExecutableFunction(State.createTargetMachine(), + getObjectFromFile(*ExpectedObjectPath)); +} + } // namespace exegesis Index: tools/llvm-exegesis/lib/CMakeLists.txt =================================================================== --- tools/llvm-exegesis/lib/CMakeLists.txt +++ tools/llvm-exegesis/lib/CMakeLists.txt @@ -22,8 +22,10 @@ ExecutionEngine GlobalISel MC + MCDisassembler MCJIT Object + ObjectYAML Support ) Index: tools/llvm-exegesis/lib/LLVMBuild.txt =================================================================== --- tools/llvm-exegesis/lib/LLVMBuild.txt +++ tools/llvm-exegesis/lib/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Exegesis parent = Libraries -required_libraries = CodeGen ExecutionEngine MC MCJIT Object Support +required_libraries = CodeGen ExecutionEngine MC MCDisassembler MCJIT Object ObjectYAML Support Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- tools/llvm-exegesis/llvm-exegesis.cpp +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -180,6 +180,7 @@ llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetDisassembler(); // Read benchmarks. const LLVMState State; const std::vector Points =