diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -196,14 +196,16 @@ to specify at least one of the `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`. -.. option:: -repetition-mode=[duplicate|loop] +.. option:: -repetition-mode=[duplicate|loop|min] Specify the repetition mode. `duplicate` will create a large, straight line basic block with `num-repetitions` copies of the snippet. `loop` will wrap the snippet in a loop which will be run `num-repetitions` times. The `loop` mode tends to better hide the effects of the CPU frontend on architectures that cache decoded instructions, but consumes a register for counting - iterations. + iterations. If performing an analysis over many opcodes, it may be best + to instead use the `min` mode, which will run each of the other modes + and report the minimum measured result. .. option:: -num-repetitions= diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -68,8 +68,7 @@ // The number of instructions inside the repeated snippet. For example, if a // snippet of 3 instructions is repeated 4 times, this is 12. int NumRepetitions = 0; - enum RepetitionModeE { Duplicate, Loop }; - RepetitionModeE RepetitionMode; + enum RepetitionModeE { Duplicate, Loop, AggregateMin }; // Note that measurements are per instruction. 
std::vector Measurements; std::string Error; diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -40,7 +40,7 @@ Expected runConfiguration(const BenchmarkCode &Configuration, unsigned NumRepetitions, - const SnippetRepetitor &Repetitor, + ArrayRef> Repetitors, bool DumpObjectToDisk) const; // Scratch space to run instructions that touch memory. diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -14,6 +14,7 @@ #include "Error.h" #include "MCInstrDescView.h" #include "PerfHelper.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -81,7 +82,8 @@ Expected BenchmarkRunner::runConfiguration( const BenchmarkCode &BC, unsigned NumRepetitions, - const SnippetRepetitor &Repetitor, bool DumpObjectToDisk) const { + ArrayRef> Repetitors, + bool DumpObjectToDisk) const { InstructionBenchmark InstrBenchmark; InstrBenchmark.Mode = Mode; InstrBenchmark.CpuName = std::string(State.getTargetMachine().getTargetCPU()); @@ -94,70 +96,105 @@ InstrBenchmark.Key = BC.Key; - // Assemble at least kMinInstructionsForSnippet instructions by repeating the - // snippet for debug/analysis. This is so that the user clearly understands - // that the inside instructions are repeated. 
- constexpr const int kMinInstructionsForSnippet = 16; - { - SmallString<0> Buffer; - raw_svector_ostream OS(Buffer); - if (Error E = assembleToStream( - State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, - BC.Key.RegisterInitialValues, - Repetitor.Repeat(Instructions, kMinInstructionsForSnippet), OS)) { - return std::move(E); + // If we end up having an error, and we've previously succeeded with + // some other Repetitor, we want to discard the success. + bool Success = false; + auto _ = llvm::make_scope_exit([&InstrBenchmark, &Success]() { + if (Success) + return; + InstrBenchmark.Measurements.clear(); + }); + + for (const std::unique_ptr &Repetitor : Repetitors) { + // Assemble at least kMinInstructionsForSnippet instructions by repeating + // the snippet for debug/analysis. This is so that the user clearly + // understands that the inside instructions are repeated. + constexpr const int kMinInstructionsForSnippet = 16; + { + SmallString<0> Buffer; + raw_svector_ostream OS(Buffer); + if (Error E = assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, + Repetitor->Repeat(Instructions, kMinInstructionsForSnippet), + OS)) { + return std::move(E); + } + const ExecutableFunction EF(State.createTargetMachine(), + getObjectFromBuffer(OS.str())); + const auto FnBytes = EF.getFunctionBytes(); + InstrBenchmark.AssembledSnippet.insert( + InstrBenchmark.AssembledSnippet.end(), FnBytes.begin(), + FnBytes.end()); } - const ExecutableFunction EF(State.createTargetMachine(), - getObjectFromBuffer(OS.str())); - const auto FnBytes = EF.getFunctionBytes(); - InstrBenchmark.AssembledSnippet.assign(FnBytes.begin(), FnBytes.end()); - } - // Assemble NumRepetitions instructions repetitions of the snippet for - // measurements. - const auto Filler = - Repetitor.Repeat(Instructions, InstrBenchmark.NumRepetitions); + // Assemble NumRepetitions instructions repetitions of the snippet for + // measurements. 
+ const auto Filler = + Repetitor->Repeat(Instructions, InstrBenchmark.NumRepetitions); - object::OwningBinary ObjectFile; - if (DumpObjectToDisk) { - auto ObjectFilePath = writeObjectFile(BC, Filler); - if (Error E = ObjectFilePath.takeError()) { + object::OwningBinary ObjectFile; + if (DumpObjectToDisk) { + auto ObjectFilePath = writeObjectFile(BC, Filler); + if (Error E = ObjectFilePath.takeError()) { + InstrBenchmark.Error = toString(std::move(E)); + return InstrBenchmark; + } + outs() << "Check generated assembly with: /usr/bin/objdump -d " + << *ObjectFilePath << "\n"; + ObjectFile = getObjectFromFile(*ObjectFilePath); + } else { + SmallString<0> Buffer; + raw_svector_ostream OS(Buffer); + if (Error E = assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, Filler, OS)) { + return std::move(E); + } + ObjectFile = getObjectFromBuffer(OS.str()); + } + + const FunctionExecutorImpl Executor(State, std::move(ObjectFile), + Scratch.get()); + auto NewMeasurements = runMeasurements(Executor); + if (Error E = NewMeasurements.takeError()) { + if (!E.isA()) + return std::move(E); InstrBenchmark.Error = toString(std::move(E)); return InstrBenchmark; } - outs() << "Check generated assembly with: /usr/bin/objdump -d " - << *ObjectFilePath << "\n"; - ObjectFile = getObjectFromFile(*ObjectFilePath); - } else { - SmallString<0> Buffer; - raw_svector_ostream OS(Buffer); - if (Error E = assembleToStream(State.getExegesisTarget(), - State.createTargetMachine(), BC.LiveIns, - BC.Key.RegisterInitialValues, Filler, OS)) { - return std::move(E); + assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions"); + for (BenchmarkMeasure &BM : *NewMeasurements) { + // Scale the measurements by instruction. + BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; + // Scale the measurements by snippet. 
+ BM.PerSnippetValue *= static_cast(Instructions.size()) / + InstrBenchmark.NumRepetitions; + } + if (InstrBenchmark.Measurements.empty()) { + InstrBenchmark.Measurements = std::move(*NewMeasurements); + continue; + } + + assert(Repetitors.size() > 1 && !InstrBenchmark.Measurements.empty() && + "We're in an 'min' repetition mode, and need to aggregate new " + "result to the existing result."); + assert(InstrBenchmark.Measurements.size() == NewMeasurements->size() && + "Expected to have identical number of measurements."); + for (auto I : zip(InstrBenchmark.Measurements, *NewMeasurements)) { + BenchmarkMeasure &Measurement = std::get<0>(I); + BenchmarkMeasure &NewMeasurement = std::get<1>(I); + assert(Measurement.Key == NewMeasurement.Key && + "Expected measurements to be symmetric"); + + Measurement.PerInstructionValue = std::min( + Measurement.PerInstructionValue, NewMeasurement.PerInstructionValue); + Measurement.PerSnippetValue = + std::min(Measurement.PerSnippetValue, NewMeasurement.PerSnippetValue); } - ObjectFile = getObjectFromBuffer(OS.str()); - } - - const FunctionExecutorImpl Executor(State, std::move(ObjectFile), - Scratch.get()); - auto Measurements = runMeasurements(Executor); - if (Error E = Measurements.takeError()) { - if (!E.isA()) - return std::move(E); - InstrBenchmark.Error = toString(std::move(E)); - return InstrBenchmark; - } - InstrBenchmark.Measurements = std::move(*Measurements); - assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions"); - for (BenchmarkMeasure &BM : InstrBenchmark.Measurements) { - // Scale the measurements by instruction. - BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; - // Scale the measurements by snippet. 
- BM.PerSnippetValue *= static_cast(Instructions.size()) / - InstrBenchmark.NumRepetitions; } + Success = true; // Disable scoped RAII cleanup for failure return InstrBenchmark; } diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp @@ -110,6 +110,8 @@ return std::make_unique(State); case InstructionBenchmark::Loop: return std::make_unique(State); + case InstructionBenchmark::AggregateMin: + break; } llvm_unreachable("Unknown RepetitionModeE enum"); } diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -86,10 +86,14 @@ static cl::opt RepetitionMode( "repetition-mode", cl::desc("how to repeat the instruction snippet"), cl::cat(BenchmarkOptions), - cl::values(clEnumValN(exegesis::InstructionBenchmark::Duplicate, - "duplicate", "Duplicate the snippet"), - clEnumValN(exegesis::InstructionBenchmark::Loop, "loop", - "Loop over the snippet"))); + cl::values( + clEnumValN(exegesis::InstructionBenchmark::Duplicate, "duplicate", + "Duplicate the snippet"), + clEnumValN(exegesis::InstructionBenchmark::Loop, "loop", + "Loop over the snippet"), + clEnumValN(exegesis::InstructionBenchmark::AggregateMin, "min", + "All of the above and take the minimum of measurements")), + cl::init(exegesis::InstructionBenchmark::Duplicate)); static cl::opt NumRepetitions("num-repetitions", @@ -285,7 +289,22 @@ const auto Opcodes = getOpcodesOrDie(State.getInstrInfo()); - const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State); + SmallVector, 2> Repetitors; + if (RepetitionMode != InstructionBenchmark::RepetitionModeE::AggregateMin) + Repetitors.emplace_back(SnippetRepetitor::Create(RepetitionMode, State)); + else { + for (InstructionBenchmark::RepetitionModeE 
RepMode : + {InstructionBenchmark::RepetitionModeE::Duplicate, + InstructionBenchmark::RepetitionModeE::Loop}) + Repetitors.emplace_back(SnippetRepetitor::Create(RepMode, State)); + } + + BitVector AllReservedRegs; + llvm::for_each(Repetitors, + [&AllReservedRegs]( + const std::unique_ptr &Repetitor) { + AllReservedRegs |= Repetitor->getReservedRegs(); + }); std::vector Configurations; if (!Opcodes.empty()) { @@ -298,8 +317,8 @@ << ": ignoring instruction without sched class\n"; continue; } - auto ConfigsForInstr = - generateSnippets(State, Opcode, Repetitor->getReservedRegs()); + + auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs); if (!ConfigsForInstr) { logAllUnhandledErrors( ConfigsForInstr.takeError(), errs(), @@ -324,7 +343,7 @@ for (const BenchmarkCode &Conf : Configurations) { InstructionBenchmark Result = ExitOnErr(Runner->runConfiguration( - Conf, NumRepetitions, *Repetitor, DumpObjectToDisk)); + Conf, NumRepetitions, Repetitors, DumpObjectToDisk)); ExitOnFileError(BenchmarkFile, Result.writeYaml(State, BenchmarkFile)); } exegesis::pfm::pfmTerminate();