diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test --- a/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test @@ -4,8 +4,7 @@ # have different configs, so they should not be placed in the same cluster by # stabilization. -# CHECK-UNSTABLE: SQRTSSr -# CHECK-UNSTABLE: SQRTSSr +# CHECK-UNSTABLE-NOT: SQRTSSr --- mode: latency diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h @@ -9,7 +9,7 @@ #ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H #define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H -#include "RegisterValue.h" +#include "BenchmarkResult.h" #include "llvm/MC/MCInst.h" #include #include @@ -19,12 +19,7 @@ // A collection of instructions that are to be assembled, executed and measured. struct BenchmarkCode { - // The sequence of instructions that are to be repeated. - std::vector Instructions; - - // Before the code is executed some instructions are added to setup the - // registers initial values. - std::vector RegisterInitialValues; + InstructionBenchmarkKey Key; // We also need to provide the registers that are live on entry for the // assembler to generate proper prologue/epilogue. diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -15,8 +15,8 @@ #ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H #define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H -#include "BenchmarkCode.h" #include "LlvmState.h" +#include "RegisterValue.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInst.h" diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -31,7 +31,6 @@ BenchmarkRunner::~BenchmarkRunner() = default; - namespace { class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { public: @@ -92,10 +91,9 @@ InstrBenchmark.NumRepetitions = NumRepetitions; InstrBenchmark.Info = BC.Info; - const std::vector &Instructions = BC.Instructions; + const std::vector &Instructions = BC.Key.Instructions; - InstrBenchmark.Key.Instructions = Instructions; - InstrBenchmark.Key.RegisterInitialValues = BC.RegisterInitialValues; + InstrBenchmark.Key = BC.Key; // Assemble at least kMinInstructionsForSnippet instructions by repeating the // snippet for debug/analysis. This is so that the user clearly understands @@ -104,10 +102,10 @@ { llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); - assembleToStream( - State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, - BC.RegisterInitialValues, - Repetitor.Repeat(BC.Instructions, kMinInstructionsForSnippet), OS); + assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, + Repetitor.Repeat(Instructions, kMinInstructionsForSnippet), + OS); const ExecutableFunction EF(State.createTargetMachine(), getObjectFromBuffer(OS.str())); const auto FnBytes = EF.getFunctionBytes(); @@ -117,7 +115,7 @@ // Assemble NumRepetitions instructions repetitions of the snippet for // measurements. const auto Filler = - Repetitor.Repeat(BC.Instructions, InstrBenchmark.NumRepetitions); + Repetitor.Repeat(Instructions, InstrBenchmark.NumRepetitions); llvm::object::OwningBinary ObjectFile; if (DumpObjectToDisk) { @@ -133,7 +131,7 @@ llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, Filler, OS); + BC.LiveIns, BC.Key.RegisterInitialValues, Filler, OS); ObjectFile = getObjectFromBuffer(OS.str()); } @@ -150,7 +148,7 @@ // Scale the measurements by instruction. BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; // Scale the measurements by snippet. - BM.PerSnippetValue *= static_cast(BC.Instructions.size()) / + BM.PerSnippetValue *= static_cast(Instructions.size()) / InstrBenchmark.NumRepetitions; } @@ -167,7 +165,7 @@ return std::move(E); llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, FillFunction, OFS); + BC.LiveIns, BC.Key.RegisterInitialValues, FillFunction, OFS); return ResultPath.str(); } diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp --- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp +++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp @@ -237,39 +237,40 @@ // We shall find every opcode with benchmarks not in just one cluster, and move // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode. void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { - // Given an instruction Opcode, in which clusters do benchmarks of this - // instruction lie? Normally, they all should be in the same cluster. - std::vector> OpcodeToClusterIDs; - OpcodeToClusterIDs.resize(NumOpcodes); - // The list of opcodes that have more than one cluster. - llvm::SetVector UnstableOpcodes; - // Populate OpcodeToClusterIDs and UnstableOpcodes data structures. + // Given an instruction Opcode and Config, in which clusters do benchmarks of + // this instruction lie? Normally, they all should be in the same cluster. + struct OpcodeAndConfig { + explicit OpcodeAndConfig(const InstructionBenchmark &IB) + : Opcode(IB.keyInstruction().getOpcode()), Config(&IB.Key.Config) {} + unsigned Opcode; + const std::string *Config; + + auto Tie() const -> auto { return std::tie(Opcode, *Config); } + + bool operator<(const OpcodeAndConfig &O) const { return Tie() < O.Tie(); } + bool operator!=(const OpcodeAndConfig &O) const { return Tie() != O.Tie(); } + }; + std::map> + OpcodeConfigToClusterIDs; + // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures. assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch"); for (const auto &Point : zip(Points_, ClusterIdForPoint_)) { const ClusterId &ClusterIdOfPoint = std::get<1>(Point); if (!ClusterIdOfPoint.isValid()) continue; // Only process fully valid clusters. - const unsigned Opcode = std::get<0>(Point).keyInstruction().getOpcode(); - assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)"); + const OpcodeAndConfig Key(std::get<0>(Point)); llvm::SmallSet &ClusterIDsOfOpcode = - OpcodeToClusterIDs[Opcode]; + OpcodeConfigToClusterIDs[Key]; ClusterIDsOfOpcode.insert(ClusterIdOfPoint); - // Is there more than one ClusterID for this opcode?. - if (ClusterIDsOfOpcode.size() < 2) - continue; // If not, then at this moment this Opcode is stable. - // Else let's record this unstable opcode for future use. - UnstableOpcodes.insert(Opcode); } - assert(OpcodeToClusterIDs.size() == NumOpcodes && "sanity check"); - // We know with how many [new] clusters we will end up with. - const auto NewTotalClusterCount = Clusters_.size() + UnstableOpcodes.size(); - Clusters_.reserve(NewTotalClusterCount); - for (const size_t UnstableOpcode : UnstableOpcodes.getArrayRef()) { + for (const auto &OpcodeConfigToClusterID : OpcodeConfigToClusterIDs) { const llvm::SmallSet &ClusterIDs = - OpcodeToClusterIDs[UnstableOpcode]; - assert(ClusterIDs.size() > 1 && - "Should only have Opcodes with more than one cluster."); + OpcodeConfigToClusterID.second; + const OpcodeAndConfig &Key = OpcodeConfigToClusterID.first; + // We only care about unstable instructions. + if (ClusterIDs.size() < 2) + continue; // Create a new unstable cluster, one per Opcode. Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size())); @@ -290,8 +291,8 @@ // and the rest of the points is for the UnstableOpcode. const auto it = std::stable_partition( OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(), - [this, UnstableOpcode](size_t P) { - return Points_[P].keyInstruction().getOpcode() != UnstableOpcode; + [this, &Key](size_t P) { + return OpcodeAndConfig(Points_[P]) != Key; }); assert(std::distance(it, OldCluster.PointIndices.end()) > 0 && "Should have found at least one bad point"); @@ -314,7 +315,6 @@ "New unstable cluster should end up with no less points than there " "was clusters"); } - assert(Clusters_.size() == NewTotalClusterCount && "sanity check"); } llvm::Expected diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -115,6 +115,8 @@ CodeTemplate &operator=(const CodeTemplate &) = delete; ExecutionMode Execution = ExecutionMode::UNKNOWN; + // See InstructionBenchmarkKey.::Config. + std::string Config; // Some information about how this template has been created. std::string Info; // The list of the instructions for this template. diff --git a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp @@ -36,7 +36,7 @@ // instructions. void EmitInstruction(const MCInst &Instruction, const MCSubtargetInfo &STI) override { - Result->Instructions.push_back(Instruction); + Result->Key.Instructions.push_back(Instruction); } // Implementation of the AsmCommentConsumer. @@ -65,7 +65,7 @@ const StringRef HexValue = Parts[1].trim(); RegVal.Value = APInt( /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16); - Result->RegisterInitialValues.push_back(std::move(RegVal)); + Result->Key.RegisterInitialValues.push_back(std::move(RegVal)); return; } if (CommentText.consume_front("LIVEIN")) { diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -73,12 +73,13 @@ BC.Info = CT.Info; for (InstructionTemplate &IT : CT.Instructions) { randomizeUnsetVariables(State.getExegesisTarget(), ForbiddenRegs, IT); - BC.Instructions.push_back(IT.build()); + BC.Key.Instructions.push_back(IT.build()); } if (CT.ScratchSpacePointerInReg) BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); - BC.RegisterInitialValues = + BC.Key.RegisterInitialValues = computeRegisterInitialValues(CT.Instructions); + BC.Key.Config = CT.Config; Output.push_back(std::move(BC)); } } diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp @@ -78,8 +78,8 @@ EXPECT_FALSE((bool)Snippets.takeError()); ASSERT_THAT(*Snippets, SizeIs(1)); const auto &Snippet = (*Snippets)[0]; - ASSERT_THAT(Snippet.Instructions, ElementsAre(HasOpcode(X86::INC64r))); - ASSERT_THAT(Snippet.RegisterInitialValues, + ASSERT_THAT(Snippet.Key.Instructions, ElementsAre(HasOpcode(X86::INC64r))); + ASSERT_THAT(Snippet.Key.RegisterInitialValues, ElementsAre(RegisterInitialValueIs(X86::RAX, 15), RegisterInitialValueIs(X86::SIL, 0))); ASSERT_THAT(Snippet.LiveIns, ElementsAre(X86::RDI, X86::DL));