Index: tools/llvm-exegesis/lib/SnippetGenerator.h =================================================================== --- tools/llvm-exegesis/lib/SnippetGenerator.h +++ tools/llvm-exegesis/lib/SnippetGenerator.h @@ -55,6 +55,13 @@ virtual ~SnippetGenerator(); + // To measure characteristics of instruction Instr, we sometimes need to + // chain it together with some other instructions. But that gives us + // collective characteristics. So we also need to measure characteristics + // of those additional instructions, and later do some post-processing. + llvm::Expected> + generateAllCodeTemplates(const Instruction &MainInstr) const; + // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. llvm::Expected> generateConfigurations(const Instruction &Instr) const; Index: tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -13,6 +13,9 @@ #include "MCInstrDescView.h" #include "SnippetGenerator.h" #include "Target.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -36,9 +39,84 @@ SnippetGenerator::~SnippetGenerator() = default; +llvm::Expected> +SnippetGenerator::generateAllCodeTemplates(const Instruction &MainInstr) const { + std::vector> AllTemplates; + + using Opcode = decltype(MCInstrDesc::Opcode); + llvm::SmallSet ProcessedOpcodes; + llvm::SetVector, + decltype(ProcessedOpcodes)> + Worklist; + + Worklist.insert(MainInstr.Description->getOpcode()); + while (!Worklist.empty()) { + AllTemplates.reserve(AllTemplates.size() + Worklist.size()); + Opcode OpcodeToProcess = Worklist.pop_back_val(); + + assert(!ProcessedOpcodes.count(OpcodeToProcess) && + "Should not process instructions that we have already processed."); + const Instruction &InstrToProcess = State.getIC().getInstr(OpcodeToProcess); + assert(InstrToProcess.Description->getOpcode() == OpcodeToProcess); + + if (auto E = generateCodeTemplates(InstrToProcess)) { + ProcessedOpcodes.insert(OpcodeToProcess); + assert(AllTemplates.capacity() > 0 && "Should have fully preallocated."); + AllTemplates.emplace_back(std::move(E.get())); + const std::vector &NewTemplates = AllTemplates.back(); + + llvm::for_each(NewTemplates, [InstrToProcess, &ProcessedOpcodes, + &Worklist](const CodeTemplate &CT) { + assert(CT.Instructions.front().Instr.Description == + InstrToProcess.Description && + "Expected the target instruction to be the first instruction in " + "the template."); + // Add every instruction (except the first one, which is the + // instruction we have just processed) to the worklist, unless it + // was processed/queued for processing already. + for (const InstructionTemplate &IT : + ArrayRef(CT.Instructions).drop_front()) { + if (!ProcessedOpcodes.count(IT.getOpcode())) + Worklist.insert(IT.getOpcode()); + } + }); + } else { + // We have failed to produce templates for instruction InstrToProcess. + // If this is the MainInstr instruction, then this is fatal. + if (AllTemplates.empty()) + return E.takeError(); + // Else, we have failed to produce templates for secondary instructions. + // Ignore error. + } + } + + // Flatten vector-of-vectors-of-elements into a vector-of-elements. + size_t TotalTemplateCount = std::accumulate( + AllTemplates.begin(), AllTemplates.end(), size_t(0), + [](size_t Size, const std::vector &Templates) { + return Size + Templates.size(); + }); + std::vector FinalTemplates(std::move(AllTemplates.front())); + FinalTemplates.reserve(TotalTemplateCount); + llvm::for_each( + llvm::make_range(std::next(std::make_move_iterator(AllTemplates.begin())), + std::make_move_iterator(AllTemplates.end())), + [&FinalTemplates](std::vector &&Templates) { + llvm::for_each( + llvm::make_range(std::make_move_iterator(Templates.begin()), + std::make_move_iterator(Templates.end())), + [&FinalTemplates](CodeTemplate &&Template) { + FinalTemplates.emplace_back(std::move(Template)); + }); + }); + assert(FinalTemplates.size() == TotalTemplateCount); + + return FinalTemplates; +} + llvm::Expected> SnippetGenerator::generateConfigurations(const Instruction &Instr) const { - if (auto E = generateCodeTemplates(Instr)) { + if (auto E = generateAllCodeTemplates(Instr)) { const auto &RATC = State.getRATC(); std::vector Output; for (CodeTemplate &CT : E.get()) { Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -27,6 +27,7 @@ using testing::ElementsAre; using testing::Gt; using testing::HasSubstr; +using testing::Lt; using testing::Not; using testing::SizeIs; using testing::UnorderedElementsAre; @@ -66,6 +67,14 @@ return std::move(CodeTemplateOrError.get()); } + std::vector checkAndGetAllCodeTemplates(unsigned Opcode) { + randomGenerator().seed(0); // Initialize seed. + const Instruction &Instr = State.getIC().getInstr(Opcode); + auto CodeTemplateOrError = Generator.generateAllCodeTemplates(Instr); + EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. + return std::move(CodeTemplateOrError.get()); + } + SnippetGeneratorT Generator; }; @@ -187,6 +196,56 @@ } } +TEST_F(LatencySnippetGeneratorTest, SETCCrExaustive) { + const unsigned Opcode = llvm::X86::SETCCr; + const std::vector CodeTemplates = + checkAndGetAllCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(Gt(2U))) << "Many templates are available"; + using OpcodeTy = decltype(MCInstrDesc::Opcode); + std::set PrimaryInstructions; + std::set SecondaryInstructions; + llvm::for_each(CodeTemplates, [&PrimaryInstructions, &SecondaryInstructions]( + const CodeTemplate &CT) { + ASSERT_THAT(CT.Instructions, SizeIs(Gt(0U))) << "Have at least one instr."; + ASSERT_THAT(CT.Instructions, SizeIs(Lt(3U))) + << "It is expected that all templates have 1 or 2 instructions"; + // IT0: PrimaryInstruction0[, SecondaryInstruction0[, ...]] + // IT1: PrimaryInstruction1(==SecondaryInstruction0)[, ...] + // ... + PrimaryInstructions.emplace(CT.Instructions.front().getOpcode()); + llvm::for_each(ArrayRef(CT.Instructions).drop_front(), + [&SecondaryInstructions](const InstructionTemplate &IT) { + SecondaryInstructions.emplace(IT.getOpcode()); + }); + }); + ASSERT_THAT(PrimaryInstructions, SizeIs(Gt(1U))) + << "Many templates are available"; + ASSERT_THAT(SecondaryInstructions, SizeIs(Gt(0U))) + << "Some secondary templates are avaliable"; + + ASSERT_TRUE(PrimaryInstructions.count(Opcode)) + << "We should have benchmarks to measure the target instruction"; + + // All entries in PrimaryInstructions (except the target instruction) should + // have been mentioned in SecondaryInstructions. + // I.e. we only measured instrs we needed to measure. + for (OpcodeTy PrimaryOpcode : PrimaryInstructions) { + if (PrimaryOpcode == Opcode) // Ignore the actual target instruction + continue; // It is unlikely that we manage to find a loop. + ASSERT_TRUE(SecondaryInstructions.count(PrimaryOpcode)) + << "Weird benchmark to measure instruction that was not required"; + } + + bool HaveAtLeastOneSecondaryOpcodeAsPrimaryOpcode = llvm::any_of( + SecondaryInstructions, [PrimaryInstructions](OpcodeTy SecondaryOpcode) { + return PrimaryInstructions.find(SecondaryOpcode) != + PrimaryInstructions.end(); + }); + ASSERT_TRUE(HaveAtLeastOneSecondaryOpcodeAsPrimaryOpcode) + << "And should have a template to measure at least one of the secondary " + "instructions as primary instruction"; +} + TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { // - BNDCL32rr // - Op0 Explicit Use RegClass(BNDR)