diff --git a/llvm/test/tools/llvm-exegesis/X86/latency-SETCCr-cond-codes-sweep.s b/llvm/test/tools/llvm-exegesis/X86/latency-SETCCr-cond-codes-sweep.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency-SETCCr-cond-codes-sweep.s @@ -0,0 +1,25 @@ +# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=1 | FileCheck %s --check-prefix=CHECK +# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=256 | FileCheck %s --check-prefix=SWEEP + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: 'SETCCr {{.*}} i_0x{{[0-9a-f]}}' + +SWEEP-DAG: 'SETCCr {{.*}} i_0x0' +SWEEP-DAG: 'SETCCr {{.*}} i_0x1' +SWEEP-DAG: 'SETCCr {{.*}} i_0x2' +SWEEP-DAG: 'SETCCr {{.*}} i_0x3' +SWEEP-DAG: 'SETCCr {{.*}} i_0x4' +SWEEP-DAG: 'SETCCr {{.*}} i_0x5' +SWEEP-DAG: 'SETCCr {{.*}} i_0x6' +SWEEP-DAG: 'SETCCr {{.*}} i_0x7' +SWEEP-DAG: 'SETCCr {{.*}} i_0x8' +SWEEP-DAG: 'SETCCr {{.*}} i_0x9' +SWEEP-DAG: 'SETCCr {{.*}} i_0xa' +SWEEP-DAG: 'SETCCr {{.*}} i_0xb' +SWEEP-DAG: 'SETCCr {{.*}} i_0xc' +SWEEP-DAG: 'SETCCr {{.*}} i_0xd' +SWEEP-DAG: 'SETCCr {{.*}} i_0xe' +SWEEP-DAG: 'SETCCr {{.*}} i_0xf' diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -38,6 +38,11 @@ bool hasImmediateVariables() const; const Instruction &getInstr() const { return *Instr; } ArrayRef getVariableValues() const { return VariableValues; } + void setVariableValues(ArrayRef NewVariableValues) { + assert(VariableValues.size() == NewVariableValues.size() && + "Value count mismatch"); + VariableValues.assign(NewVariableValues.begin(), NewVariableValues.end()); + } // Builds an MCInst from this InstructionTemplate setting its operands // to the corresponding variable values. Precondition: All VariableValues must diff --git a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h @@ -25,7 +25,7 @@ ~ParallelSnippetGenerator() override; Expected> - generateCodeTemplates(const Instruction &Instr, + generateCodeTemplates(InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const override; static constexpr const size_t kMinNumDifferentAddresses = 6; diff --git a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp @@ -154,8 +154,10 @@ } } -Expected> ParallelSnippetGenerator::generateCodeTemplates( - const Instruction &Instr, const BitVector &ForbiddenRegisters) const { +Expected> +ParallelSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); CodeTemplate CT; CT.ScratchSpacePointerInReg = Instr.hasMemoryOperands() @@ -163,16 +165,15 @@ State.getTargetMachine().getTargetTriple()) : 0; const AliasingConfigurations SelfAliasing(Instr, Instr); - InstructionTemplate IT(&Instr); if (SelfAliasing.empty()) { CT.Info = "instruction is parallel, repeating a random one."; - CT.Instructions.push_back(std::move(IT)); + CT.Instructions.push_back(std::move(Variant)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); return getSingleton(std::move(CT)); } if (SelfAliasing.hasImplicitAliasing()) { CT.Info = "instruction is serial, repeating a random one."; - CT.Instructions.push_back(std::move(IT)); + CT.Instructions.push_back(std::move(Variant)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); return getSingleton(std::move(CT)); } @@ -180,7 +181,7 @@ if (!TiedVariables.empty()) { CT.Info = "instruction has tied variables, using static renaming."; CT.Instructions = generateSnippetUsingStaticRenaming( - State, IT, TiedVariables, ForbiddenRegisters); + State, Variant, TiedVariables, ForbiddenRegisters); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); return getSingleton(std::move(CT)); } @@ -194,7 +195,7 @@ assert(PossibleRegisters.any() && "No register left to choose from"); const auto RandomReg = randomBit(PossibleRegisters); Defs.set(RandomReg); - IT.getValueFor(Op) = MCOperand::createReg(RandomReg); + Variant.getValueFor(Op) = MCOperand::createReg(RandomReg); } } // And pick random use values that are not reserved and don't alias with defs. @@ -206,12 +207,12 @@ remove(PossibleRegisters, DefAliases); assert(PossibleRegisters.any() && "No register left to choose from"); const auto RandomReg = randomBit(PossibleRegisters); - IT.getValueFor(Op) = MCOperand::createReg(RandomReg); + Variant.getValueFor(Op) = MCOperand::createReg(RandomReg); } } CT.Info = "instruction has no tied variables picking Uses different from defs"; - CT.Instructions.push_back(std::move(IT)); + CT.Instructions.push_back(std::move(Variant)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); return getSingleton(std::move(CT)); } diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h @@ -27,7 +27,7 @@ ~SerialSnippetGenerator() override; Expected> - generateCodeTemplates(const Instruction &Instr, + generateCodeTemplates(InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const override; }; diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -87,7 +87,7 @@ } static void appendCodeTemplates(const LLVMState &State, - const Instruction *Instr, + InstructionTemplate Variant, const BitVector &ForbiddenRegisters, ExecutionMode ExecutionModeBit, StringRef ExecutionClassDescription, @@ -103,7 +103,7 @@ CodeTemplate CT; CT.Execution = ExecutionModeBit; CT.Info = std::string(ExecutionClassDescription); - CT.Instructions.push_back(Instr); + CT.Instructions.push_back(std::move(Variant)); CodeTemplates.push_back(std::move(CT)); return; } @@ -115,27 +115,28 @@ case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { // Making the execution of this instruction serial by selecting one def // register to alias with one use register. - const AliasingConfigurations SelfAliasing(*Instr, *Instr); + const AliasingConfigurations SelfAliasing(Variant.getInstr(), + Variant.getInstr()); assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() && "Instr must alias itself explicitly"); - InstructionTemplate IT(Instr); // This is a self aliasing instruction so defs and uses are from the same - // instance, hence twice IT in the following call. - setRandomAliasing(SelfAliasing, IT, IT); + // instance, hence twice Variant in the following call. + setRandomAliasing(SelfAliasing, Variant, Variant); CodeTemplate CT; CT.Execution = ExecutionModeBit; CT.Info = std::string(ExecutionClassDescription); - CT.Instructions.push_back(std::move(IT)); + CT.Instructions.push_back(std::move(Variant)); CodeTemplates.push_back(std::move(CT)); return; } case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: { + const Instruction &Instr = Variant.getInstr(); // Select back-to-back non-memory instruction. for (const auto *OtherInstr : computeAliasingInstructions( - State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) { - const AliasingConfigurations Forward(*Instr, *OtherInstr); - const AliasingConfigurations Back(*OtherInstr, *Instr); - InstructionTemplate ThisIT(Instr); + State, &Instr, kMaxAliasingInstructions, ForbiddenRegisters)) { + const AliasingConfigurations Forward(Instr, *OtherInstr); + const AliasingConfigurations Back(*OtherInstr, Instr); + InstructionTemplate ThisIT(Variant); InstructionTemplate OtherIT(OtherInstr); if (!Forward.hasImplicitAliasing()) setRandomAliasing(Forward, ThisIT, OtherIT); @@ -159,12 +160,13 @@ Expected> SerialSnippetGenerator::generateCodeTemplates( - const Instruction &Instr, const BitVector &ForbiddenRegisters) const { + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { std::vector Results; - const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters); + const ExecutionMode EM = + getExecutionModes(Variant.getInstr(), ForbiddenRegisters); for (const auto EC : kExecutionClasses) { for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask)) - appendCodeTemplates(State, &Instr, ForbiddenRegisters, ExecutionModeBit, + appendCodeTemplates(State, Variant, ForbiddenRegisters, ExecutionModeBit, EC.Description, Results); if (!Results.empty()) break; diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -34,11 +34,12 @@ // Generates code templates that has a self-dependency. Expected> -generateSelfAliasingCodeTemplates(const Instruction &Instr); +generateSelfAliasingCodeTemplates(InstructionTemplate Variant); // Generates code templates without assignment constraints. Expected> -generateUnconstrainedCodeTemplates(const Instruction &Instr, StringRef Msg); +generateUnconstrainedCodeTemplates(const InstructionTemplate &Variant, + StringRef Msg); // A class representing failures that happened during Benchmark, they are used // to report informations to the user. @@ -59,9 +60,9 @@ virtual ~SnippetGenerator(); // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. - Expected> - generateConfigurations(const Instruction &Instr, - const BitVector &ExtraForbiddenRegs) const; + Error generateConfigurations(const InstructionTemplate &Variant, + std::vector &Benchmarks, + const BitVector &ExtraForbiddenRegs) const; // Given a snippet, computes which registers the setup code needs to define. std::vector computeRegisterInitialValues( @@ -74,7 +75,7 @@ private: // API to be implemented by subclasses. virtual Expected> - generateCodeTemplates(const Instruction &Instr, + generateCodeTemplates(InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const = 0; }; @@ -101,6 +102,132 @@ const BitVector &ForbiddenRegs, InstructionTemplate &IT); +// Combination generator. +// +// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following +// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}. +// +// It is important to think of input as vector-of-vectors, where the +// outer vector is the variable space, and inner vector is choice space. +// The number of choices for each variable can be different. +// +// As for implementation, it is useful to think of this as a weird number, +// where each digit (==variable) may have different base (==number of choices). +// Thus modelling of 'produce next combination' is exactly analogous to the +// incrementing of an number - increment lowest digit (pick next choice for the +// variable), and if it wrapped to the beginning then increment next digit. +template +class CombinationGenerator { + template struct WrappingIterator { + using value_type = T; + + const ArrayRef Range; + typename decltype(Range)::const_iterator Position; + + // Rewind the tape, placing the position to again point at the beginning. + void rewind() { Position = Range.begin(); } + + // Advance position forward, possibly wrapping to the beginning. + // Returns whether the wrap happened. + bool operator++() { + ++Position; + bool Wrapped = Position == Range.end(); + if (Wrapped) + rewind(); + return Wrapped; + } + + // Get the value at which we are currently pointing. + operator const value_type &() const { return *Position; } + + WrappingIterator(ArrayRef Range_) : Range(Range_) { + assert(!Range.empty() && "The range must not be empty."); + rewind(); + } + + // Only allow using our custom constructor. + WrappingIterator() = delete; + WrappingIterator(const WrappingIterator &) = delete; + WrappingIterator(WrappingIterator &&) = delete; + WrappingIterator &operator=(WrappingIterator) = delete; + WrappingIterator &operator=(const WrappingIterator &) = delete; + WrappingIterator &operator=(WrappingIterator &&) = delete; + }; + + const ArrayRef VariablesChoices; + const function_ref)> &Callback; + + void performGeneration() const { + SmallVector, variable_smallsize> + VariablesState; + + // Initialize the per-variable state to refer to the possible choices for + // that variable. + VariablesState.reserve(VariablesChoices.size()); + for (ArrayRef VariablesChoices : VariablesChoices) + VariablesState.emplace_back(VariablesChoices); + + // Temporary buffer to store each combination before performing Callback. + SmallVector CurrentCombination; + CurrentCombination.resize(VariablesState.size()); + + while (true) { + // Gather the currently-selected variable choices into a vector. + for (auto I : llvm::zip(VariablesState, CurrentCombination)) + std::get<1>(I) = std::get<0>(I); + // And pass the new combination into callback, as intended. + if (/*Abort=*/Callback(CurrentCombination)) + return; + + // 'increment' the whole VariablesState, much like you would increment + // a number: starting from the least significant element, increment it, + // and if it wrapped, then propagate that carry by also incrementing next + // (more significant) element. + for (WrappingIterator &VariableState : + llvm::reverse(VariablesState)) { + bool Wrapped = ++VariableState; + if (!Wrapped) + break; + + if (VariablesState.begin() == &VariableState) + return; // The "most significant" variable has wrapped, which means + // that we have produced all the combinations. + + // We have carry - increment more significant variable next.. + } + } + }; + +public: + CombinationGenerator(ArrayRef VariablesChoices_, + const function_ref)> &Cb_) + : VariablesChoices(VariablesChoices_), Callback(Cb_) { +#ifndef NDEBUG + assert(!VariablesChoices.empty() && "There should be some variables."); + llvm::for_each(VariablesChoices, [](ArrayRef VariableChoices) { + assert(!VariableChoices.empty() && + "There must always be some choice, at least a placeholder one."); + }); +#endif + } + + // How many combinations can we produce, max? + // This is at most how many times the callback will be called. + size_t numCombinations() const { + size_t NumVariants = 1; + for (ArrayRef VariableChoices : VariablesChoices) + NumVariants *= VariableChoices.size(); + assert(NumVariants >= 1 && + "We should always end up producing at least one combination"); + return NumVariants; + } + + // Actually perform exhaustive combination generation. + // Each result will be passed into the callback. + void generate() { performGeneration(); } +}; + } // namespace exegesis } // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -38,13 +38,14 @@ SnippetGenerator::~SnippetGenerator() = default; -Expected> SnippetGenerator::generateConfigurations( - const Instruction &Instr, const BitVector &ExtraForbiddenRegs) const { +Error SnippetGenerator::generateConfigurations( + const InstructionTemplate &Variant, std::vector &Benchmarks, + const BitVector &ExtraForbiddenRegs) const { BitVector ForbiddenRegs = State.getRATC().reservedRegisters(); ForbiddenRegs |= ExtraForbiddenRegs; // If the instruction has memory registers, prevent the generator from // using the scratch register and its aliasing registers. - if (Instr.hasMemoryOperands()) { + if (Variant.getInstr().hasMemoryOperands()) { const auto &ET = State.getExegesisTarget(); unsigned ScratchSpacePointerInReg = ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple()); @@ -55,7 +56,7 @@ State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits(); // If the instruction implicitly writes to ScratchSpacePointerInReg , abort. // FIXME: We could make a copy of the scratch register. - for (const auto &Op : Instr.Operands) { + for (const auto &Op : Variant.getInstr().Operands) { if (Op.isDef() && Op.isImplicitReg() && ScratchRegAliases.test(Op.getImplicitReg())) return make_error( @@ -64,16 +65,19 @@ ForbiddenRegs |= ScratchRegAliases; } - if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) { - std::vector Output; - for (CodeTemplate &CT : E.get()) { + if (auto E = generateCodeTemplates(Variant, ForbiddenRegs)) { + MutableArrayRef Templates = E.get(); + + // Avoid reallocations in the loop. + Benchmarks.reserve(Benchmarks.size() + Templates.size()); + for (CodeTemplate &CT : Templates) { // TODO: Generate as many BenchmarkCode as needed. { BenchmarkCode BC; BC.Info = CT.Info; for (InstructionTemplate &IT : CT.Instructions) { if (auto error = randomizeUnsetVariables(State, ForbiddenRegs, IT)) - return std::move(error); + return error; BC.Key.Instructions.push_back(IT.build()); } if (CT.ScratchSpacePointerInReg) @@ -81,13 +85,14 @@ BC.Key.RegisterInitialValues = computeRegisterInitialValues(CT.Instructions); BC.Key.Config = CT.Config; - Output.push_back(std::move(BC)); - if (Output.size() >= Opts.MaxConfigsPerOpcode) - return Output; // Early exit if we exceeded the number of allowed - // configs. + Benchmarks.emplace_back(std::move(BC)); + if (Benchmarks.size() >= Opts.MaxConfigsPerOpcode) { + // We reached the number of allowed configs and return early. + return Error::success(); + } } } - return Output; + return Error::success(); } else return E.takeError(); } @@ -135,34 +140,35 @@ } Expected> -generateSelfAliasingCodeTemplates(const Instruction &Instr) { - const AliasingConfigurations SelfAliasing(Instr, Instr); +generateSelfAliasingCodeTemplates(InstructionTemplate Variant) { + const AliasingConfigurations SelfAliasing(Variant.getInstr(), + Variant.getInstr()); if (SelfAliasing.empty()) return make_error("empty self aliasing"); std::vector Result; Result.emplace_back(); CodeTemplate &CT = Result.back(); - InstructionTemplate IT(&Instr); if (SelfAliasing.hasImplicitAliasing()) { CT.Info = "implicit Self cycles, picking random values."; } else { CT.Info = "explicit self cycles, selecting one aliasing Conf."; // This is a self aliasing instruction so defs and uses are from the same - // instance, hence twice IT in the following call. - setRandomAliasing(SelfAliasing, IT, IT); + // instance, hence twice Variant in the following call. + setRandomAliasing(SelfAliasing, Variant, Variant); } - CT.Instructions.push_back(std::move(IT)); + CT.Instructions.push_back(std::move(Variant)); return std::move(Result); } Expected> -generateUnconstrainedCodeTemplates(const Instruction &Instr, StringRef Msg) { +generateUnconstrainedCodeTemplates(const InstructionTemplate &Variant, + StringRef Msg) { std::vector Result; Result.emplace_back(); CodeTemplate &CT = Result.back(); CT.Info = std::string(formatv("{0}, repeating an unconstrained assignment", Msg)); - CT.Instructions.emplace_back(&Instr); + CT.Instructions.push_back(std::move(Variant)); return std::move(Result); } diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -126,6 +126,16 @@ return true; } + // For some instructions, it is interesting to measure how it's performance + // characteristics differ depending on it's operands. + // This allows us to produce all the interesting variants. + virtual std::vector + generateInstructionVariants(const Instruction &Instr, + unsigned MaxConfigsPerOpcode) const { + // By default, we're happy with whatever randomizer will give us. + return {&Instr}; + } + // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -8,14 +8,15 @@ #include "../Target.h" #include "../Error.h" +#include "../ParallelSnippetGenerator.h" #include "../SerialSnippetGenerator.h" #include "../SnippetGenerator.h" -#include "../ParallelSnippetGenerator.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" +#include "llvm/ADT/Sequence.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/FormatVariadic.h" @@ -256,14 +257,16 @@ using SerialSnippetGenerator::SerialSnippetGenerator; Expected> - generateCodeTemplates(const Instruction &Instr, + generateCodeTemplates(InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const override; }; } // namespace Expected> X86SerialSnippetGenerator::generateCodeTemplates( - const Instruction &Instr, const BitVector &ForbiddenRegisters) const { + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + if (const auto reason = isInvalidOpcode(Instr)) return make_error(reason); @@ -287,8 +290,8 @@ switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return SerialSnippetGenerator::generateCodeTemplates(Instr, - ForbiddenRegisters); + return SerialSnippetGenerator::generateCodeTemplates(Variant, + ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: case X86II::SpecialFP: @@ -301,7 +304,7 @@ // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) // They are intrinsically serial and do not modify the state of the stack. - return generateSelfAliasingCodeTemplates(Instr); + return generateSelfAliasingCodeTemplates(Variant); default: llvm_unreachable("Unknown FP Type!"); } @@ -313,7 +316,7 @@ using ParallelSnippetGenerator::ParallelSnippetGenerator; Expected> - generateCodeTemplates(const Instruction &Instr, + generateCodeTemplates(InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const override; }; @@ -321,7 +324,9 @@ Expected> X86ParallelSnippetGenerator::generateCodeTemplates( - const Instruction &Instr, const BitVector &ForbiddenRegisters) const { + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + if (const auto reason = isInvalidOpcode(Instr)) return make_error(reason); @@ -342,8 +347,8 @@ switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return ParallelSnippetGenerator::generateCodeTemplates(Instr, - ForbiddenRegisters); + return ParallelSnippetGenerator::generateCodeTemplates(Variant, + ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: case X86II::SpecialFP: @@ -355,13 +360,13 @@ // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) // They are intrinsically serial and do not modify the state of the stack. // We generate the same code for latency and uops. - return generateSelfAliasingCodeTemplates(Instr); + return generateSelfAliasingCodeTemplates(Variant); case X86II::CompareFP: case X86II::CondMovFP: // We can compute uops for any FP instruction that does not grow or shrink // the stack (either do not touch the stack or push as much as they pop). return generateUnconstrainedCodeTemplates( - Instr, "instruction does not grow/shrink the FP stack"); + Variant, "instruction does not grow/shrink the FP stack"); default: llvm_unreachable("Unknown FP Type!"); } @@ -592,6 +597,10 @@ Opcode != X86::LEA64_32r && Opcode != X86::LEA16r; } + std::vector + generateInstructionVariants(const Instruction &Instr, + unsigned MaxConfigsPerOpcode) const override; + std::unique_ptr createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const override { @@ -653,10 +662,6 @@ AssignedValue = MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO)); return Error::success(); - case X86::OperandType::OPERAND_COND_CODE: - AssignedValue = - MCOperand::createImm(randomIndex(X86::CondCode::LAST_VALID_COND)); - return Error::success(); default: break; } @@ -741,6 +746,63 @@ return {}; // Not yet implemented. } +// Instruction can have some variable operands, and we may want to see how +// different operands affect performance. So for each operand position, +// precompute all the possible choices we might care about, +// and greedily generate all the possible combinations of choices. +std::vector ExegesisX86Target::generateInstructionVariants( + const Instruction &Instr, unsigned MaxConfigsPerOpcode) const { + bool Exploration = false; + SmallVector, 4> VariableChoices; + VariableChoices.resize(Instr.Variables.size()); + for (auto I : llvm::zip(Instr.Variables, VariableChoices)) { + const Variable &Var = std::get<0>(I); + SmallVectorImpl &Choices = std::get<1>(I); + + switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) { + default: + // We don't wish to explicitly explore this variable. + Choices.emplace_back(); // But add invalid MCOperand to simplify logic. + continue; + case X86::OperandType::OPERAND_COND_CODE: { + Exploration = true; + auto CondCodes = seq((int)X86::CondCode::COND_O, + 1 + (int)X86::CondCode::LAST_VALID_COND); + Choices.reserve(std::distance(CondCodes.begin(), CondCodes.end())); + for (int CondCode : CondCodes) + Choices.emplace_back(MCOperand::createImm(CondCode)); + break; + } + } + } + + // If we don't wish to explore any variables, defer to the baseline method. + if (!Exploration) + return ExegesisTarget::generateInstructionVariants(Instr, + MaxConfigsPerOpcode); + + std::vector Variants; + size_t NumVariants; + CombinationGenerator G( + VariableChoices, [&](ArrayRef State) -> bool { + Variants.emplace_back(&Instr); + Variants.back().setVariableValues(State); + // Did we run out of space for variants? + return Variants.size() >= NumVariants; + }); + + // How many operand combinations can we produce, within the limit? + NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode); + // And actually produce all the wanted operand combinations. + Variants.reserve(NumVariants); + G.generate(); + + assert(Variants.size() == NumVariants && + Variants.size() <= MaxConfigsPerOpcode && + "Should not produce too many variants"); + return Variants; +} + static ExegesisTarget *getTheExegesisX86Target() { static ExegesisX86Target Target; return &Target; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -238,6 +238,10 @@ if (InstrDesc.isCall() || InstrDesc.isReturn()) return make_error("Unsupported opcode: isCall/isReturn"); + const std::vector InstructionVariants = + State.getExegesisTarget().generateInstructionVariants( + Instr, MaxConfigsPerOpcode); + SnippetGenerator::Options SnippetOptions; SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode; const std::unique_ptr Generator = @@ -245,7 +249,16 @@ SnippetOptions); if (!Generator) ExitWithError("cannot create snippet generator"); - return Generator->generateConfigurations(Instr, ForbiddenRegs); + + std::vector Benchmarks; + for (const InstructionTemplate &Variant : InstructionVariants) { + if (Benchmarks.size() >= MaxConfigsPerOpcode) + break; + if (auto Err = Generator->generateConfigurations(Variant, Benchmarks, + ForbiddenRegs)) + return std::move(Err); + } + return Benchmarks; } void benchmarkMain() { diff --git a/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt b/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt --- a/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt +++ b/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt @@ -15,6 +15,7 @@ ClusteringTest.cpp PerfHelperTest.cpp RegisterValueTest.cpp + SnippetGeneratorTest.cpp ) target_link_libraries(LLVMExegesisTests PRIVATE LLVMExegesis) diff --git a/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp @@ -40,7 +40,7 @@ randomGenerator().seed(0); // Initialize seed. const Instruction &Instr = State.getIC().getInstr(Opcode); auto CodeTemplateOrError = Generator.generateCodeTemplates( - Instr, State.getRATC().emptyRegisters()); + &Instr, State.getRATC().emptyRegisters()); EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. return std::move(CodeTemplateOrError.get()); } @@ -91,7 +91,8 @@ const Instruction &Instr = State.getIC().getInstr(Mips::XOR); auto AllRegisters = State.getRATC().emptyRegisters(); AllRegisters.flip(); - auto Error = Generator.generateCodeTemplates(Instr, AllRegisters).takeError(); + auto Error = + Generator.generateCodeTemplates(&Instr, AllRegisters).takeError(); EXPECT_TRUE((bool)Error); consumeError(std::move(Error)); } diff --git a/llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp @@ -0,0 +1,183 @@ +//===-- SnippetGeneratorTest.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SnippetGenerator.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace llvm { +namespace exegesis { + +namespace { + +TEST(CombinationGenerator, Square) { + const std::vector> Choices{{0, 1}, {2, 3}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 2}, + {0, 3}, + {1, 2}, + {1, 3}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, MiddleColumn) { + const std::vector> Choices{{0}, {1, 2}, {3}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 1, 3}, + {0, 2, 3}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, SideColumns) { + const std::vector> Choices{{0, 1}, {2}, {3, 4}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 2, 3}, + {0, 2, 4}, + {1, 2, 3}, + {1, 2, 4}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, LeftColumn) { + const std::vector> Choices{{0, 1}, {2}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 2}, + {1, 2}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, RightColumn) { + const std::vector> Choices{{0}, {1, 2}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 1}, + {0, 2}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, Column) { + const std::vector> Choices{{0, 1}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0}, + {1}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, Row) { + const std::vector> Choices{{0}, {1}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0, 1}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +TEST(CombinationGenerator, Singleton) { + const std::vector> Choices{{0}}; + + std::vector> Variants; + CombinationGenerator, 4> G( + Choices, [&](ArrayRef State) -> bool { + Variants.emplace_back(State); + return false; // keep going + }); + const size_t NumVariants = G.numCombinations(); + G.generate(); + + const std::vector> ExpectedVariants{ + {0}, + }; + ASSERT_THAT(Variants, ::testing::SizeIs(NumVariants)); + ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); +} + +} // namespace +} // namespace exegesis +} // namespace llvm diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -51,7 +51,7 @@ randomGenerator().seed(0); // Initialize seed. const Instruction &Instr = State.getIC().getInstr(Opcode); auto CodeTemplateOrError = Generator.generateCodeTemplates( - Instr, State.getRATC().emptyRegisters()); + &Instr, State.getRATC().emptyRegisters()); EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. return std::move(CodeTemplateOrError.get()); } @@ -153,7 +153,8 @@ const Instruction &Instr = State.getIC().getInstr(Opcode); auto AllRegisters = State.getRATC().emptyRegisters(); AllRegisters.flip(); - auto Error = Generator.generateCodeTemplates(Instr, AllRegisters).takeError(); + auto Error = + Generator.generateCodeTemplates(&Instr, AllRegisters).takeError(); EXPECT_TRUE((bool)Error); consumeError(std::move(Error)); } @@ -207,11 +208,12 @@ // - Op4 Implicit Use Reg(MXSCR) const unsigned Opcode = X86::VCVTUSI642SDZrrb_Int; const Instruction &Instr = State.getIC().getInstr(Opcode); - auto Configs = - Generator.generateConfigurations(Instr, State.getRATC().emptyRegisters()); - ASSERT_FALSE(Configs.takeError()); - ASSERT_THAT(*Configs, SizeIs(1)); - const BenchmarkCode &BC = (*Configs)[0]; + std::vector Configs; + auto Error = Generator.generateConfigurations( + &Instr, Configs, State.getRATC().emptyRegisters()); + ASSERT_FALSE(Error); + ASSERT_THAT(Configs, SizeIs(1)); + const BenchmarkCode &BC = Configs[0]; ASSERT_THAT(BC.Key.Instructions, SizeIs(1)); ASSERT_TRUE(BC.Key.Instructions[0].getOperand(3).isImm()); } @@ -357,9 +359,9 @@ TEST_F(ParallelSnippetGeneratorTest, MOV16ms) { const unsigned Opcode = X86::MOV16ms; const Instruction &Instr = State.getIC().getInstr(Opcode); - auto Err = - Generator.generateConfigurations(Instr, State.getRATC().emptyRegisters()) - .takeError(); + std::vector Benchmarks; + auto Err = Generator.generateConfigurations(&Instr, Benchmarks, + State.getRATC().emptyRegisters()); EXPECT_TRUE((bool)Err); EXPECT_THAT(toString(std::move(Err)), testing::HasSubstr("no available registers")); @@ -380,7 +382,7 @@ private: Expected> - generateCodeTemplates(const Instruction &, const BitVector &) const override { + generateCodeTemplates(InstructionTemplate, const BitVector &) const override { return make_error("not implemented", inconvertibleErrorCode()); } }; @@ -412,9 +414,9 @@ // - hasAliasingRegisters const unsigned Opcode = X86::MOVSB; const Instruction &Instr = State.getIC().getInstr(Opcode); - auto Error = - Generator.generateConfigurations(Instr, State.getRATC().emptyRegisters()) - .takeError(); + std::vector Benchmarks; + auto Error = Generator.generateConfigurations( + &Instr, Benchmarks, State.getRATC().emptyRegisters()); EXPECT_TRUE((bool)Error); consumeError(std::move(Error)); }