Index: tools/llvm-exegesis/lib/CodeTemplate.h =================================================================== --- tools/llvm-exegesis/lib/CodeTemplate.h +++ tools/llvm-exegesis/lib/CodeTemplate.h @@ -17,6 +17,7 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H #include "MCInstrDescView.h" +#include "llvm/ADT/BitmaskEnum.h" namespace exegesis { @@ -45,9 +46,55 @@ llvm::SmallVector VariableValues; }; +enum class ExecutionMode : uint8_t { + UNKNOWN = 0U, + // The instruction is always serial because implicit Use and Def alias. + // e.g. AAA (alias via EFLAGS) + ALWAYS_SERIAL_VIA_IMPLICIT_REGS = 1u << 0, + + // The instruction is always serial because one Def is tied to a Use. + // e.g. AND32ri (alias via tied GR32) + ALWAYS_SERIAL_VIA_TIED_REGS = 1u << 1, + + // The execution can be made serial by inserting a second instruction that + // clobbers memory. + // e.g. MOV8rm + SERIAL_VIA_MEMORY_INSTR = 1u << 2, + + // The execution can be made serial by picking one Def that aliases with one + // Use. + // e.g. VXORPSrr XMM1, XMM1, XMM2 + SERIAL_VIA_EXPLICIT_REGS = 1u << 3, + + // The execution can be made serial by inserting a second instruction that + // uses one of the Defs and defs one of the Use. + // e.g. + // 1st instruction: MMX_PMOVMSKBrr ECX, MM7 + // 2nd instruction: MMX_MOVD64rr MM7, ECX + // or instruction: MMX_MOVD64to64rr MM7, ECX + // or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1 + SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4, + + PARALLEL_VIA_LACKING_OPERANDS = 1u << 5, + PARALLEL_VIA_EXPLICIT_REGS = 1u << 6, + LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS) +}; + +// Returns a human readable string for the enum. +llvm::StringRef getName(ExecutionMode Execution); + +// Returns a sequence of increasing powers of two corresponding to all the +// Execution flags. +llvm::ArrayRef getAllExecutionBits(); + +// Decomposes Execution into individual set bits. 
+llvm::SmallVector getExecutionModeBits(ExecutionMode); + +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + // A CodeTemplate is a set of InstructionTemplates that may not be fully // specified (i.e. some variables are not yet set). This allows the -// BenchmarkRunner to instantiate it many times with specific values to study +// SnippetGenerator to instantiate it many times with specific values to study // their impact on instruction's performance. struct CodeTemplate { CodeTemplate() = default; @@ -57,6 +104,7 @@ CodeTemplate(const CodeTemplate &) = delete; CodeTemplate &operator=(const CodeTemplate &) = delete; + ExecutionMode Execution = ExecutionMode::UNKNOWN; // Some information about how this template has been created. std::string Info; // The list of the instructions for this template. Index: tools/llvm-exegesis/lib/CodeTemplate.cpp =================================================================== --- tools/llvm-exegesis/lib/CodeTemplate.cpp +++ tools/llvm-exegesis/lib/CodeTemplate.cpp @@ -65,4 +65,51 @@ return Result; } +llvm::StringRef getName(ExecutionMode Bit) { + assert(llvm::isPowerOf2_32(static_cast(Bit)) && + "Bit must be a power of two"); + switch (Bit) { + case ExecutionMode::UNKNOWN: + return "UNKNOWN"; + case ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS: + return "ALWAYS_SERIAL_VIA_IMPLICIT_REGS"; + case ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS: + return "ALWAYS_SERIAL_VIA_TIED_REGS"; + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: + return "SERIAL_VIA_MEMORY_INSTR"; + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: + return "SERIAL_VIA_EXPLICIT_REGS"; + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: + return "SERIAL_VIA_NON_MEMORY_INSTR"; + case ExecutionMode::PARALLEL_VIA_LACKING_OPERANDS: + return "PARALLEL_VIA_LACKING_OPERANDS"; + case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS: + return "PARALLEL_VIA_EXPLICIT_REGS"; + } + llvm_unreachable("Missing enum case"); +} + +static const ExecutionMode kAllExecutionModeBits[] = { + 
ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS, + ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS, + ExecutionMode::SERIAL_VIA_MEMORY_INSTR, + ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + ExecutionMode::PARALLEL_VIA_LACKING_OPERANDS, + ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS, +}; + +llvm::ArrayRef getAllExecutionBits() { + return kAllExecutionModeBits; +} + +llvm::SmallVector +getExecutionModeBits(ExecutionMode Execution) { + llvm::SmallVector Result; + for (const auto Bit : getAllExecutionBits()) + if ((Execution & Bit) == Bit) + Result.push_back(Bit); + return Result; +} + } // namespace exegesis Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- tools/llvm-exegesis/lib/Latency.h +++ tools/llvm-exegesis/lib/Latency.h @@ -26,12 +26,8 @@ LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} ~LatencySnippetGenerator() override; - llvm::Expected - generateCodeTemplate(const Instruction &Instr) const override; - -private: - llvm::Expected - generateTwoInstructionPrototype(const Instruction &Instr) const; + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override; }; class LatencyBenchmarkRunner : public BenchmarkRunner { Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -20,53 +20,150 @@ namespace exegesis { -LatencySnippetGenerator::~LatencySnippetGenerator() = default; +struct ExecutionClass { + ExecutionMode Mask; + const char *Description; +} static const kExecutionClasses[] = { + {ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS | + ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS, + "Repeating a single implicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + "Repeating a single explicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_MEMORY_INSTR | + 
ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + "Repeating two instructions"}, +}; + +static constexpr size_t kMaxAliasingInstructions = 10; -llvm::Expected -LatencySnippetGenerator::generateTwoInstructionPrototype( - const Instruction &Instr) const { +static std::vector +computeAliasingInstructions(const LLVMState &State, const Instruction &Instr, + size_t MaxAliasingInstructions) { + // Randomly iterate the set of instructions. std::vector Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); std::iota(Opcodes.begin(), Opcodes.end(), 0U); std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); + + std::vector AliasingInstructions; for (const unsigned OtherOpcode : Opcodes) { - if (OtherOpcode == Instr.Description->Opcode) + if (OtherOpcode == Instr.Description->getOpcode()) continue; const Instruction OtherInstr(State, OtherOpcode); if (OtherInstr.hasMemoryOperands()) continue; - const AliasingConfigurations Forward(Instr, OtherInstr); - const AliasingConfigurations Back(OtherInstr, Instr); - if (Forward.empty() || Back.empty()) - continue; - InstructionTemplate ThisIT(Instr); - InstructionTemplate OtherIT(OtherInstr); - if (!Forward.hasImplicitAliasing()) - setRandomAliasing(Forward, ThisIT, OtherIT); - if (!Back.hasImplicitAliasing()) - setRandomAliasing(Back, OtherIT, ThisIT); - CodeTemplate CT; - CT.Info = llvm::formatv("creating cycle through {0}.", - State.getInstrInfo().getName(OtherOpcode)); - CT.Instructions.push_back(std::move(ThisIT)); - CT.Instructions.push_back(std::move(OtherIT)); - return std::move(CT); + if (Instr.hasAliasingRegistersThrough(OtherInstr)) + AliasingInstructions.push_back(std::move(OtherInstr)); + if (AliasingInstructions.size() >= MaxAliasingInstructions) + break; } - return llvm::make_error( - "Infeasible : Didn't find any scheme to make the instruction serial"); + return AliasingInstructions; } -llvm::Expected -LatencySnippetGenerator::generateCodeTemplate(const Instruction &Instr) const { +static ExecutionMode 
getExecutionModes(const Instruction &Instr) { + ExecutionMode EM; + if (Instr.hasAliasingImplicitRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS; + if (Instr.hasTiedRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS; if (Instr.hasMemoryOperands()) + EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; + else { + if (Instr.hasAliasingRegisters()) + EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; + if (Instr.hasOneUseOrOneDef()) + EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; + } + return EM; +} + +static void appendCodeTemplates(const LLVMState &State, + const Instruction &Instr, + ExecutionMode ExecutionModeBit, + llvm::StringRef ExecutionClassDescription, + std::vector &CodeTemplates) { + assert(llvm::isPowerOf2_32(static_cast(ExecutionModeBit)) && + "ExecutionModeBit must be a power of two"); + switch (ExecutionModeBit) { + case ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS: + // Nothing to do, the instruction is always serial. + LLVM_FALLTHROUGH; + case ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS: { + // Picking whatever value for the tied variable will make the instruction + // serial. + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(Instr); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: { + // Select back-to-back memory instruction. + // TODO: Implement me. + return; + } + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { + // Making the execution of this instruction serial by selecting one def + // register to alias with one use register. + const AliasingConfigurations SelfAliasing(Instr, Instr); + assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() && + "Instr must alias itself explicitly"); + InstructionTemplate IT(Instr); + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice IT in the following call. 
+ setRandomAliasing(SelfAliasing, IT, IT); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(std::move(IT)); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: { + // Select back-to-back non-memory instruction. + for (const auto OtherInstr : + computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) { + const AliasingConfigurations Forward(Instr, OtherInstr); + const AliasingConfigurations Back(OtherInstr, Instr); + InstructionTemplate ThisIT(Instr); + InstructionTemplate OtherIT(OtherInstr); + if (!Forward.hasImplicitAliasing()) + setRandomAliasing(Forward, ThisIT, OtherIT); + if (!Back.hasImplicitAliasing()) + setRandomAliasing(Back, OtherIT, ThisIT); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(std::move(ThisIT)); + CT.Instructions.push_back(std::move(OtherIT)); + CodeTemplates.push_back(std::move(CT)); + } + return; + } + default: + llvm_unreachable("Unhandled enum value"); + } +} + +LatencySnippetGenerator::~LatencySnippetGenerator() = default; + +llvm::Expected> +LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { + std::vector Results; + const ExecutionMode EM = getExecutionModes(Instr); + for (const auto EC : kExecutionClasses) { + for (const auto ExecutionExecutionModeBit : + getExecutionModeBits(EM & EC.Mask)) + appendCodeTemplates(State, Instr, ExecutionExecutionModeBit, + EC.Description, Results); + if (!Results.empty()) + break; + } + if (Results.empty()) return llvm::make_error( - "Infeasible : has memory operands"); - if (auto CT = generateSelfAliasingCodeTemplate(Instr)) - return CT; - else - llvm::consumeError(CT.takeError()); - // No self aliasing, trying to create a dependency through another opcode. 
- return generateTwoInstructionPrototype(Instr); + "No strategy found to make the execution serial"); + return std::move(Results); } const char *LatencyBenchmarkRunner::getCounterName() const { Index: tools/llvm-exegesis/lib/MCInstrDescView.h =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.h +++ tools/llvm-exegesis/lib/MCInstrDescView.h @@ -125,6 +125,11 @@ // reads or write the same memory region. bool hasMemoryOperands() const; + // Returns whether this instruction has at least one use or one def. + // Repeating this instruction may execute sequentially by adding an + // instruction that aliases one of these. + bool hasOneUseOrOneDef() const; + // Convenient function to help with debugging. void dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const; @@ -174,10 +179,7 @@ bool empty() const; // True if no aliasing configuration is found. bool hasImplicitAliasing() const; - void setExplicitAliasing() const; - const Instruction &DefInstruction; - const Instruction &UseInstruction; llvm::SmallVector Configurations; }; Index: tools/llvm-exegesis/lib/MCInstrDescView.cpp =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -27,7 +27,14 @@ return TiedOperands[0]; } -bool Variable::hasTiedOperands() const { return TiedOperands.size() > 1; } +bool Variable::hasTiedOperands() const { + assert(TiedOperands.size() <= 2 && + "No more than two operands can be tied together"); + // By definition only Use and Def operands can be tied together. + // TiedOperands[0] is the Def operand (LLVM stores defs first). + // TiedOperands[1] is the Use operand. 
+ return TiedOperands.size() > 1; +} unsigned Operand::getIndex() const { assert(Index >= 0 && "Index must be set"); @@ -197,6 +204,10 @@ return AllDefRegs.anyCommon(AllUseRegs); } +bool Instruction::hasOneUseOrOneDef() const { + return AllDefRegs.count() || AllUseRegs.count(); +} + void Instruction::dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const { Stream << "- " << Name << "\n"; @@ -288,8 +299,7 @@ } AliasingConfigurations::AliasingConfigurations( - const Instruction &DefInstruction, const Instruction &UseInstruction) - : DefInstruction(DefInstruction), UseInstruction(UseInstruction) { + const Instruction &DefInstruction, const Instruction &UseInstruction) { if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) { auto CommonRegisters = UseInstruction.AllUseRegs; CommonRegisters &= DefInstruction.AllDefRegs; Index: tools/llvm-exegesis/lib/SnippetGenerator.h =================================================================== --- tools/llvm-exegesis/lib/SnippetGenerator.h +++ tools/llvm-exegesis/lib/SnippetGenerator.h @@ -30,6 +30,17 @@ namespace exegesis { +std::vector getSingleton(CodeTemplate &CT); + +// Generates code templates that have a self-dependency. +llvm::Expected> +generateSelfAliasingCodeTemplates(const Instruction &Instr); + +// Generates code templates without assignment constraints. +llvm::Expected> +generateUnconstrainedCodeTemplates(const Instruction &Instr, + llvm::StringRef Msg); + // A class representing failures that happened during Benchmark, they are used // to report informations to the user. class SnippetGeneratorFailure : public llvm::StringError { @@ -55,18 +66,10 @@ protected: const LLVMState &State; - // Generates a single code template that has a self-dependency. - llvm::Expected - generateSelfAliasingCodeTemplate(const Instruction &Instr) const; - // Generates a single code template without assignment constraints. 
- llvm::Expected - generateUnconstrainedCodeTemplate(const Instruction &Instr, - llvm::StringRef Msg) const; - private: // API to be implemented by subclasses. - virtual llvm::Expected - generateCodeTemplate(const Instruction &Instr) const = 0; + virtual llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const = 0; }; // A global Random Number Generator to randomize configurations. Index: tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -22,6 +22,12 @@ namespace exegesis { +std::vector getSingleton(CodeTemplate &CT) { + std::vector Result; + Result.push_back(std::move(CT)); + return Result; +} + SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S) : llvm::StringError(S, llvm::inconvertibleErrorCode()) {} @@ -31,26 +37,28 @@ llvm::Expected> SnippetGenerator::generateConfigurations(const Instruction &Instr) const { - if (auto E = generateCodeTemplate(Instr)) { - CodeTemplate &CT = E.get(); + if (auto E = generateCodeTemplates(Instr)) { const auto &RATC = State.getRATC(); - const llvm::BitVector &ForbiddenRegs = - CT.ScratchSpacePointerInReg - ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits() - : RATC.emptyRegisters(); std::vector Output; - // TODO: Generate as many BenchmarkCode as needed. - { - BenchmarkCode BC; - BC.Info = CT.Info; - for (InstructionTemplate &IT : CT.Instructions) { - randomizeUnsetVariables(ForbiddenRegs, IT); - BC.Instructions.push_back(IT.build()); + for (CodeTemplate &CT : E.get()) { + const llvm::BitVector &ForbiddenRegs = + CT.ScratchSpacePointerInReg + ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits() + : RATC.emptyRegisters(); + // TODO: Generate as many BenchmarkCode as needed. 
+ { + BenchmarkCode BC; + BC.Info = CT.Info; + for (InstructionTemplate &IT : CT.Instructions) { + randomizeUnsetVariables(ForbiddenRegs, IT); + BC.Instructions.push_back(IT.build()); + } + if (CT.ScratchSpacePointerInReg) + BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); + BC.RegisterInitialValues = + computeRegisterInitialValues(CT.Instructions); + Output.push_back(std::move(BC)); } - if (CT.ScratchSpacePointerInReg) - BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); - BC.RegisterInitialValues = computeRegisterInitialValues(CT.Instructions); - Output.push_back(std::move(BC)); } return Output; } else @@ -99,13 +107,14 @@ return RIV; } -llvm::Expected SnippetGenerator::generateSelfAliasingCodeTemplate( - const Instruction &Instr) const { +llvm::Expected> +generateSelfAliasingCodeTemplates(const Instruction &Instr) { const AliasingConfigurations SelfAliasing(Instr, Instr); - if (SelfAliasing.empty()) { + if (SelfAliasing.empty()) return llvm::make_error("empty self aliasing"); - } - CodeTemplate CT; + std::vector Result; + Result.emplace_back(); + CodeTemplate &CT = Result.back(); InstructionTemplate IT(Instr); if (SelfAliasing.hasImplicitAliasing()) { CT.Info = "implicit Self cycles, picking random values."; @@ -116,16 +125,18 @@ setRandomAliasing(SelfAliasing, IT, IT); } CT.Instructions.push_back(std::move(IT)); - return std::move(CT); + return std::move(Result); } -llvm::Expected -SnippetGenerator::generateUnconstrainedCodeTemplate(const Instruction &Instr, - llvm::StringRef Msg) const { - CodeTemplate CT; +llvm::Expected> +generateUnconstrainedCodeTemplates(const Instruction &Instr, + llvm::StringRef Msg) { + std::vector Result; + Result.emplace_back(); + CodeTemplate &CT = Result.back(); CT.Info = llvm::formatv("{0}, repeating an unconstrained assignment", Msg); CT.Instructions.emplace_back(Instr); - return std::move(CT); + return std::move(Result); } std::mt19937 &randomGenerator() { Index: tools/llvm-exegesis/lib/Uops.h 
=================================================================== --- tools/llvm-exegesis/lib/Uops.h +++ tools/llvm-exegesis/lib/Uops.h @@ -25,8 +25,8 @@ UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} ~UopsSnippetGenerator() override; - llvm::Expected - generateCodeTemplate(const Instruction &Instr) const override; + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override; static constexpr const size_t kMinNumDifferentAddresses = 6; Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -124,8 +124,8 @@ "not enough scratch space"); } -llvm::Expected -UopsSnippetGenerator::generateCodeTemplate(const Instruction &Instr) const { +llvm::Expected> +UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { CodeTemplate CT; const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr; if (Instr.hasMemoryOperands()) { @@ -153,13 +153,13 @@ CT.Info = "instruction is parallel, repeating a random one."; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return std::move(CT); + return getSingleton(CT); } if (SelfAliasing.hasImplicitAliasing()) { CT.Info = "instruction is serial, repeating a random one."; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return std::move(CT); + return getSingleton(CT); } const auto TiedVariables = getVariablesWithTiedOperands(Instr); if (!TiedVariables.empty()) { @@ -181,7 +181,7 @@ CT.Instructions.push_back(std::move(TmpIT)); } instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return std::move(CT); + return getSingleton(CT); } const auto &ReservedRegisters = State.getRATC().reservedRegisters(); // No tied variables, we pick random values for defs. 
@@ -218,7 +218,7 @@ "instruction has no tied variables picking Uses different from defs"; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return std::move(CT); + return getSingleton(CT); } std::vector Index: tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/X86/Target.cpp +++ tools/llvm-exegesis/lib/X86/Target.cpp @@ -38,14 +38,14 @@ public: using LatencySnippetGenerator::LatencySnippetGenerator; - llvm::Expected - generateCodeTemplate(const Instruction &Instr) const override { + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); switch (GetX86FPFlags(Instr)) { case llvm::X86II::NotFP: - return LatencySnippetGenerator::generateCodeTemplate(Instr); + return LatencySnippetGenerator::generateCodeTemplates(Instr); case llvm::X86II::ZeroArgFP: case llvm::X86II::OneArgFP: case llvm::X86II::SpecialFP: @@ -58,7 +58,7 @@ // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) // They are intrinsically serial and do not modify the state of the stack. 
- return generateSelfAliasingCodeTemplate(Instr); + return generateSelfAliasingCodeTemplates(Instr); default: llvm_unreachable("Unknown FP Type!"); } @@ -69,14 +69,14 @@ public: using UopsSnippetGenerator::UopsSnippetGenerator; - llvm::Expected - generateCodeTemplate(const Instruction &Instr) const override { + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); switch (GetX86FPFlags(Instr)) { case llvm::X86II::NotFP: - return UopsSnippetGenerator::generateCodeTemplate(Instr); + return UopsSnippetGenerator::generateCodeTemplates(Instr); case llvm::X86II::ZeroArgFP: case llvm::X86II::OneArgFP: case llvm::X86II::SpecialFP: @@ -88,12 +88,12 @@ // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) // They are intrinsically serial and do not modify the state of the stack. // We generate the same code for latency and uops. - return generateSelfAliasingCodeTemplate(Instr); + return generateSelfAliasingCodeTemplates(Instr); case llvm::X86II::CompareFP: case llvm::X86II::CondMovFP: // We can compute uops for any FP instruction that does not grow or shrink // the stack (either do not touch the stack or push as much as they pop). 
- return generateUnconstrainedCodeTemplate( + return generateUnconstrainedCodeTemplates( Instr, "instruction does not grow/shrink the FP stack"); default: llvm_unreachable("Unknown FP Type!"); Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -25,6 +25,7 @@ using testing::AnyOf; using testing::ElementsAre; +using testing::Gt; using testing::HasSubstr; using testing::Not; using testing::SizeIs; @@ -57,10 +58,10 @@ protected: SnippetGeneratorTest() : Generator(State) {} - CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) { + std::vector checkAndGetCodeTemplates(unsigned Opcode) { randomGenerator().seed(0); // Initialize seed. const Instruction Instr(State, Opcode); - auto CodeTemplateOrError = Generator.generateCodeTemplate(Instr); + auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr); EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. return std::move(CodeTemplateOrError.get()); } @@ -73,21 +74,25 @@ using UopsSnippetGeneratorTest = SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) { - // ADC16i16 self alias because of implicit use and def. 
- - // explicit use 0 : imm - // implicit def : AX - // implicit def : EFLAGS - // implicit use : AX - // implicit use : EFLAGS +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { + // - ADC16i16 + // - Op0 Explicit Use Immediate + // - Op1 Implicit Def Reg(AX) + // - Op2 Implicit Def Reg(EFLAGS) + // - Op3 Implicit Use Reg(AX) + // - Op4 Implicit Use Reg(EFLAGS) + // - Var0 [Op0] + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::ADC16i16; EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::AX); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[1], llvm::X86::EFLAGS); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[0], llvm::X86::AX); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[1], llvm::X86::EFLAGS); - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("implicit")); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_VIA_IMPLICIT_REGS); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -95,63 +100,105 @@ EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Immediate is not set"; } -TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) { - // ADD16ri self alias because Op0 and Op1 are tied together. 
- - // explicit def 0 : reg RegClass=GR16 - // explicit use 1 : reg RegClass=GR16 | TIED_TO:0 - // explicit use 2 : imm - // implicit def : EFLAGS +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { + // - ADD16ri + // - Op0 Explicit Def RegClass(GR16) + // - Op1 Explicit Use RegClass(GR16) TiedToOp0 + // - Op2 Explicit Use Immediate + // - Op3 Implicit Def Reg(EFLAGS) + // - Var0 [Op0,Op1] + // - Var1 [Op2] + // - hasTiedRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::ADD16ri; EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::EFLAGS); - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("explicit")); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_VIA_TIED_REGS); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.VariableValues, SizeIs(2)); - EXPECT_THAT(IT.VariableValues[0], IsReg()) << "Operand 0 and 1"; + EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Operand 1 is not set"; EXPECT_THAT(IT.VariableValues[1], IsInvalid()) << "Operand 2 is not set"; } -TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { - // CMP64rr - // explicit use 0 : reg RegClass=GR64 - // explicit use 1 : reg RegClass=GR64 - // implicit def : EFLAGS - - const unsigned Opcode = llvm::X86::CMP64rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("cycle through")); - ASSERT_THAT(CT.Instructions, SizeIs(2)); +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { + // - VXORPSrr + // - Op0 Explicit Def RegClass(VR128) + // - Op1 Explicit Use RegClass(VR128) + // - Op2 Explicit Use RegClass(VR128) + // - Var0 [Op0] + // - Var1 [Op1] + 
// - Var2 [Op2] + // - hasAliasingRegisters + const unsigned Opcode = llvm::X86::VXORPSrr; + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS); + ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); - ASSERT_THAT(IT.VariableValues, SizeIs(2)); - EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()), - ElementsAre(IsInvalid(), IsReg()))); - EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode)); - // TODO: check that the two instructions alias each other. + ASSERT_THAT(IT.VariableValues, SizeIs(3)); + EXPECT_THAT(IT.VariableValues, + AnyOf(ElementsAre(IsReg(), IsInvalid(), IsReg()), + ElementsAre(IsReg(), IsReg(), IsInvalid()))) + << "Op0 is either set to Op1 or to Op2"; +} + +TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { + // - CMP64rr + // - Op0 Explicit Use RegClass(GR64) + // - Op1 Explicit Use RegClass(GR64) + // - Op2 Implicit Def Reg(EFLAGS) + // - Var0 [Op0] + // - Var1 [Op1] + const unsigned Opcode = llvm::X86::CMP64rr; + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available"; + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR); + ASSERT_THAT(CT.Instructions, SizeIs(2)); + const InstructionTemplate &IT = CT.Instructions[0]; + EXPECT_THAT(IT.getOpcode(), Opcode); + ASSERT_THAT(IT.VariableValues, SizeIs(2)); + EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()), + ElementsAre(IsInvalid(), IsReg()))); + EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode)); + // TODO: check that the two instructions alias each other. 
+ } } TEST_F(LatencySnippetGeneratorTest, LAHF) { + // - LAHF + // - Op0 Implicit Def Reg(AH) + // - Op1 Implicit Use Reg(EFLAGS) const unsigned Opcode = llvm::X86::LAHF; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("cycle through")); - ASSERT_THAT(CT.Instructions, SizeIs(2)); - const InstructionTemplate &IT = CT.Instructions[0]; - EXPECT_THAT(IT.getOpcode(), Opcode); - ASSERT_THAT(IT.VariableValues, SizeIs(0)); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available"; + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR); + ASSERT_THAT(CT.Instructions, SizeIs(2)); + const InstructionTemplate &IT = CT.Instructions[0]; + EXPECT_THAT(IT.getOpcode(), Opcode); + ASSERT_THAT(IT.VariableValues, SizeIs(0)); + } } TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { - // BNDCL32rr is parallel no matter what. - - // explicit use 0 : reg RegClass=BNDR - // explicit use 1 : reg RegClass=GR32 - + // - BNDCL32rr + // - Op0 Explicit Use RegClass(BNDR) + // - Op1 Explicit Use RegClass(GR32) + // - Var0 [Op0] + // - Var1 [Op1] const unsigned Opcode = llvm::X86::BNDCL32rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("parallel")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -161,14 +208,18 @@ } TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { - // CDQ is serial no matter what. 
- - // implicit def : EAX - // implicit def : EDX - // implicit use : EAX + // - CDQ + // - Op0 Implicit Def Reg(EAX) + // - Op1 Implicit Def Reg(EDX) + // - Op2 Implicit Use Reg(EAX) + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CDQ; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("serial")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -179,13 +230,21 @@ // CMOVA32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. - // explicit def 0 : reg RegClass=GR32 - // explicit use 1 : reg RegClass=GR32 | TIED_TO:0 - // explicit use 2 : reg RegClass=GR32 - // implicit use : EFLAGS + // - CMOVA32rr + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use RegClass(GR32) TiedToOp0 + // - Op2 Explicit Use RegClass(GR32) + // - Op3 Implicit Use Reg(EFLAGS) + // - Var0 [Op0,Op1] + // - Var1 [Op2] + // - hasTiedRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CMOVA32rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("static renaming")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); constexpr const unsigned kInstructionCount = 15; ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); std::unordered_set AllDefRegisters; @@ -201,14 +260,23 @@ // CMOV_GR32 has no tied variables, we make sure def and use are different // from each other. 
- // explicit def 0 : reg RegClass=GR32 - // explicit use 1 : reg RegClass=GR32 - // explicit use 2 : reg RegClass=GR32 - // explicit use 3 : imm - // implicit use : EFLAGS + // - CMOV_GR32 + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use RegClass(GR32) + // - Op2 Explicit Use RegClass(GR32) + // - Op3 Explicit Use Immediate + // - Op4 Implicit Use Reg(EFLAGS) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - Var3 [Op3] + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CMOV_GR32; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -222,9 +290,27 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) { // Mov32rm reads from memory. 
+ // - MOV32rm + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use Memory RegClass(GR8) + // - Op2 Explicit Use Memory + // - Op3 Explicit Use Memory RegClass(GRH8) + // - Op4 Explicit Use Memory + // - Op5 Explicit Use Memory RegClass(SEGMENT_REG) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - Var3 [Op3] + // - Var4 [Op4] + // - Var5 [Op5] + // - hasMemoryOperands + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::MOV32rm; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; @@ -238,9 +324,24 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) { // MOVSB writes to scratch memory register. 
+ // - MOVSB + // - Op0 Explicit Use Memory RegClass(GR8) + // - Op1 Explicit Use Memory RegClass(GR8) + // - Op2 Explicit Use Memory RegClass(SEGMENT_REG) + // - Op3 Implicit Def Reg(EDI) + // - Op4 Implicit Def Reg(ESI) + // - Op5 Implicit Use Reg(EDI) + // - Op6 Implicit Use Reg(ESI) + // - Op7 Implicit Use Reg(DF) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - hasMemoryOperands + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::MOVSB; const Instruction Instr(State, Opcode); - auto Error = Generator.generateCodeTemplate(Instr).takeError(); + auto Error = Generator.generateCodeTemplates(Instr).takeError(); EXPECT_TRUE((bool)Error); llvm::consumeError(std::move(Error)); } @@ -254,8 +355,8 @@ } private: - llvm::Expected - generateCodeTemplate(const Instruction &Instr) const override { + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override { return llvm::make_error("not implemented", llvm::inconvertibleErrorCode()); }