Index: llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.h +++ llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.h @@ -17,6 +17,7 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H #include "MCInstrDescView.h" +#include "llvm/ADT/BitmaskEnum.h" namespace exegesis { @@ -45,9 +46,65 @@ llvm::SmallVector VariableValues; }; +enum class ExecutionMode : uint8_t { + UNKNOWN = 0U, + // The instruction is always serial because implicit Use and Def alias. + // e.g. AAA (alias via EFLAGS) + ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0, + + // The instruction is always serial because one Def is tied to a Use. + // e.g. AND32ri (alias via tied GR32) + ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1, + + // The execution can be made serial by inserting a second instruction that + // clobbers/reads memory. + // e.g. MOV8rm + SERIAL_VIA_MEMORY_INSTR = 1u << 2, + + // The execution can be made serial by picking one Def that aliases with one + // Use. + // e.g. VXORPSrr XMM1, XMM1, XMM2 + SERIAL_VIA_EXPLICIT_REGS = 1u << 3, + + // The execution can be made serial by inserting a second instruction that + // uses one of the Defs and defs one of the Uses. + // e.g. + // 1st instruction: MMX_PMOVMSKBrr ECX, MM7 + // 2nd instruction: MMX_MOVD64rr MM7, ECX + // or instruction: MMX_MOVD64to64rr MM7, ECX + // or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1 + SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4, + + // The execution is always parallel because the instruction is missing Use or + // Def operands. + ALWAYS_PARALLEL_MISSING_USE_OR_DEF = 1u << 5, + + // The execution can be made parallel by repeating the same instruction but + // making sure that Defs of one instruction do not alias with Uses of the + // second one. 
+ PARALLEL_VIA_EXPLICIT_REGS = 1u << 6, + + LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS) +}; + +// Returns whether Execution is one of the values defined in the enum above. +bool isEnumValue(ExecutionMode Execution); + +// Returns a human readable string for the enum. +llvm::StringRef getName(ExecutionMode Execution); + +// Returns a sequence of increasing powers of two corresponding to all the +// Execution flags. +llvm::ArrayRef getAllExecutionBits(); + +// Decomposes Execution into individual set bits. +llvm::SmallVector getExecutionModeBits(ExecutionMode); + +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + // A CodeTemplate is a set of InstructionTemplates that may not be fully // specified (i.e. some variables are not yet set). This allows the -// BenchmarkRunner to instantiate it many times with specific values to study +// SnippetGenerator to instantiate it many times with specific values to study // their impact on instruction's performance. struct CodeTemplate { CodeTemplate() = default; @@ -57,6 +114,7 @@ CodeTemplate(const CodeTemplate &) = delete; CodeTemplate &operator=(const CodeTemplate &) = delete; + ExecutionMode Execution = ExecutionMode::UNKNOWN; // Some information about how this template has been created. std::string Info; // The list of the instructions for this template. 
Index: llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/CodeTemplate.cpp @@ -65,4 +65,54 @@ return Result; } +bool isEnumValue(ExecutionMode Execution) { + return llvm::isPowerOf2_32(static_cast(Execution)); +} + +llvm::StringRef getName(ExecutionMode Bit) { + assert(isEnumValue(Bit) && "Bit must be a power of two"); + switch (Bit) { + case ExecutionMode::UNKNOWN: + return "UNKNOWN"; + case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: + return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS"; + case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: + return "ALWAYS_SERIAL_TIED_REGS_ALIAS"; + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: + return "SERIAL_VIA_MEMORY_INSTR"; + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: + return "SERIAL_VIA_EXPLICIT_REGS"; + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: + return "SERIAL_VIA_NON_MEMORY_INSTR"; + case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF: + return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF"; + case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS: + return "PARALLEL_VIA_EXPLICIT_REGS"; + } + llvm_unreachable("Missing enum case"); +} + +static const ExecutionMode kAllExecutionModeBits[] = { + ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS, + ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + ExecutionMode::SERIAL_VIA_MEMORY_INSTR, + ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF, + ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS, +}; + +llvm::ArrayRef getAllExecutionBits() { + return kAllExecutionModeBits; +} + +llvm::SmallVector +getExecutionModeBits(ExecutionMode Execution) { + llvm::SmallVector Result; + for (const auto Bit : getAllExecutionBits()) + if ((Execution & Bit) == Bit) + Result.push_back(Bit); + return Result; +} + } // namespace exegesis Index: 
llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/Latency.cpp @@ -20,53 +20,148 @@ namespace exegesis { -LatencySnippetGenerator::~LatencySnippetGenerator() = default; - -llvm::Expected> -generateTwoInstructionPrototypes(const LLVMState &State, - const Instruction &Instr) { +struct ExecutionClass { + ExecutionMode Mask; + const char *Description; +} static const kExecutionClasses[] = { + {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS | + ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + "Repeating a single implicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, + "Repeating a single explicitly serial instruction"}, + {ExecutionMode::SERIAL_VIA_MEMORY_INSTR | + ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, + "Repeating two instructions"}, +}; + +static constexpr size_t kMaxAliasingInstructions = 10; + +static std::vector +computeAliasingInstructions(const LLVMState &State, const Instruction &Instr, + size_t MaxAliasingInstructions) { + // Randomly iterate the set of instructions. 
std::vector Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); std::iota(Opcodes.begin(), Opcodes.end(), 0U); std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); + + std::vector AliasingInstructions; for (const unsigned OtherOpcode : Opcodes) { - if (OtherOpcode == Instr.Description->Opcode) + if (OtherOpcode == Instr.Description->getOpcode()) continue; const Instruction OtherInstr(State, OtherOpcode); if (OtherInstr.hasMemoryOperands()) continue; - const AliasingConfigurations Forward(Instr, OtherInstr); - const AliasingConfigurations Back(OtherInstr, Instr); - if (Forward.empty() || Back.empty()) - continue; - InstructionTemplate ThisIT(Instr); - InstructionTemplate OtherIT(OtherInstr); - if (!Forward.hasImplicitAliasing()) - setRandomAliasing(Forward, ThisIT, OtherIT); - if (!Back.hasImplicitAliasing()) - setRandomAliasing(Back, OtherIT, ThisIT); + if (Instr.hasAliasingRegistersThrough(OtherInstr)) + AliasingInstructions.push_back(std::move(OtherInstr)); + if (AliasingInstructions.size() >= MaxAliasingInstructions) + break; + } + return AliasingInstructions; +} + +static ExecutionMode getExecutionModes(const Instruction &Instr) { + ExecutionMode EM = ExecutionMode::UNKNOWN; + if (Instr.hasAliasingImplicitRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS; + if (Instr.hasTiedRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS; + if (Instr.hasMemoryOperands()) + EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; + else { + if (Instr.hasAliasingRegisters()) + EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; + if (Instr.hasOneUseOrOneDef()) + EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; + } + return EM; +} + +static void appendCodeTemplates(const LLVMState &State, + const Instruction &Instr, + ExecutionMode ExecutionModeBit, + llvm::StringRef ExecutionClassDescription, + std::vector &CodeTemplates) { + assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two"); + switch (ExecutionModeBit) { + case 
ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: + // Nothing to do, the instruction is always serial. + LLVM_FALLTHROUGH; + case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: { + // Picking whatever value for the tied variable will make the instruction + // serial. CodeTemplate CT; - CT.Info = llvm::formatv("creating cycle through {0}.", - State.getInstrInfo().getName(OtherOpcode)); - CT.Instructions.push_back(std::move(ThisIT)); - CT.Instructions.push_back(std::move(OtherIT)); - return getSingleton(CT); + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(Instr); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: { + // Select back-to-back memory instruction. + // TODO: Implement me. + return; + } + case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { + // Making the execution of this instruction serial by selecting one def + // register to alias with one use register. + const AliasingConfigurations SelfAliasing(Instr, Instr); + assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() && + "Instr must alias itself explicitly"); + InstructionTemplate IT(Instr); + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice IT in the following call. + setRandomAliasing(SelfAliasing, IT, IT); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(std::move(IT)); + CodeTemplates.push_back(std::move(CT)); + return; + } + case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: { + // Select back-to-back non-memory instruction. 
+ for (const auto OtherInstr : + computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) { + const AliasingConfigurations Forward(Instr, OtherInstr); + const AliasingConfigurations Back(OtherInstr, Instr); + InstructionTemplate ThisIT(Instr); + InstructionTemplate OtherIT(OtherInstr); + if (!Forward.hasImplicitAliasing()) + setRandomAliasing(Forward, ThisIT, OtherIT); + if (!Back.hasImplicitAliasing()) + setRandomAliasing(Back, OtherIT, ThisIT); + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + CT.Info = ExecutionClassDescription; + CT.Instructions.push_back(std::move(ThisIT)); + CT.Instructions.push_back(std::move(OtherIT)); + CodeTemplates.push_back(std::move(CT)); + } + return; + } + default: + llvm_unreachable("Unhandled enum value"); } - return llvm::make_error( - "Infeasible : Didn't find any scheme to make the instruction serial"); } +LatencySnippetGenerator::~LatencySnippetGenerator() = default; + llvm::Expected> LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { - if (Instr.hasMemoryOperands()) + std::vector Results; + const ExecutionMode EM = getExecutionModes(Instr); + for (const auto EC : kExecutionClasses) { + for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask)) + appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description, + Results); + if (!Results.empty()) + break; + } + if (Results.empty()) return llvm::make_error( - "Infeasible : has memory operands"); - return llvm::handleExpected( // - generateSelfAliasingCodeTemplates(Instr), - [this, &Instr]() { - return generateTwoInstructionPrototypes(State, Instr); - }, - [](const BenchmarkFailure &) { /*Consume Error*/ }); + "No strategy found to make the execution serial"); + return std::move(Results); } const char *LatencyBenchmarkRunner::getCounterName() const { Index: llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.h =================================================================== --- 
llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.h +++ llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.h @@ -125,6 +125,11 @@ // reads or write the same memory region. bool hasMemoryOperands() const; + // Returns whether this instruction has at least one use or one def. + // Repeating this instruction may execute sequentially by adding an + // instruction that aliases one of these. + bool hasOneUseOrOneDef() const; + // Convenient function to help with debugging. void dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const; @@ -174,10 +179,7 @@ bool empty() const; // True if no aliasing configuration is found. bool hasImplicitAliasing() const; - void setExplicitAliasing() const; - const Instruction &DefInstruction; - const Instruction &UseInstruction; llvm::SmallVector Configurations; }; Index: llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -27,7 +27,14 @@ return TiedOperands[0]; } -bool Variable::hasTiedOperands() const { return TiedOperands.size() > 1; } +bool Variable::hasTiedOperands() const { + assert(TiedOperands.size() <= 2 && + "No more than two operands can be tied together"); + // By definition only Use and Def operands can be tied together. + // TiedOperands[0] is the Def operand (LLVM stores defs first). + // TiedOperands[1] is the Use operand. 
+ return TiedOperands.size() > 1; +} unsigned Operand::getIndex() const { assert(Index >= 0 && "Index must be set"); @@ -197,6 +204,10 @@ return AllDefRegs.anyCommon(AllUseRegs); } +bool Instruction::hasOneUseOrOneDef() const { + return AllDefRegs.count() || AllUseRegs.count(); +} + void Instruction::dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const { Stream << "- " << Name << "\n"; @@ -288,8 +299,7 @@ } AliasingConfigurations::AliasingConfigurations( - const Instruction &DefInstruction, const Instruction &UseInstruction) - : DefInstruction(DefInstruction), UseInstruction(UseInstruction) { + const Instruction &DefInstruction, const Instruction &UseInstruction) { if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) { auto CommonRegisters = UseInstruction.AllUseRegs; CommonRegisters &= DefInstruction.AllDefRegs; Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -30,7 +30,7 @@ namespace exegesis { -std::vector getSingleton(CodeTemplate &CT); +std::vector getSingleton(CodeTemplate &&CT); // Generates code templates that has a self-dependency. 
llvm::Expected> Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -22,7 +22,7 @@ namespace exegesis { -std::vector getSingleton(CodeTemplate &CT) { +std::vector getSingleton(CodeTemplate &&CT) { std::vector Result; Result.push_back(std::move(CT)); return Result; Index: llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp @@ -153,13 +153,13 @@ CT.Info = "instruction is parallel, repeating a random one."; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(CT); + return getSingleton(std::move(CT)); } if (SelfAliasing.hasImplicitAliasing()) { CT.Info = "instruction is serial, repeating a random one."; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(CT); + return getSingleton(std::move(CT)); } const auto TiedVariables = getVariablesWithTiedOperands(Instr); if (!TiedVariables.empty()) { @@ -181,7 +181,7 @@ CT.Instructions.push_back(std::move(TmpIT)); } instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(CT); + return getSingleton(std::move(CT)); } const auto &ReservedRegisters = State.getRATC().reservedRegisters(); // No tied variables, we pick random values for defs. 
@@ -218,7 +218,7 @@ "instruction has no tied variables picking Uses different from defs"; CT.Instructions.push_back(std::move(IT)); instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(CT); + return getSingleton(std::move(CT)); } std::vector Index: llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -25,6 +25,7 @@ using testing::AnyOf; using testing::ElementsAre; +using testing::Gt; using testing::HasSubstr; using testing::Not; using testing::SizeIs; @@ -57,14 +58,12 @@ protected: SnippetGeneratorTest() : Generator(State) {} - CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) { + std::vector checkAndGetCodeTemplates(unsigned Opcode) { randomGenerator().seed(0); // Initialize seed. const Instruction Instr(State, Opcode); auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr); EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration. - auto &CodeTemplate = CodeTemplateOrError.get(); - EXPECT_EQ(CodeTemplate.size(), 1U); - return std::move(CodeTemplate.front()); + return std::move(CodeTemplateOrError.get()); } SnippetGeneratorT Generator; @@ -75,21 +74,25 @@ using UopsSnippetGeneratorTest = SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) { - // ADC16i16 self alias because of implicit use and def. 
- - // explicit use 0 : imm - // implicit def : AX - // implicit def : EFLAGS - // implicit use : AX - // implicit use : EFLAGS +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { + // - ADC16i16 + // - Op0 Explicit Use Immediate + // - Op1 Implicit Def Reg(AX) + // - Op2 Implicit Def Reg(EFLAGS) + // - Op3 Implicit Use Reg(AX) + // - Op4 Implicit Use Reg(EFLAGS) + // - Var0 [Op0] + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::ADC16i16; EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::AX); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[1], llvm::X86::EFLAGS); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[0], llvm::X86::AX); EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[1], llvm::X86::EFLAGS); - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("implicit")); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -97,63 +100,105 @@ EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Immediate is not set"; } -TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) { - // ADD16ri self alias because Op0 and Op1 are tied together. 
- - // explicit def 0 : reg RegClass=GR16 - // explicit use 1 : reg RegClass=GR16 | TIED_TO:0 - // explicit use 2 : imm - // implicit def : EFLAGS +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { + // - ADD16ri + // - Op0 Explicit Def RegClass(GR16) + // - Op1 Explicit Use RegClass(GR16) TiedToOp0 + // - Op2 Explicit Use Immediate + // - Op3 Implicit Def Reg(EFLAGS) + // - Var0 [Op0,Op1] + // - Var1 [Op2] + // - hasTiedRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::ADD16ri; EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::EFLAGS); - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("explicit")); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.VariableValues, SizeIs(2)); - EXPECT_THAT(IT.VariableValues[0], IsReg()) << "Operand 0 and 1"; + EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Operand 1 is not set"; EXPECT_THAT(IT.VariableValues[1], IsInvalid()) << "Operand 2 is not set"; } -TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { - // CMP64rr - // explicit use 0 : reg RegClass=GR64 - // explicit use 1 : reg RegClass=GR64 - // implicit def : EFLAGS - - const unsigned Opcode = llvm::X86::CMP64rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("cycle through")); - ASSERT_THAT(CT.Instructions, SizeIs(2)); +TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { + // - VXORPSrr + // - Op0 Explicit Def RegClass(VR128) + // - Op1 Explicit Use RegClass(VR128) + // - Op2 Explicit Use RegClass(VR128) + // - Var0 [Op0] + // - Var1 [Op1] 
+ // - Var2 [Op2] + // - hasAliasingRegisters + const unsigned Opcode = llvm::X86::VXORPSrr; + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS); + ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); - ASSERT_THAT(IT.VariableValues, SizeIs(2)); - EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()), - ElementsAre(IsInvalid(), IsReg()))); - EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode)); - // TODO: check that the two instructions alias each other. + ASSERT_THAT(IT.VariableValues, SizeIs(3)); + EXPECT_THAT(IT.VariableValues, + AnyOf(ElementsAre(IsReg(), IsInvalid(), IsReg()), + ElementsAre(IsReg(), IsReg(), IsInvalid()))) + << "Op0 is either set to Op1 or to Op2"; +} + +TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { + // - CMP64rr + // - Op0 Explicit Use RegClass(GR64) + // - Op1 Explicit Use RegClass(GR64) + // - Op2 Implicit Def Reg(EFLAGS) + // - Var0 [Op0] + // - Var1 [Op1] + const unsigned Opcode = llvm::X86::CMP64rr; + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available"; + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR); + ASSERT_THAT(CT.Instructions, SizeIs(2)); + const InstructionTemplate &IT = CT.Instructions[0]; + EXPECT_THAT(IT.getOpcode(), Opcode); + ASSERT_THAT(IT.VariableValues, SizeIs(2)); + EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()), + ElementsAre(IsInvalid(), IsReg()))); + EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode)); + // TODO: check that the two instructions alias each other. 
+ } } TEST_F(LatencySnippetGeneratorTest, LAHF) { + // - LAHF + // - Op0 Implicit Def Reg(AH) + // - Op1 Implicit Use Reg(EFLAGS) const unsigned Opcode = llvm::X86::LAHF; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); - EXPECT_THAT(CT.Info, HasSubstr("cycle through")); - ASSERT_THAT(CT.Instructions, SizeIs(2)); - const InstructionTemplate &IT = CT.Instructions[0]; - EXPECT_THAT(IT.getOpcode(), Opcode); - ASSERT_THAT(IT.VariableValues, SizeIs(0)); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available"; + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR); + ASSERT_THAT(CT.Instructions, SizeIs(2)); + const InstructionTemplate &IT = CT.Instructions[0]; + EXPECT_THAT(IT.getOpcode(), Opcode); + ASSERT_THAT(IT.VariableValues, SizeIs(0)); + } } TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { - // BNDCL32rr is parallel no matter what. - - // explicit use 0 : reg RegClass=BNDR - // explicit use 1 : reg RegClass=GR32 - + // - BNDCL32rr + // - Op0 Explicit Use RegClass(BNDR) + // - Op1 Explicit Use RegClass(GR32) + // - Var0 [Op0] + // - Var1 [Op1] const unsigned Opcode = llvm::X86::BNDCL32rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("parallel")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -163,14 +208,18 @@ } TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { - // CDQ is serial no matter what. 
- - // implicit def : EAX - // implicit def : EDX - // implicit use : EAX + // - CDQ + // - Op0 Implicit Def Reg(EAX) + // - Op1 Implicit Def Reg(EDX) + // - Op2 Implicit Use Reg(EAX) + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CDQ; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("serial")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -181,13 +230,21 @@ // CMOVA32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. - // explicit def 0 : reg RegClass=GR32 - // explicit use 1 : reg RegClass=GR32 | TIED_TO:0 - // explicit use 2 : reg RegClass=GR32 - // implicit use : EFLAGS + // - CMOVA32rr + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use RegClass(GR32) TiedToOp0 + // - Op2 Explicit Use RegClass(GR32) + // - Op3 Implicit Use Reg(EFLAGS) + // - Var0 [Op0,Op1] + // - Var1 [Op2] + // - hasTiedRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CMOVA32rr; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("static renaming")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); constexpr const unsigned kInstructionCount = 15; ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); std::unordered_set AllDefRegisters; @@ -203,14 +260,23 @@ // CMOV_GR32 has no tied variables, we make sure def and use are different // from each other. 
- // explicit def 0 : reg RegClass=GR32 - // explicit use 1 : reg RegClass=GR32 - // explicit use 2 : reg RegClass=GR32 - // explicit use 3 : imm - // implicit use : EFLAGS + // - CMOV_GR32 + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use RegClass(GR32) + // - Op2 Explicit Use RegClass(GR32) + // - Op3 Explicit Use Immediate + // - Op4 Implicit Use Reg(EFLAGS) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - Var3 [Op3] + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::CMOV_GR32; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -224,9 +290,27 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) { // Mov32rm reads from memory. 
+ // - MOV32rm + // - Op0 Explicit Def RegClass(GR32) + // - Op1 Explicit Use Memory RegClass(GR8) + // - Op2 Explicit Use Memory + // - Op3 Explicit Use Memory RegClass(GRH8) + // - Op4 Explicit Use Memory + // - Op5 Explicit Use Memory RegClass(SEGMENT_REG) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - Var3 [Op3] + // - Var4 [Op4] + // - Var5 [Op5] + // - hasMemoryOperands + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::MOV32rm; - const CodeTemplate CT = checkAndGetCodeTemplate(Opcode); + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(1)); + const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; @@ -240,6 +324,21 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) { // MOVSB writes to scratch memory register. + // - MOVSB + // - Op0 Explicit Use Memory RegClass(GR8) + // - Op1 Explicit Use Memory RegClass(GR8) + // - Op2 Explicit Use Memory RegClass(SEGMENT_REG) + // - Op3 Implicit Def Reg(EDI) + // - Op4 Implicit Def Reg(ESI) + // - Op5 Implicit Use Reg(EDI) + // - Op6 Implicit Use Reg(ESI) + // - Op7 Implicit Use Reg(DF) + // - Var0 [Op0] + // - Var1 [Op1] + // - Var2 [Op2] + // - hasMemoryOperands + // - hasAliasingImplicitRegisters (execution is always serial) + // - hasAliasingRegisters const unsigned Opcode = llvm::X86::MOVSB; const Instruction Instr(State, Opcode); auto Error = Generator.generateCodeTemplates(Instr).takeError();