Index: tools/llvm-exegesis/lib/CodeTemplate.h =================================================================== --- tools/llvm-exegesis/lib/CodeTemplate.h +++ tools/llvm-exegesis/lib/CodeTemplate.h @@ -35,11 +35,10 @@ const llvm::MCOperand &getValueFor(const Variable &Var) const; llvm::MCOperand &getValueFor(const Operand &Op); const llvm::MCOperand &getValueFor(const Operand &Op) const; - bool hasImmediateVariables() const; // Builds an llvm::MCInst from this InstructionTemplate setting its operands - // to the corresponding variable values. Precondition: All VariableValues must - // be set. + // to the corresponding variable values. + // Precondition: All VariableValues must be set. llvm::MCInst build() const; Instruction Instr; @@ -52,9 +51,9 @@ // e.g. AAA (alias via EFLAGS) ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0, - // The instruction is always serial because one Def is tied to a Use. + // The instruction is serial if repeated because one Def is tied to a Use. // e.g. AND32ri (alias via tied GR32) - ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1, + SERIAL_VIA_TIED_REGS = 1u << 1, // The execution can be made serial by inserting a second instruction that // clobbers/reads memory. @@ -82,9 +81,13 @@ // The execution can be made parallel by repeating the same instruction but // making sure that Defs of one instruction do not alias with Uses of the // second one. - PARALLEL_VIA_EXPLICIT_REGS = 1u << 6, + PARALLEL_VIA_DISTINCT_EXPLICIT_REGS = 1u << 6, - LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS) + // The execution can be made parallel by repeating the same instruction but + // making sure each memory access hits a different cache line. + PARALLEL_VIA_DISTINCT_MEMORY_ACCESS = 1u << 7, + + LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_DISTINCT_MEMORY_ACCESS) }; // Returns whether Execution is one of the values defined in the enum above. @@ -100,6 +103,9 @@ // Decomposes Execution into individual set bits. 
llvm::SmallVector getExecutionModeBits(ExecutionMode); +// Computes all possible execution modes for Instr. +ExecutionMode getExecutionModes(const Instruction &Instr); + LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); // A CodeTemplate is a set of InstructionTemplates that may not be fully Index: tools/llvm-exegesis/lib/CodeTemplate.cpp =================================================================== --- tools/llvm-exegesis/lib/CodeTemplate.cpp +++ tools/llvm-exegesis/lib/CodeTemplate.cpp @@ -50,12 +50,6 @@ return getValueFor(Instr.Variables[Op.getVariableIndex()]); } -bool InstructionTemplate::hasImmediateVariables() const { - return llvm::any_of(Instr.Variables, [this](const Variable &Var) { - return Instr.getPrimaryOperand(Var).isImmediate(); - }); -} - llvm::MCInst InstructionTemplate::build() const { llvm::MCInst Result; Result.setOpcode(Instr.Description->Opcode); @@ -76,8 +70,8 @@ return "UNKNOWN"; case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS"; - case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: - return "ALWAYS_SERIAL_TIED_REGS_ALIAS"; + case ExecutionMode::SERIAL_VIA_TIED_REGS: + return "SERIAL_VIA_TIED_REGS"; case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: return "SERIAL_VIA_MEMORY_INSTR"; case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: @@ -86,8 +80,10 @@ return "SERIAL_VIA_NON_MEMORY_INSTR"; case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF: return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF"; - case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS: - return "PARALLEL_VIA_EXPLICIT_REGS"; + case ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS: + return "PARALLEL_VIA_DISTINCT_EXPLICIT_REGS"; + case ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS: + return "PARALLEL_VIA_DISTINCT_MEMORY_ACCESS"; } llvm_unreachable("Missing enum case"); } @@ -95,12 +91,13 @@ llvm::ArrayRef getAllExecutionBits() { static const ExecutionMode kAllExecutionModeBits[] = { ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS, - 
ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + ExecutionMode::SERIAL_VIA_TIED_REGS, ExecutionMode::SERIAL_VIA_MEMORY_INSTR, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF, - ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS, + ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS, + ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS, }; return llvm::makeArrayRef(kAllExecutionModeBits); } @@ -114,4 +111,25 @@ return Result; } +ExecutionMode getExecutionModes(const Instruction &Instr) { + ExecutionMode EM = ExecutionMode::UNKNOWN; + if (Instr.hasAliasingImplicitRegisters()) + EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS; + if (Instr.hasTiedRegisters()) + EM |= ExecutionMode::SERIAL_VIA_TIED_REGS; + if (Instr.hasMemoryOperands()) { + EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; + EM |= ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS; + } + if (Instr.hasAliasingRegisters() && !Instr.hasMemoryOperands()) + EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; + if (Instr.hasOneUseOrOneDef() && !Instr.hasMemoryOperands()) + EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; + if (Instr.hasNoUseOrNoDef() && !Instr.hasMemoryOperands()) + EM |= ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF; + if (Instr.hasAliasingRegisters() || Instr.hasTiedRegisters() || + Instr.hasOneUseOrOneDef()) + EM |= ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS; + return EM; +} } // namespace exegesis Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -24,7 +24,7 @@ const char *Description; } static const kExecutionClasses[] = { {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS | - ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, + ExecutionMode::SERIAL_VIA_TIED_REGS, "Repeating a single implicitly serial instruction"}, {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, 
"Repeating a single explicitly serial instruction"}, @@ -59,23 +59,6 @@ return AliasingInstructions; } -static ExecutionMode getExecutionModes(const Instruction &Instr) { - ExecutionMode EM = ExecutionMode::UNKNOWN; - if (Instr.hasAliasingImplicitRegisters()) - EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS; - if (Instr.hasTiedRegisters()) - EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS; - if (Instr.hasMemoryOperands()) - EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; - else { - if (Instr.hasAliasingRegisters()) - EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; - if (Instr.hasOneUseOrOneDef()) - EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; - } - return EM; -} - static void appendCodeTemplates(const LLVMState &State, const Instruction &Instr, ExecutionMode ExecutionModeBit, @@ -86,7 +69,7 @@ case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: // Nothing to do, the instruction is always serial. LLVM_FALLTHROUGH; - case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: { + case ExecutionMode::SERIAL_VIA_TIED_REGS: { // Picking whatever value for the tied variable will make the instruction // serial. CodeTemplate CT; @@ -148,6 +131,9 @@ llvm::Expected> LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { + if (Instr.hasMemoryOperands()) + return llvm::make_error( + "Latency does not handle memory operands yet"); std::vector Results; const ExecutionMode EM = getExecutionModes(Instr); for (const auto EC : kExecutionClasses) { Index: tools/llvm-exegesis/lib/MCInstrDescView.h =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.h +++ tools/llvm-exegesis/lib/MCInstrDescView.h @@ -95,6 +95,10 @@ struct Instruction { Instruction(const LLVMState &State, unsigned Opcode); + // Return the Variable that ties together two operands or nullptr if this + // instruction has no tied operands. + const Variable *getTiedVariable() const; + // Returns the Operand linked to this Variable. 
// In case the Variable is tied, the primary (i.e. Def) Operand is returned. const Operand &getPrimaryOperand(const Variable &Var) const; @@ -125,11 +129,18 @@ // reads or write the same memory region. bool hasMemoryOperands() const; - // Returns whether this instruction as at least one use or one def. + // Returns whether this instruction has at least one use or one def. // Repeating this instruction may execute sequentially by adding an // instruction that aliases one of these. bool hasOneUseOrOneDef() const; + // Returns whether this instruction has no use or no def. + // Repeating this instruction always executes in parallel. + bool hasNoUseOrNoDef() const; + + // Whether this instruction implicitly reads or writes this register. + bool isImplicitlyAliasingWith(unsigned reg) const; + // Convenient function to help with debugging. void dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const; Index: tools/llvm-exegesis/lib/MCInstrDescView.cpp =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -167,6 +167,13 @@ } } +const Variable *Instruction::getTiedVariable() const { + for (const auto &Var : Variables) + if (Var.hasTiedOperands()) + return &Var; + return nullptr; +} + const Operand &Instruction::getPrimaryOperand(const Variable &Var) const { const auto PrimaryOperandIndex = Var.getPrimaryOperandIndex(); assert(PrimaryOperandIndex < Operands.size()); @@ -208,6 +215,14 @@ return AllDefRegs.count() || AllUseRegs.count(); } +bool Instruction::hasNoUseOrNoDef() const { + return !AllDefRegs.count() || !AllUseRegs.count(); +} + +bool Instruction::isImplicitlyAliasingWith(unsigned reg) const { + return ImplUseRegs.test(reg) || ImplDefRegs.test(reg); +} + void Instruction::dump(const llvm::MCRegisterInfo &RegInfo, llvm::raw_ostream &Stream) const { Stream << "- " << Name << "\n"; @@ -251,14 +266,18 @@ Stream << "]"; Stream << "\n"; } - if 
(hasMemoryOperands()) - Stream << "- hasMemoryOperands\n"; if (hasAliasingImplicitRegisters()) Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n"; if (hasTiedRegisters()) - Stream << "- hasTiedRegisters (execution is always serial)\n"; + Stream << "- hasTiedRegisters\n"; if (hasAliasingRegisters()) Stream << "- hasAliasingRegisters\n"; + if (hasMemoryOperands()) + Stream << "- hasMemoryOperands\n"; + if (hasOneUseOrOneDef()) + Stream << "- hasOneUseOrOneDef\n"; + if (hasNoUseOrNoDef()) + Stream << "- hasNoUseOrNoDef (execution is always parallel)\n"; } bool RegisterOperandAssignment:: Index: tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -168,9 +168,11 @@ case llvm::MCOI::OperandType::OPERAND_REGISTER: { assert(Op.isReg()); auto AllowedRegs = Op.getRegisterAliasing().sourceBits(); + assert(AllowedRegs.any() && "No registers to choose from"); assert(AllowedRegs.size() == ForbiddenRegs.size()); for (auto I : ForbiddenRegs.set_bits()) AllowedRegs.reset(I); + assert(AllowedRegs.any() && "No more registers to choose from"); AssignedValue = llvm::MCOperand::createReg(randomBit(AllowedRegs)); break; } Index: tools/llvm-exegesis/lib/Uops.h =================================================================== --- tools/llvm-exegesis/lib/Uops.h +++ tools/llvm-exegesis/lib/Uops.h @@ -17,12 +17,17 @@ #include "BenchmarkRunner.h" #include "SnippetGenerator.h" +#include "Target.h" namespace exegesis { class UopsSnippetGenerator : public SnippetGenerator { public: - UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + UopsSnippetGenerator(const LLVMState &State) + : SnippetGenerator(State), ET(State.getExegesisTarget()), + ScratchSpaceReg(ET.getScratchMemoryRegister( + State.getTargetMachine().getTargetTriple())) {} + ~UopsSnippetGenerator() override; llvm::Expected> @@ 
-31,32 +36,26 @@ static constexpr const size_t kMinNumDifferentAddresses = 6; private: - // Instantiates memory operands within a snippet. - // To make computations as parallel as possible, we generate independant - // memory locations for instructions that load and store. If there are less - // than kMinNumDifferentAddresses in the original snippet, we duplicate - // instructions until there are this number of instructions. - // For example, assuming kMinNumDifferentAddresses=5 and - // getMaxMemoryAccessSize()=64, if the original snippet is: - // mov eax, [memory] - // we might generate: - // mov eax, [rdi] - // mov eax, [rdi + 64] - // mov eax, [rdi + 128] - // mov eax, [rdi + 192] - // mov eax, [rdi + 256] - // If the original snippet is: - // mov eax, [memory] - // add eax, [memory] - // we might generate: - // mov eax, [rdi] - // add eax, [rdi + 64] - // mov eax, [rdi + 128] - // add eax, [rdi + 192] - // mov eax, [rdi + 256] - void instantiateMemoryOperands( - unsigned ScratchSpaceReg, - std::vector &SnippetTemplate) const; + llvm::Expected + generateCodeTemplate(const Instruction &Instr) const; + + llvm::Expected generateMemoryCodeTemplate( + const ExecutionMode EM, llvm::StringRef Info, + const std::vector &OriginalInstrs) const; + + std::vector + generateUseDistinctFromDefs(const Instruction &Instr, + llvm::BitVector ForbiddenRegs) const; + + std::vector + generateStaticRenaming(const Instruction &Instr, const Variable &TiedVar, + llvm::BitVector ForbiddenRegs) const; + + std::vector + generateSingleInstruction(const Instruction &Instr) const; + + const ExegesisTarget &ET; + const unsigned ScratchSpaceReg = 0; }; class UopsBenchmarkRunner : public BenchmarkRunner { Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -14,9 +14,6 @@ #include "MCInstrDescView.h" #include "Target.h" -// FIXME: Load constants into registers (e.g. 
with fld1) to not break -// instructions like x87. - // Ideally we would like the only limitation on executing uops to be the issue // ports. Maximizing port pressure increases the likelihood that the load is // distributed evenly across possible ports. @@ -80,119 +77,160 @@ namespace exegesis { -static llvm::SmallVector -getVariablesWithTiedOperands(const Instruction &Instr) { - llvm::SmallVector Result; - for (const auto &Var : Instr.Variables) - if (Var.hasTiedOperands()) - Result.push_back(&Var); - return Result; -} - static void remove(llvm::BitVector &a, const llvm::BitVector &b) { assert(a.size() == b.size()); for (auto I : b.set_bits()) a.reset(I); } +static bool has(const ExecutionMode EM, const ExecutionMode Bits) { + return (EM & Bits) == Bits; +} UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; UopsSnippetGenerator::~UopsSnippetGenerator() = default; -void UopsSnippetGenerator::instantiateMemoryOperands( - const unsigned ScratchSpacePointerInReg, - std::vector &Instructions) const { - if (ScratchSpacePointerInReg == 0) - return; // no memory operands. 
- const auto &ET = State.getExegesisTarget(); - const unsigned MemStep = ET.getMaxMemoryAccessSize(); - const size_t OriginalInstructionsSize = Instructions.size(); - size_t I = 0; - for (InstructionTemplate &IT : Instructions) { - ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep); - ++I; +llvm::Expected +UopsSnippetGenerator::generateCodeTemplate(const Instruction &Instr) const { + const auto EM = getExecutionModes(Instr); + if (has(EM, ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS)) { + if (ScratchSpaceReg == 0) + return llvm::make_error( + "infeasible : target does not support memory instructions"); + if (Instr.isImplicitlyAliasingWith(ScratchSpaceReg)) + return llvm::make_error( + "infeasible : memory instruction uses scratch memory register"); + } + + if (has(EM, ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS)) + return generateMemoryCodeTemplate( + EM, "instruction is serial, repeating a random one.", + generateSingleInstruction(Instr)); + + if (has(EM, ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF)) + return generateMemoryCodeTemplate( + EM, "instruction is parallel, repeating a random one.", + generateSingleInstruction(Instr)); + + const auto &RATC = State.getRATC(); + llvm::BitVector ForbiddenRegs = RATC.reservedRegisters(); + ForbiddenRegs |= RATC.getRegister(ScratchSpaceReg).aliasedBits(); + + if (has(EM, ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS)) { + if (const Variable *const TiedVar = Instr.getTiedVariable()) + return generateMemoryCodeTemplate( + EM, "instruction has tied variables using static renaming.", + generateStaticRenaming(Instr, *TiedVar, ForbiddenRegs)); + else + return generateMemoryCodeTemplate( + EM, "instruction has no tied variables picking distinct Uses/Defs.", + generateUseDistinctFromDefs(Instr, ForbiddenRegs)); } - while (Instructions.size() < kMinNumDifferentAddresses) { - InstructionTemplate IT = Instructions[I % OriginalInstructionsSize]; - ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I 
* MemStep); - ++I; - Instructions.push_back(std::move(IT)); + if (has(EM, ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS)) { + return generateMemoryCodeTemplate( + EM, "instruction is parallel via distinct memory access.", + generateSingleInstruction(Instr)); } - assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize && - "not enough scratch space"); + + for (const auto Bit : getExecutionModeBits(EM)) + llvm::errs() << " - " << getName(Bit) << "\n"; + + return llvm::make_error("Unhandled case"); } llvm::Expected> UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { - CodeTemplate CT; - const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr; - if (Instr.hasMemoryOperands()) { - const auto &ET = State.getExegesisTarget(); - CT.ScratchSpacePointerInReg = - ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple()); - if (CT.ScratchSpacePointerInReg == 0) - return llvm::make_error( - "Infeasible : target does not support memory instructions"); - ScratchSpaceAliasedRegs = - &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits(); - // If the instruction implicitly writes to ScratchSpacePointerInReg , abort. - // FIXME: We could make a copy of the scratch register. - for (const auto &Op : Instr.Operands) { - if (Op.isDef() && Op.isImplicitReg() && - ScratchSpaceAliasedRegs->test(Op.getImplicitReg())) - return llvm::make_error( - "Infeasible : memory instruction uses scratch memory register"); + if (auto ECT = generateCodeTemplate(Instr)) + return getSingleton(std::move(*ECT)); + else + return ECT.takeError(); +} + +// Instantiates memory operands within a snippet. +// +// To make computations as parallel as possible, we generate independent memory +// locations for instructions that load and store. If there are fewer than +// kMinNumDifferentAddresses in the original snippet, we duplicate instructions +// until there are this number of instructions. 
+// +// For example, assuming kMinNumDifferentAddresses=5 and +// getMaxMemoryAccessSize()=64, if the original snippet is: +// mov eax, [memory] +// we might generate: +// mov eax, [rdi] +// mov eax, [rdi + 64] +// mov eax, [rdi + 128] +// mov eax, [rdi + 192] +// mov eax, [rdi + 256] +// If the original snippet is: +// mov eax, [memory] +// add eax, [memory] +// we might generate: +// mov eax, [rdi] +// add eax, [rdi + 64] +// mov eax, [rdi + 128] +// add eax, [rdi + 192] +// mov eax, [rdi + 256] +llvm::Expected UopsSnippetGenerator::generateMemoryCodeTemplate( + ExecutionMode EM, llvm::StringRef Info, + const std::vector &OriginalInstrs) const { + std::vector Instrs; + if (has(EM, ExecutionMode::PARALLEL_VIA_DISTINCT_MEMORY_ACCESS)) { + assert(ScratchSpaceReg && "must not be 0"); + const unsigned MemStep = ET.getMaxMemoryAccessSize(); + const size_t MaxInstrs = std::max( + OriginalInstrs.size(), UopsSnippetGenerator::kMinNumDifferentAddresses); + for (size_t I = 0, E = MaxInstrs; I < E; ++I) { + InstructionTemplate IT = OriginalInstrs[I % OriginalInstrs.size()]; + ET.fillMemoryOperands(IT, ScratchSpaceReg, I * MemStep); + Instrs.push_back(std::move(IT)); } + if (MaxInstrs * MemStep >= BenchmarkRunner::ScratchSpace::kSize) + return llvm::make_error("Not enough scratch space"); + } else { + Instrs = std::move(OriginalInstrs); } + CodeTemplate CT; + CT.Execution = EM; + CT.Info = Info; + CT.ScratchSpacePointerInReg = ScratchSpaceReg; + CT.Instructions = std::move(Instrs); + return std::move(CT); +} - const AliasingConfigurations SelfAliasing(Instr, Instr); - InstructionTemplate IT(Instr); - if (SelfAliasing.empty()) { - CT.Info = "instruction is parallel, repeating a random one."; - CT.Instructions.push_back(std::move(IT)); - instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(std::move(CT)); - } - if (SelfAliasing.hasImplicitAliasing()) { - CT.Info = "instruction is serial, repeating a random one."; - 
CT.Instructions.push_back(std::move(IT)); - instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(std::move(CT)); - } - const auto TiedVariables = getVariablesWithTiedOperands(Instr); - if (!TiedVariables.empty()) { - if (TiedVariables.size() > 1) - return llvm::make_error( - "Infeasible : don't know how to handle several tied variables", - llvm::inconvertibleErrorCode()); - const Variable *Var = TiedVariables.front(); - assert(Var); - const Operand &Op = Instr.getPrimaryOperand(*Var); - assert(Op.isReg()); - CT.Info = "instruction has tied variables using static renaming."; - for (const llvm::MCPhysReg Reg : - Op.getRegisterAliasing().sourceBits().set_bits()) { - if (ScratchSpaceAliasedRegs && ScratchSpaceAliasedRegs->test(Reg)) - continue; // Do not use the scratch memory address register. - InstructionTemplate TmpIT = IT; - TmpIT.getValueFor(*Var) = llvm::MCOperand::createReg(Reg); - CT.Instructions.push_back(std::move(TmpIT)); - } - instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(std::move(CT)); +std::vector UopsSnippetGenerator::generateStaticRenaming( + const Instruction &Instr, const Variable &TiedVar, + llvm::BitVector ForbiddenRegs) const { + std::vector Instructions; + const auto &Op = Instr.getPrimaryOperand(TiedVar); + assert(Op.isReg() && Op.isExplicit() && Op.isDef() && + "Op is expected to be an explicit def register operand"); + const auto &PossibleRegisters = Op.getRegisterAliasing().sourceBits(); + for (auto Reg : PossibleRegisters.set_bits()) { + if (ForbiddenRegs.test(Reg)) + continue; + Instructions.emplace_back(Instr); + Instructions.back().getValueFor(Op) = llvm::MCOperand::createReg(Reg); + ForbiddenRegs |= State.getRATC().getRegister(Reg).aliasedBits(); } - const auto &ReservedRegisters = State.getRATC().reservedRegisters(); - // No tied variables, we pick random values for defs. 
+ return Instructions; +} + +std::vector +UopsSnippetGenerator::generateUseDistinctFromDefs( + const Instruction &Instr, llvm::BitVector ForbiddenRegs) const { + std::vector Result; + Result.emplace_back(Instr); + InstructionTemplate &IT = Result.back(); + // We pick random values for defs. llvm::BitVector Defs(State.getRegInfo().getNumRegs()); for (const auto &Op : Instr.Operands) { if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) { auto PossibleRegisters = Op.getRegisterAliasing().sourceBits(); - remove(PossibleRegisters, ReservedRegisters); - // Do not use the scratch memory address register. - if (ScratchSpaceAliasedRegs) - remove(PossibleRegisters, *ScratchSpaceAliasedRegs); - assert(PossibleRegisters.any() && "No register left to choose from"); + remove(PossibleRegisters, ForbiddenRegs); + if (PossibleRegisters.none()) + continue; const auto RandomReg = randomBit(PossibleRegisters); Defs.set(RandomReg); IT.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg); @@ -203,21 +241,23 @@ for (const auto &Op : Instr.Operands) { if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) { auto PossibleRegisters = Op.getRegisterAliasing().sourceBits(); - remove(PossibleRegisters, ReservedRegisters); - // Do not use the scratch memory address register. 
- if (ScratchSpaceAliasedRegs) - remove(PossibleRegisters, *ScratchSpaceAliasedRegs); + remove(PossibleRegisters, ForbiddenRegs); remove(PossibleRegisters, DefAliases); - assert(PossibleRegisters.any() && "No register left to choose from"); + if (PossibleRegisters.none()) + continue; const auto RandomReg = randomBit(PossibleRegisters); IT.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg); } } - CT.Info = - "instruction has no tied variables picking Uses different from defs"; - CT.Instructions.push_back(std::move(IT)); - instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(std::move(CT)); + return Result; +} + +std::vector +UopsSnippetGenerator::generateSingleInstruction( + const Instruction &Instr) const { + std::vector Result; + Result.emplace_back(Instr); + return Result; } llvm::Expected> Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -33,6 +33,7 @@ MATCHER(IsInvalid, "") { return !arg.isValid(); } MATCHER(IsReg, "") { return arg.isReg(); } +MATCHER_P(HasExecution, EM, "") { return (arg & EM) == EM; } class X86SnippetGeneratorTest : public ::testing::Test { protected: @@ -115,7 +116,7 @@ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; - EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS); + EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_TIED_REGS); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -198,7 +199,8 @@ ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("parallel")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + EXPECT_THAT(CT.Execution, + 
HasExecution(ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF)); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -219,7 +221,8 @@ ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("serial")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + EXPECT_THAT(CT.Execution, + HasExecution(ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS)); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -244,8 +247,9 @@ ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("static renaming")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); - constexpr const unsigned kInstructionCount = 15; + EXPECT_THAT(CT.Execution, + HasExecution(ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS)); + constexpr const unsigned kInstructionCount = 14; // 15 registers - EDI. 
ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); std::unordered_set AllDefRegisters; for (const auto &IT : CT.Instructions) { @@ -276,7 +280,8 @@ ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + EXPECT_THAT(CT.Execution, + HasExecution(ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS)); ASSERT_THAT(CT.Instructions, SizeIs(1)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); @@ -310,7 +315,8 @@ ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + EXPECT_THAT(CT.Execution, + HasExecution(ExecutionMode::PARALLEL_VIA_DISTINCT_EXPLICIT_REGS)); ASSERT_THAT(CT.Instructions, SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0];