diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -119,8 +119,8 @@ CodeTemplate(CodeTemplate &&); // default CodeTemplate &operator=(CodeTemplate &&); // default - CodeTemplate(const CodeTemplate &) = delete; - CodeTemplate &operator=(const CodeTemplate &) = delete; + + CodeTemplate clone() const; ExecutionMode Execution = ExecutionMode::UNKNOWN; // See InstructionBenchmarkKey.::Config. @@ -132,6 +132,10 @@ // If the template uses the provided scratch memory, the register in which // the pointer to this memory is passed in to the function. unsigned ScratchSpacePointerInReg = 0; + +private: + CodeTemplate(const CodeTemplate &); // default + CodeTemplate &operator=(const CodeTemplate &); // default }; } // namespace exegesis diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp @@ -11,10 +11,19 @@ namespace llvm { namespace exegesis { +CodeTemplate::CodeTemplate(const CodeTemplate &) = default; + CodeTemplate::CodeTemplate(CodeTemplate &&) = default; CodeTemplate &CodeTemplate::operator=(CodeTemplate &&) = default; +CodeTemplate &CodeTemplate::operator=(const CodeTemplate &) = default; + +CodeTemplate CodeTemplate::clone() const { + CodeTemplate CT = *this; + return CT; +} + InstructionTemplate::InstructionTemplate(const Instruction *Instr) : Instr(Instr), VariableValues(Instr->Variables.size()) {} diff --git a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp @@ -114,9 +114,10 @@ "not enough scratch space"); } -static std::vector generateSnippetUsingStaticRenaming( +static std::vector +generateSnippetForInstrWithTiedVariables( const LLVMState &State, const InstructionTemplate &IT, - const ArrayRef TiedVariables, + const ArrayRef TiedVariables, bool RandomizeUseRegs, const BitVector &ForbiddenRegisters) { // We don't want to accidentally serialize the instruction, // so we must be sure that we don't pick a def that is an implicit use, @@ -139,45 +140,51 @@ const auto ImplicitDefAliases = getAliasedBits(State.getRegInfo(), ImplicitDefs); - std::vector PossibleRegsForVars; - for (const Variable *Var : TiedVariables) { - assert(Var); - const Operand &Op = IT.getInstr().getPrimaryOperand(*Var); - assert(Op.isReg()); - BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits(); - remove(PossibleRegs, ForbiddenRegisters); - remove(PossibleRegs, ImplicitUseAliases); - PossibleRegsForVars.push_back(std::move(PossibleRegs)); - } - - BitVector Defs(State.getRegInfo().getNumRegs()); + BitVector DefsOrTiedUses(State.getRegInfo().getNumRegs()); + BitVector Uses(State.getRegInfo().getNumRegs()); std::vector Instructions; - // Assign registers to variables in a round-robin manner. This is simple but - // ensures that the most register-constrained variable does not get starved. - SmallVector Iterators(TiedVariables.size(), 0); - while (true) { InstructionTemplate TmpIT = IT; - // Find a possible register for each variable in turn, marking the - // register as taken. - for (auto I : zip(TiedVariables, PossibleRegsForVars, Iterators)) { - auto &TiedVariable = std::get<0>(I); - auto &PossibleRegsForVar = std::get<1>(I); - auto &Iterator = std::get<2>(I); - const int NextPossibleReg = PossibleRegsForVar.find_next(Iterator); - if (NextPossibleReg <= 0) { - return Instructions; - } - TmpIT.getValueFor(*TiedVariable) = MCOperand::createReg(NextPossibleReg); - // Bump iterator. - Iterator = NextPossibleReg; - // Prevent other variables from using the register. - for (BitVector &OtherPossibleRegs : PossibleRegsForVars) { - OtherPossibleRegs.reset(NextPossibleReg); + for (const auto &Op : IT.getInstr().Operands) { + if (Op.isReg() && Op.isExplicit() && !Op.isMemory() && + !TmpIT.getValueFor(Op).isValid()) { + assert((!Op.isUse() || !Op.isTied()) && + "Not expecting to see a tied use reg"); + std::optional RandomReg; + auto PossibleRegisters = Op.getRegisterAliasing().sourceBits(); + const auto UseAliases = getAliasedBits(State.getRegInfo(), Uses); + if (!RandomizeUseRegs && Op.isUse()) { + if (auto CommonBit = getFirstCommonBit(PossibleRegisters, UseAliases)) + RandomReg = *CommonBit; + } + if (!RandomReg) { + remove(PossibleRegisters, ForbiddenRegisters); + if (Op.isDef()) { + remove(PossibleRegisters, ImplicitUseAliases); + remove(PossibleRegisters, UseAliases); + } + if (Op.isUse()) + remove(PossibleRegisters, ImplicitDefAliases); + // Now, important bit: neither do we want to reuse def registers, nor + // we want to use them as use registers. So def'd regs are off-limits! + const auto DefsOrTiedUsesAliases = + getAliasedBits(State.getRegInfo(), DefsOrTiedUses); + remove(PossibleRegisters, DefsOrTiedUsesAliases); + if (!PossibleRegisters.any()) + return Instructions; + RandomReg = randomBit(PossibleRegisters); + } + assert(RandomReg); + if (Op.isDef() || Op.isTied()) + DefsOrTiedUses.set(*RandomReg); + if (Op.isUse()) + Uses.set(*RandomReg); + TmpIT.getValueFor(Op) = MCOperand::createReg(*RandomReg); } } Instructions.push_back(std::move(TmpIT)); + assert(Instructions.size() <= 128 && "Stuck in endless loop?"); } } @@ -206,15 +213,28 @@ } const auto TiedVariables = getVariablesWithTiedOperands(Instr); if (!TiedVariables.empty()) { - CT.Info = "instruction has tied variables, using static renaming."; - CT.Instructions = generateSnippetUsingStaticRenaming( - State, Variant, TiedVariables, ForbiddenRegisters); - if (CT.Instructions.empty()) - return make_error( - Twine("Failed to produce any snippet via: ").concat(CT.Info), - inconvertibleErrorCode()); - instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions); - return getSingleton(std::move(CT)); + std::vector Result; + for (bool RandomizeUseRegs : {true, false}) { + CodeTemplate CurrCT = CT.clone(); + CurrCT.Info = + RandomizeUseRegs + ? "instruction has tied variables, avoiding " + "Read-After-Write issue, picking random def and use " + "registers not aliasing each other" + : "instruction has tied variables, avoiding " + "Read-After-Write issue, picking random def registers " + "not aliasing a single random use register"; + CurrCT.Instructions = generateSnippetForInstrWithTiedVariables( + State, Variant, TiedVariables, RandomizeUseRegs, ForbiddenRegisters); + if (CurrCT.Instructions.empty()) + return make_error( + Twine("Failed to produce any snippet via: ").concat(CurrCT.Info), + inconvertibleErrorCode()); + instantiateMemoryOperands(CurrCT.ScratchSpacePointerInReg, + CurrCT.Instructions); + Result.push_back(std::move(CurrCT)); + } + return Result; } // No tied variables, we pick random values for defs. diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -92,6 +92,9 @@ // Precondition: Vector must have at least one bit set. size_t randomBit(const BitVector &Vector); +// Picks a first bit that is common to these two vectors. +std::optional getFirstCommonBit(const BitVector &A, const BitVector &B); + // Picks a random configuration, then selects a random def and a random use from // it and finally set the selected values in the provided InstructionInstances. void setRandomAliasing(const AliasingConfigurations &AliasingConfigurations, diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -213,6 +213,16 @@ return *Itr; } +std::optional getFirstCommonBit(const BitVector &A, + const BitVector &B) { + BitVector Intersect = A; + Intersect &= B; + int idx = Intersect.find_first(); + if (idx != -1) + return idx; + return {}; +} + void setRandomAliasing(const AliasingConfigurations &AliasingConfigurations, InstructionTemplate &DefIB, InstructionTemplate &UseIB) { assert(!AliasingConfigurations.empty()); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -14,6 +14,7 @@ #include "SerialSnippetGenerator.h" #include "TestBase.h" #include "X86InstrInfo.h" +#include "llvm/ADT/SetOperations.h" #include @@ -26,8 +27,10 @@ using testing::AnyOf; using testing::ElementsAre; +using testing::Ge; using testing::Gt; using testing::HasSubstr; +using testing::IsEmpty; using testing::Not; using testing::SizeIs; using testing::UnorderedElementsAre; @@ -232,7 +235,7 @@ ASSERT_THAT(IT.getVariableValues(), SizeIs(0)); } -TEST_F(X86ParallelSnippetGeneratorTest, StaticRenaming) { +TEST_F(X86ParallelSnippetGeneratorTest, ReadAfterWrite_CMOV32rr) { // CMOV32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. @@ -248,19 +251,69 @@ // - hasAliasingRegisters const unsigned Opcode = X86::CMOV32rr; const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); - ASSERT_THAT(CodeTemplates, SizeIs(1)); - const auto &CT = CodeTemplates[0]; - EXPECT_THAT(CT.Info, HasSubstr("static renaming")); - EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); - constexpr const unsigned kInstructionCount = 15; - ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount)); - std::unordered_set AllDefRegisters; - for (const auto &IT : CT.Instructions) { - ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); - AllDefRegisters.insert(IT.getVariableValues()[0].getReg()); + ASSERT_THAT(CodeTemplates, SizeIs(2)); + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Info, HasSubstr("avoiding Read-After-Write issue")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + ASSERT_GT(CT.Instructions.size(), 1U); + std::unordered_set AllDefRegisters; + std::unordered_set AllUseRegisters; + for (const auto &IT : CT.Instructions) { + ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); + AllDefRegisters.insert(IT.getVariableValues()[0].getReg()); + AllUseRegisters.insert(IT.getVariableValues()[1].getReg()); + } + EXPECT_THAT(AllDefRegisters, SizeIs(CT.Instructions.size())) + << "Each instruction writes to a different register"; + EXPECT_THAT(AllUseRegisters, Not(IsEmpty())) + << "In total, some other registers are used"; + auto AllDefAndUseRegs = AllUseRegisters; + llvm::set_intersect(AllDefAndUseRegs, AllDefRegisters); // A := A ^ B + EXPECT_THAT(AllDefAndUseRegs, IsEmpty()) + << "No instruction uses any register defined by any of the " + "instructions"; + } +} + +TEST_F(X86ParallelSnippetGeneratorTest, ReadAfterWrite_VFMADD132PDr) { + // VFMADD132PDr has tied variables, we enumerate the possible values + // to execute as many in parallel as possible. + + // - VFMADD132PDr + // - Op0 Explicit Def RegClass(XMM) + // - Op1 Explicit Use RegClass(XMM) TiedToOp0 + // - Op2 Explicit Use RegClass(XMM) + // - Op3 Explicit Use RegClass(XMM) + // - Var0 [Op0,Op1] + // - Var1 [Op2] + // - Var2 [Op3] + // - hasTiedRegisters (execution is always serial) + // - hasAliasingRegisters + const unsigned Opcode = X86::VFMADD132PDr; + const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); + ASSERT_THAT(CodeTemplates, SizeIs(2)); + for (const auto &CT : CodeTemplates) { + EXPECT_THAT(CT.Info, HasSubstr("avoiding Read-After-Write issue")); + EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); + ASSERT_GT(CT.Instructions.size(), 1U); + std::unordered_set AllDefRegisters; + std::unordered_set AllUseRegisters; + for (const auto &IT : CT.Instructions) { + ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); + AllDefRegisters.insert(IT.getVariableValues()[0].getReg()); + AllUseRegisters.insert(IT.getVariableValues()[1].getReg()); + AllUseRegisters.insert(IT.getVariableValues()[2].getReg()); + } + EXPECT_THAT(AllDefRegisters, SizeIs(CT.Instructions.size())) + << "Each instruction writes to a different register"; + EXPECT_THAT(AllUseRegisters, Not(IsEmpty())) + << "In total, some other registers are used"; + auto AllDefAndUseRegs = AllUseRegisters; + llvm::set_intersect(AllDefAndUseRegs, AllDefRegisters); // A := A ^ B + EXPECT_THAT(AllDefAndUseRegs, IsEmpty()) + << "No instruction uses any register defined by any of the " + "instructions"; } - EXPECT_THAT(AllDefRegisters, SizeIs(kInstructionCount)) - << "Each instruction writes to a different register"; } TEST_F(X86ParallelSnippetGeneratorTest, NoTiedVariables) {