diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/Latency.h
--- a/llvm/tools/llvm-exegesis/lib/Latency.h
+++ b/llvm/tools/llvm-exegesis/lib/Latency.h
@@ -27,7 +27,8 @@
   ~LatencySnippetGenerator() override;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 
 class LatencyBenchmarkRunner : public BenchmarkRunner {
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/Latency.cpp
--- a/llvm/tools/llvm-exegesis/lib/Latency.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Latency.cpp
@@ -39,7 +39,8 @@
 static std::vector<Instruction>
 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
-                            size_t MaxAliasingInstructions) {
+                            size_t MaxAliasingInstructions,
+                            const BitVector &ForbiddenRegisters) {
   // Randomly iterate the set of instructions.
   std::vector<unsigned> Opcodes;
   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
@@ -53,15 +54,16 @@
     const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
     if (OtherInstr.hasMemoryOperands())
       continue;
-    if (Instr.hasAliasingRegistersThrough(OtherInstr))
-      AliasingInstructions.push_back(std::move(OtherInstr));
+    if (Instr.hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
+      AliasingInstructions.push_back(OtherInstr);
     if (AliasingInstructions.size() >= MaxAliasingInstructions)
       break;
   }
   return AliasingInstructions;
 }
 
-static ExecutionMode getExecutionModes(const Instruction &Instr) {
+static ExecutionMode getExecutionModes(const Instruction &Instr,
+                                       const BitVector &ForbiddenRegisters) {
   ExecutionMode EM = ExecutionMode::UNKNOWN;
   if (Instr.hasAliasingImplicitRegisters())
     EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
@@ -70,7 +72,7 @@
   if (Instr.hasMemoryOperands())
     EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
   else {
-    if (Instr.hasAliasingRegisters())
+    if (Instr.hasAliasingRegisters(ForbiddenRegisters))
       EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
     if (Instr.hasOneUseOrOneDef())
       EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
@@ -80,6 +82,7 @@
 static void appendCodeTemplates(const LLVMState &State,
                                 const Instruction &Instr,
+                                const BitVector &ForbiddenRegisters,
                                 ExecutionMode ExecutionModeBit,
                                 llvm::StringRef ExecutionClassDescription,
                                 std::vector<CodeTemplate> &CodeTemplates) {
@@ -122,8 +125,8 @@
   }
   case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
     // Select back-to-back non-memory instruction.
-    for (const auto OtherInstr :
-         computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
+    for (const auto OtherInstr : computeAliasingInstructions(
+             State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
       const AliasingConfigurations Forward(Instr, OtherInstr);
       const AliasingConfigurations Back(OtherInstr, Instr);
       InstructionTemplate ThisIT(Instr);
@@ -149,13 +152,14 @@
 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
 
 llvm::Expected<std::vector<CodeTemplate>>
-LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+LatencySnippetGenerator::generateCodeTemplates(
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   std::vector<CodeTemplate> Results;
-  const ExecutionMode EM = getExecutionModes(Instr);
+  const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
   for (const auto EC : kExecutionClasses) {
     for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
-      appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
-                          Results);
+      appendCodeTemplates(State, Instr, ForbiddenRegisters, ExecutionModeBit,
+                          EC.Description, Results);
     if (!Results.empty())
       break;
   }
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -112,10 +112,11 @@
   // Repeating this instruction may execute sequentially by picking aliasing
   // Use and Def registers. It may also execute in parallel by picking non
   // aliasing Use and Def registers.
-  bool hasAliasingRegisters() const;
+  bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;
 
   // Whether this instruction's registers alias with OtherInstr's registers.
-  bool hasAliasingRegistersThrough(const Instruction &OtherInstr) const;
+  bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
+                                   const BitVector &ForbiddenRegisters) const;
 
   // Returns whether this instruction has Memory Operands.
   // Repeating this instruction executes sequentially with an instruction that
@@ -129,6 +130,7 @@
   // Convenient function to help with debugging.
   void dump(const llvm::MCRegisterInfo &RegInfo,
+            const RegisterAliasingTrackerCache &RATC,
             llvm::raw_ostream &Stream) const;
 
   const llvm::MCInstrDesc *Description; // Never nullptr.
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -183,10 +183,29 @@
   return ImplDefRegs.anyCommon(ImplUseRegs);
 }
 
+// Returns true if there are registers that are both in `A` and `B` but not in
+// `Forbidden`.
+static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B,
+                                        const BitVector &Forbidden) {
+  assert(A.size() == B.size() && B.size() == Forbidden.size());
+  const auto Size = A.size();
+  for (int AIndex = A.find_first(); AIndex != -1;) {
+    const int BIndex = B.find_first_in(AIndex, Size);
+    if (BIndex == -1)
+      return false;
+    if (AIndex == BIndex && !Forbidden.test(AIndex))
+      return true;
+    AIndex = A.find_first_in(BIndex + 1, Size);
+  }
+  return false;
+}
+
 bool Instruction::hasAliasingRegistersThrough(
-    const Instruction &OtherInstr) const {
-  return AllDefRegs.anyCommon(OtherInstr.AllUseRegs) &&
-         OtherInstr.AllDefRegs.anyCommon(AllUseRegs);
+    const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const {
+  return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs,
+                                     ForbiddenRegisters) &&
+         anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs,
+                                     ForbiddenRegisters);
 }
 
 bool Instruction::hasTiedRegisters() const {
@@ -194,8 +213,10 @@
       Variables, [](const Variable &Var) { return Var.hasTiedOperands(); });
 }
 
-bool Instruction::hasAliasingRegisters() const {
-  return AllDefRegs.anyCommon(AllUseRegs);
+bool Instruction::hasAliasingRegisters(
+    const BitVector &ForbiddenRegisters) const {
+  return anyCommonExcludingForbidden(AllDefRegs, AllUseRegs,
+                                     ForbiddenRegisters);
 }
 
 bool Instruction::hasOneUseOrOneDef() const {
@@ -203,6 +224,7 @@
 }
 
 void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
+                       const RegisterAliasingTrackerCache &RATC,
                        llvm::raw_ostream &Stream) const {
   Stream << "- " << Name << "\n";
   for (const auto &Op : Operands) {
@@ -251,7 +273,7 @@
     Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n";
   if (hasTiedRegisters())
     Stream << "- hasTiedRegisters (execution is always serial)\n";
-  if (hasAliasingRegisters())
+  if (hasAliasingRegisters(RATC.emptyRegisters()))
     Stream << "- hasAliasingRegisters\n";
 }
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -69,7 +69,8 @@
 private:
   // API to be implemented by subclasses.
   virtual llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const = 0;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const = 0;
 };
 
 // A global Random Number Generator to randomize configurations.
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -38,14 +38,33 @@
 llvm::Expected<std::vector<BenchmarkCode>>
 SnippetGenerator::generateConfigurations(const Instruction &Instr) const {
-  if (auto E = generateCodeTemplates(Instr)) {
-    const auto &RATC = State.getRATC();
+  llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters();
+
+  // If the instruction has memory operands, prevent the generator from
+  // using the scratch register and its aliasing registers.
+  if (Instr.hasMemoryOperands()) {
+    const auto &ET = State.getExegesisTarget();
+    unsigned ScratchSpacePointerInReg =
+        ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
+    if (ScratchSpacePointerInReg == 0)
+      return llvm::make_error<BenchmarkFailure>(
+          "Infeasible : target does not support memory instructions");
+    const auto &ScratchRegAliases =
+        State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits();
+    // If the instruction implicitly writes to ScratchSpacePointerInReg, abort.
+    // FIXME: We could make a copy of the scratch register.
+    for (const auto &Op : Instr.Operands) {
+      if (Op.isDef() && Op.isImplicitReg() &&
+          ScratchRegAliases.test(Op.getImplicitReg()))
+        return llvm::make_error<BenchmarkFailure>(
+            "Infeasible : memory instruction uses scratch memory register");
+    }
+    ForbiddenRegs |= ScratchRegAliases;
+  }
+
+  if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) {
     std::vector<BenchmarkCode> Output;
     for (CodeTemplate &CT : E.get()) {
-      const llvm::BitVector &ForbiddenRegs =
-          CT.ScratchSpacePointerInReg
-              ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
-              : RATC.emptyRegisters();
       // TODO: Generate as many BenchmarkCode as needed.
       {
         BenchmarkCode BC;
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/Uops.h
--- a/llvm/tools/llvm-exegesis/lib/Uops.h
+++ b/llvm/tools/llvm-exegesis/lib/Uops.h
@@ -26,7 +26,8 @@
   ~UopsSnippetGenerator() override;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 
   static constexpr const size_t kMinNumDifferentAddresses = 6;
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/Uops.cpp
--- a/llvm/tools/llvm-exegesis/lib/Uops.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Uops.cpp
@@ -126,7 +126,7 @@
 static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
     const LLVMState &State, const InstructionTemplate &IT,
     const ArrayRef<const Variable *> TiedVariables,
-    const BitVector *ScratchSpaceAliasedRegs) {
+    const BitVector &ForbiddenRegisters) {
   std::vector<InstructionTemplate> Instructions;
   // Assign registers to variables in a round-robin manner. This is simple but
   // ensures that the most register-constrained variable does not get starved.
@@ -135,12 +135,8 @@
     assert(Var);
     const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
     assert(Op.isReg());
-    BitVector PossibleRegs = State.getRATC().emptyRegisters();
-    if (ScratchSpaceAliasedRegs) {
-      PossibleRegs |= *ScratchSpaceAliasedRegs;
-    }
-    PossibleRegs.flip();
-    PossibleRegs &= Op.getRegisterAliasing().sourceBits();
+    BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
+    remove(PossibleRegs, ForbiddenRegisters);
     PossibleRegsForVar.push_back(std::move(PossibleRegs));
   }
   SmallVector Iterators(TiedVariables.size(), 0);
@@ -168,28 +164,14 @@
 }
 
 llvm::Expected<std::vector<CodeTemplate>>
-UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+UopsSnippetGenerator::generateCodeTemplates(
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   CodeTemplate CT;
-  const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
-  if (Instr.hasMemoryOperands()) {
-    const auto &ET = State.getExegesisTarget();
-    CT.ScratchSpacePointerInReg =
-        ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
-    if (CT.ScratchSpacePointerInReg == 0)
-      return llvm::make_error<BenchmarkFailure>(
-          "Infeasible : target does not support memory instructions");
-    ScratchSpaceAliasedRegs =
-        &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
-    // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
-    // FIXME: We could make a copy of the scratch register.
-    for (const auto &Op : Instr.Operands) {
-      if (Op.isDef() && Op.isImplicitReg() &&
-          ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
-        return llvm::make_error<BenchmarkFailure>(
-            "Infeasible : memory instruction uses scratch memory register");
-    }
-  }
-
+  CT.ScratchSpacePointerInReg =
+      Instr.hasMemoryOperands()
+          ? State.getExegesisTarget().getScratchMemoryRegister(
+                State.getTargetMachine().getTargetTriple())
+          : 0;
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   InstructionTemplate IT(Instr);
   if (SelfAliasing.empty()) {
@@ -208,20 +190,17 @@
   if (!TiedVariables.empty()) {
     CT.Info = "instruction has tied variables, using static renaming.";
     CT.Instructions = generateSnippetUsingStaticRenaming(
-        State, IT, TiedVariables, ScratchSpaceAliasedRegs);
+        State, IT, TiedVariables, ForbiddenRegisters);
     instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
     return getSingleton(std::move(CT));
   }
-  const auto &ReservedRegisters = State.getRATC().reservedRegisters();
   // No tied variables, we pick random values for defs.
   llvm::BitVector Defs(State.getRegInfo().getNumRegs());
   for (const auto &Op : Instr.Operands) {
     if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
       auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
-      remove(PossibleRegisters, ReservedRegisters);
-      // Do not use the scratch memory address register.
-      if (ScratchSpaceAliasedRegs)
-        remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+      // Do not use forbidden registers.
+      remove(PossibleRegisters, ForbiddenRegisters);
       assert(PossibleRegisters.any() && "No register left to choose from");
       const auto RandomReg = randomBit(PossibleRegisters);
       Defs.set(RandomReg);
@@ -233,10 +212,7 @@
   for (const auto &Op : Instr.Operands) {
     if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
       auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
-      remove(PossibleRegisters, ReservedRegisters);
-      // Do not use the scratch memory address register.
-      if (ScratchSpaceAliasedRegs)
-        remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+      remove(PossibleRegisters, ForbiddenRegisters);
       remove(PossibleRegisters, DefAliases);
       assert(PossibleRegisters.any() && "No register left to choose from");
       const auto RandomReg = randomBit(PossibleRegisters);
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -182,19 +182,21 @@
   using LatencySnippetGenerator::LatencySnippetGenerator;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 } // namespace
 
 llvm::Expected<std::vector<CodeTemplate>>
 X86LatencySnippetGenerator::generateCodeTemplates(
-    const Instruction &Instr) const {
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
-    return LatencySnippetGenerator::generateCodeTemplates(Instr);
+    return LatencySnippetGenerator::generateCodeTemplates(Instr,
+                                                          ForbiddenRegisters);
   case llvm::X86II::ZeroArgFP:
   case llvm::X86II::OneArgFP:
   case llvm::X86II::SpecialFP:
@@ -219,19 +221,21 @@
   using UopsSnippetGenerator::UopsSnippetGenerator;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 } // namespace
 
 llvm::Expected<std::vector<CodeTemplate>>
 X86UopsSnippetGenerator::generateCodeTemplates(
-    const Instruction &Instr) const {
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
-    return UopsSnippetGenerator::generateCodeTemplates(Instr);
+    return UopsSnippetGenerator::generateCodeTemplates(Instr,
+                                                       ForbiddenRegisters);
   case llvm::X86II::ZeroArgFP:
   case llvm::X86II::OneArgFP:
   case llvm::X86II::SpecialFP:
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
@@ -61,7 +61,8 @@
   std::vector<CodeTemplate> checkAndGetCodeTemplates(unsigned Opcode) {
     randomGenerator().seed(0); // Initialize seed.
     const Instruction &Instr = State.getIC().getInstr(Opcode);
-    auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr);
+    auto CodeTemplateOrError = Generator.generateCodeTemplates(
+        Instr, State.getRATC().emptyRegisters());
     EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration.
     return std::move(CodeTemplateOrError.get());
   }
@@ -148,6 +149,26 @@
       << "Op0 is either set to Op1 or to Op2";
 }
 
+TEST_F(LatencySnippetGeneratorTest,
+       ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
+  // - VXORPSrr
+  // - Op0 Explicit Def RegClass(VR128)
+  // - Op1 Explicit Use RegClass(VR128)
+  // - Op2 Explicit Use RegClass(VR128)
+  // - Var0 [Op0]
+  // - Var1 [Op1]
+  // - Var2 [Op2]
+  // - hasAliasingRegisters
+  const unsigned Opcode = llvm::X86::VXORPSrr;
+  randomGenerator().seed(0); // Initialize seed.
+  const Instruction &Instr = State.getIC().getInstr(Opcode);
+  auto AllRegisters = State.getRATC().emptyRegisters();
+  AllRegisters.flip();
+  auto Error = Generator.generateCodeTemplates(Instr, AllRegisters).takeError();
+  EXPECT_TRUE((bool)Error);
+  llvm::consumeError(std::move(Error));
+}
+
 TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
   // - CMP64rr
   // - Op0 Explicit Use RegClass(GR64)
@@ -323,30 +344,6 @@
   EXPECT_EQ(IT.VariableValues[5].getReg(), 0u);
 }
 
-TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
-  // MOVSB writes to scratch memory register.
-  // - MOVSB
-  // - Op0 Explicit Use Memory RegClass(GR8)
-  // - Op1 Explicit Use Memory RegClass(GR8)
-  // - Op2 Explicit Use Memory RegClass(SEGMENT_REG)
-  // - Op3 Implicit Def Reg(EDI)
-  // - Op4 Implicit Def Reg(ESI)
-  // - Op5 Implicit Use Reg(EDI)
-  // - Op6 Implicit Use Reg(ESI)
-  // - Op7 Implicit Use Reg(DF)
-  // - Var0 [Op0]
-  // - Var1 [Op1]
-  // - Var2 [Op2]
-  // - hasMemoryOperands
-  // - hasAliasingImplicitRegisters (execution is always serial)
-  // - hasAliasingRegisters
-  const unsigned Opcode = llvm::X86::MOVSB;
-  const Instruction &Instr = State.getIC().getInstr(Opcode);
-  auto Error = Generator.generateCodeTemplates(Instr).takeError();
-  EXPECT_TRUE((bool)Error);
-  llvm::consumeError(std::move(Error));
-}
-
 class FakeSnippetGenerator : public SnippetGenerator {
 public:
   FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
@@ -357,7 +354,7 @@
 private:
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override {
+  generateCodeTemplates(const Instruction &, const BitVector &) const override {
     return llvm::make_error<llvm::StringError>("not implemented",
                                                llvm::inconvertibleErrorCode());
   }
@@ -371,6 +368,30 @@
                         testing::Field(&RegisterValue::Value, Value));
 }
 
+TEST_F(FakeSnippetGeneratorTest, MemoryUse_Movsb) {
+  // MOVSB writes to scratch memory register.
+  // - MOVSB
+  // - Op0 Explicit Use Memory RegClass(GR8)
+  // - Op1 Explicit Use Memory RegClass(GR8)
+  // - Op2 Explicit Use Memory RegClass(SEGMENT_REG)
+  // - Op3 Implicit Def Reg(EDI)
+  // - Op4 Implicit Def Reg(ESI)
+  // - Op5 Implicit Use Reg(EDI)
+  // - Op6 Implicit Use Reg(ESI)
+  // - Op7 Implicit Use Reg(DF)
+  // - Var0 [Op0]
+  // - Var1 [Op1]
+  // - Var2 [Op2]
+  // - hasMemoryOperands
+  // - hasAliasingImplicitRegisters (execution is always serial)
+  // - hasAliasingRegisters
+  const unsigned Opcode = llvm::X86::MOVSB;
+  const Instruction &Instr = State.getIC().getInstr(Opcode);
+  auto Error = Generator.generateConfigurations(Instr).takeError();
+  EXPECT_TRUE((bool)Error);
+  llvm::consumeError(std::move(Error));
+}
+
 TEST_F(FakeSnippetGeneratorTest, ComputeRegisterInitialValuesAdd16ri) {
   // ADD16ri:
   // explicit def 0 : reg RegClass=GR16