Index: tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.h +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -69,6 +69,9 @@ const LLVMState &State; const RegisterAliasingTrackerCache RATC; + llvm::Expected generateSelfAliasingPrototype( + const Instruction &Instr) const; + private: // API to be implemented by subclasses. virtual llvm::Expected Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -196,4 +196,25 @@ return ResultPath.str(); } +llvm::Expected BenchmarkRunner::generateSelfAliasingPrototype( + const Instruction &Instr) const { + const AliasingConfigurations SelfAliasing(Instr, Instr); + if (SelfAliasing.empty()) { + return llvm::make_error("empty self aliasing", llvm::inconvertibleErrorCode()); + } + SnippetPrototype Prototype; + InstructionInstance II(Instr); + if (SelfAliasing.hasImplicitAliasing()) { + Prototype.Explanation = "implicit Self cycles, picking random values."; + } else { + Prototype.Explanation = + "explicit self cycles, selecting one aliasing Conf."; + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice II in the following call. + setRandomAliasing(SelfAliasing, II, II); + } + Prototype.Snippet.push_back(std::move(II)); + return std::move(Prototype); +} + } // namespace exegesis Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- tools/llvm-exegesis/lib/Latency.h +++ tools/llvm-exegesis/lib/Latency.h @@ -32,13 +32,8 @@ private: llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const; - llvm::Expected generateSelfAliasingPrototype( - const Instruction &Instr, - const AliasingConfigurations &SelfAliasing) const; - llvm::Expected generateTwoInstructionPrototype( - const Instruction &Instr, - const AliasingConfigurations &SelfAliasing) const; + const Instruction &Instr) const; std::vector runMeasurements(const ExecutableFunction &EF, Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -42,29 +42,9 @@ return llvm::Error::success(); } -llvm::Expected -LatencyBenchmarkRunner::generateSelfAliasingPrototype( - const Instruction &Instr, - const AliasingConfigurations &SelfAliasing) const { - SnippetPrototype Prototype; - InstructionInstance II(Instr); - if (SelfAliasing.hasImplicitAliasing()) { - Prototype.Explanation = "implicit Self cycles, picking random values."; - } else { - Prototype.Explanation = - "explicit self cycles, selecting one aliasing Conf."; - // This is a self aliasing instruction so defs and uses are from the same - // instance, hence twice II in the following call. - setRandomAliasing(SelfAliasing, II, II); - } - Prototype.Snippet.push_back(std::move(II)); - return std::move(Prototype); -} - llvm::Expected LatencyBenchmarkRunner::generateTwoInstructionPrototype( - const Instruction &Instr, - const AliasingConfigurations &SelfAliasing) const { + const Instruction &Instr) const { std::vector Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); std::iota(Opcodes.begin(), Opcodes.end(), 0U); @@ -89,8 +69,9 @@ if (!Back.hasImplicitAliasing()) setRandomAliasing(Back, OtherII, ThisII); SnippetPrototype Prototype; - Prototype.Explanation = llvm::formatv("creating cycle through {0}.", - State.getInstrInfo().getName(OtherOpcode)); + Prototype.Explanation = + llvm::formatv("creating cycle through {0}.", + State.getInstrInfo().getName(OtherOpcode)); Prototype.Snippet.push_back(std::move(ThisII)); Prototype.Snippet.push_back(std::move(OtherII)); return std::move(Prototype); @@ -105,13 +86,12 @@ if (auto E = isInfeasible(InstrDesc)) return std::move(E); const Instruction Instr(InstrDesc, RATC); - const AliasingConfigurations SelfAliasing(Instr, Instr); - if (SelfAliasing.empty()) { - // No self aliasing, trying to create a dependency through another opcode. - return generateTwoInstructionPrototype(Instr, SelfAliasing); - } else { - return generateSelfAliasingPrototype(Instr, SelfAliasing); - } + if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr)) + return SelfAliasingPrototype; + else + llvm::consumeError(SelfAliasingPrototype.takeError()); + // No self aliasing, trying to create a dependency through another opcode. + return generateTwoInstructionPrototype(Instr); } std::vector Index: tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/X86/Target.cpp +++ tools/llvm-exegesis/lib/X86/Target.cpp @@ -10,6 +10,7 @@ #include "../Latency.h" #include "../Uops.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" #include "X86RegisterInfo.h" @@ -17,43 +18,107 @@ namespace exegesis { -// Test whether we can generate a snippet for this instruction. -static llvm::Error shouldRun(const LLVMState &State, const unsigned Opcode) { - const auto &InstrInfo = State.getInstrInfo(); - const auto OpcodeName = InstrInfo.getName(Opcode); - if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || - OpcodeName.startswith("ADJCALLSTACK")) { - return llvm::make_error( - "Unsupported opcode: Push/Pop/AdjCallStack"); - } - return llvm::ErrorSuccess(); -} - namespace { -class X86LatencyBenchmarkRunner : public LatencyBenchmarkRunner { -private: - using LatencyBenchmarkRunner::LatencyBenchmarkRunner; +// Common code for X86 Uops and Latency runners. +template class X86BenchmarkRunner : public Impl { + using Impl::Impl; llvm::Expected generatePrototype(unsigned Opcode) const override { - if (llvm::Error E = shouldRun(State, Opcode)) { - return std::move(E); + // Test whether we can generate a snippet for this instruction. + const auto &InstrInfo = this->State.getInstrInfo(); + const auto OpcodeName = InstrInfo.getName(Opcode); + if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || + OpcodeName.startswith("ADJCALLSTACK")) { + return llvm::make_error( + "Unsupported opcode: Push/Pop/AdjCallStack"); } - return LatencyBenchmarkRunner::generatePrototype(Opcode); + + // Handle X87. + const auto &InstrDesc = InstrInfo.get(Opcode); + const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask; + const Instruction Instr(InstrDesc, this->RATC); + switch (FPInstClass) { + case llvm::X86II::NotFP: + break; + case llvm::X86II::ZeroArgFP: + return Impl::handleZeroArgFP(Instr); + case llvm::X86II::OneArgFP: + return Impl::handleOneArgFP(Instr); // fstp ST(0) + case llvm::X86II::OneArgFPRW: + case llvm::X86II::TwoArgFP: { + // These are instructions like + // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) + // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) + // They are intrinsically serial and do not modify the state of the stack. + // We generate the same code for latency and uops. + return this->generateSelfAliasingPrototype(Instr); + } + case llvm::X86II::CompareFP: + return Impl::handleCompareFP(Instr); + case llvm::X86II::CondMovFP: + return Impl::handleCondMovFP(Instr); + case llvm::X86II::SpecialFP: + return Impl::handleSpecialFP(Instr); + default: + llvm_unreachable("Unknown FP Type!"); + } + + // Fallback to generic implementation. + return Impl::Base::generatePrototype(Opcode); } }; -class X86UopsBenchmarkRunner : public UopsBenchmarkRunner { -private: - using UopsBenchmarkRunner::UopsBenchmarkRunner; +class X86LatencyImpl : public LatencyBenchmarkRunner { +protected: + using Base = LatencyBenchmarkRunner; + using Base::Base; + llvm::Expected + handleZeroArgFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 ZeroArgFP"); + } + llvm::Expected + handleOneArgFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 OneArgFP"); + } + llvm::Expected + handleCompareFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 CompareFP"); + } + llvm::Expected + handleCondMovFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 CondMovFP"); + } + llvm::Expected + handleSpecialFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 SpecialFP"); + } +}; +class X86UopsImpl : public UopsBenchmarkRunner { +protected: + using Base = UopsBenchmarkRunner; + using Base::Base; llvm::Expected - generatePrototype(unsigned Opcode) const override { - if (llvm::Error E = shouldRun(State, Opcode)) { - return std::move(E); - } - return UopsBenchmarkRunner::generatePrototype(Opcode); + handleZeroArgFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 ZeroArgFP"); + } + llvm::Expected + handleOneArgFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 OneArgFP"); + } + llvm::Expected + handleCompareFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 CompareFP"); + } + llvm::Expected + handleCondMovFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 CondMovFP"); + } + llvm::Expected + handleSpecialFP(const Instruction &Instr) const { + return llvm::make_error("Unsupported x87 SpecialFP"); } }; @@ -62,15 +127,11 @@ // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. // FIXME: Enable when the exegesis assembler no longer does // Properties.reset(TracksLiveness); - // PM.add(llvm::createX86FloatingPointStackifierPass()); + PM.add(llvm::createX86FloatingPointStackifierPass()); } std::vector setRegToConstant(unsigned Reg) const override { - // FIXME: Handle FP stack: - // llvm::X86::RFP32RegClass - // llvm::X86::RFP64RegClass - // llvm::X86::RFP80RegClass if (llvm::X86::GR8RegClass.contains(Reg)) { return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)}; } @@ -92,17 +153,23 @@ if (llvm::X86::VR512RegClass.contains(Reg)) { return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm); } + if (llvm::X86::RFP32RegClass.contains(Reg) || + llvm::X86::RFP64RegClass.contains(Reg) || + llvm::X86::RFP80RegClass.contains(Reg)) { + return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m); + } return {}; } std::unique_ptr createLatencyBenchmarkRunner(const LLVMState &State) const override { - return llvm::make_unique(State); + return llvm::make_unique>( + State); } std::unique_ptr createUopsBenchmarkRunner(const LLVMState &State) const override { - return llvm::make_unique(State); + return llvm::make_unique>(State); } bool matchesArch(llvm::Triple::ArchType Arch) const override {