Index: tools/llvm-exegesis/lib/BenchmarkRunner.h
===================================================================
--- tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -54,7 +54,8 @@
 // Common code for all benchmark modes.
 class BenchmarkRunner {
 public:
-  explicit BenchmarkRunner(const LLVMState &State, InstructionBenchmark::ModeE Mode);
+  explicit BenchmarkRunner(const LLVMState &State,
+                           InstructionBenchmark::ModeE Mode);
 
   virtual ~BenchmarkRunner();
 
@@ -69,17 +70,22 @@
   const LLVMState &State;
   const RegisterAliasingTrackerCache RATC;
 
-  llvm::Expected generateSelfAliasingPrototype(
-      const Instruction &Instr) const;
+  // Generates a single instruction prototype that has a self-dependency.
+  llvm::Expected
+  generateSelfAliasingPrototype(const Instruction &Instr) const;
+  // Generates a single instruction prototype without assignment constraints.
+  llvm::Expected
+  generateUnconstrainedPrototype(const Instruction &Instr,
+                                 llvm::StringRef Msg) const;
 
 private:
   // API to be implemented by subclasses.
   virtual llvm::Expected
-      generatePrototype(unsigned Opcode) const = 0;
+  generatePrototype(unsigned Opcode) const = 0;
 
   virtual std::vector
-      runMeasurements(const ExecutableFunction &EF,
-                      const unsigned NumRepetitions) const = 0;
+  runMeasurements(const ExecutableFunction &EF,
+                  const unsigned NumRepetitions) const = 0;
 
   // Internal helpers.
   InstructionBenchmark runOne(const BenchmarkConfiguration &Configuration,
@@ -90,7 +96,6 @@
   llvm::Expected>
   generateConfigurations(unsigned Opcode) const;
 
-
   llvm::Expected
   writeObjectFile(const BenchmarkConfiguration::Setup &Setup,
                   llvm::ArrayRef Code) const;
Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp
===================================================================
--- tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -196,8 +196,8 @@
   return ResultPath.str();
 }
 
-llvm::Expected BenchmarkRunner::generateSelfAliasingPrototype(
-    const Instruction &Instr) const {
+llvm::Expected
+BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   if (SelfAliasing.empty()) {
     return llvm::make_error("empty self aliasing");
@@ -217,4 +217,13 @@
   return std::move(Prototype);
 }
 
+llvm::Expected
+BenchmarkRunner::generateUnconstrainedPrototype(const Instruction &Instr,
+                                                llvm::StringRef Msg) const {
+  SnippetPrototype Prototype;
+  Prototype.Explanation = Msg.str();
+  Prototype.Explanation += ", repeating an unconstrained assignment";
+  Prototype.Snippet.emplace_back(Instr);
+  return std::move(Prototype);
+}
 } // namespace exegesis
Index: tools/llvm-exegesis/lib/Uops.cpp
===================================================================
--- tools/llvm-exegesis/lib/Uops.cpp
+++ tools/llvm-exegesis/lib/Uops.cpp
@@ -139,16 +139,10 @@
   const Instruction Instr(InstrDesc, RATC);
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   if (SelfAliasing.empty()) {
-    SnippetPrototype Prototype;
-    Prototype.Explanation = "instruction is parallel, repeating a random one.";
-    Prototype.Snippet.emplace_back(Instr);
-    return std::move(Prototype);
+    return generateUnconstrainedPrototype(Instr, "instruction is parallel");
   }
   if (SelfAliasing.hasImplicitAliasing()) {
-    SnippetPrototype Prototype;
-    Prototype.Explanation = "instruction is serial, repeating a random one.";
-    Prototype.Snippet.emplace_back(Instr);
-    return std::move(Prototype);
+    return generateUnconstrainedPrototype(Instr, "instruction is serial");
   }
   const auto TiedVariables = getTiedVariables(Instr);
   if (!TiedVariables.empty()) {
Index: tools/llvm-exegesis/lib/X86/Target.cpp
===================================================================
--- tools/llvm-exegesis/lib/X86/Target.cpp
+++ tools/llvm-exegesis/lib/X86/Target.cpp
@@ -43,9 +43,9 @@
     case llvm::X86II::NotFP:
       break;
     case llvm::X86II::ZeroArgFP:
-      return Impl::handleZeroArgFP(Instr);
+      return llvm::make_error("Unsupported x87 ZeroArgFP");
     case llvm::X86II::OneArgFP:
-      return Impl::handleOneArgFP(Instr); // fstp ST(0)
+      return llvm::make_error("Unsupported x87 OneArgFP");
     case llvm::X86II::OneArgFPRW:
     case llvm::X86II::TwoArgFP: {
       // These are instructions like
@@ -60,7 +60,7 @@
     case llvm::X86II::CondMovFP:
       return Impl::handleCondMovFP(Instr);
     case llvm::X86II::SpecialFP:
-      return Impl::handleSpecialFP(Instr);
+      return llvm::make_error("Unsupported x87 SpecialFP");
     default:
       llvm_unreachable("Unknown FP Type!");
     }
@@ -75,14 +75,6 @@
   using Base = LatencyBenchmarkRunner;
   using Base::Base;
   llvm::Expected
-  handleZeroArgFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 ZeroArgFP");
-  }
-  llvm::Expected
-  handleOneArgFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 OneArgFP");
-  }
-  llvm::Expected
   handleCompareFP(const Instruction &Instr) const {
     return llvm::make_error("Unsupported x87 CompareFP");
   }
@@ -90,35 +82,23 @@
   handleCondMovFP(const Instruction &Instr) const {
     return llvm::make_error("Unsupported x87 CondMovFP");
   }
-  llvm::Expected
-  handleSpecialFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 SpecialFP");
-  }
 };
 
 class X86UopsImpl : public UopsBenchmarkRunner {
 protected:
   using Base = UopsBenchmarkRunner;
   using Base::Base;
-  llvm::Expected
-  handleZeroArgFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 ZeroArgFP");
-  }
-  llvm::Expected
-  handleOneArgFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 OneArgFP");
-  }
+  // We can compute uops for any FP instruction that does not grow or shrink the
+  // stack (either do not touch the stack or push as much as they pop).
   llvm::Expected
   handleCompareFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 CompareFP");
+    return generateUnconstrainedPrototype(
+        Instr, "instruction does not grow/shrink the FP stack");
   }
   llvm::Expected
   handleCondMovFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 CondMovFP");
-  }
-  llvm::Expected
-  handleSpecialFP(const Instruction &Instr) const {
-    return llvm::make_error("Unsupported x87 SpecialFP");
+    return generateUnconstrainedPrototype(
+        Instr, "instruction does not grow/shrink the FP stack");
   }
 };
 
@@ -130,8 +110,7 @@
     PM.add(llvm::createX86FloatingPointStackifierPass());
   }
 
-  std::vector
-  setRegToConstant(unsigned Reg) const override {
+  std::vector setRegToConstant(unsigned Reg) const override {
     if (llvm::X86::GR8RegClass.contains(Reg)) {
       return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
     }
@@ -158,13 +137,21 @@
         llvm::X86::RFP80RegClass.contains(Reg)) {
       return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
     }
+    if (Reg == llvm::X86::EFLAGS) {
+      // Store the flags value in scratch stack space, then pop it into EFLAGS.
+      constexpr const uint32_t kImmValue = 0x00007002u;
+      std::vector Result;
+      Result.push_back(allocateStackSpace(8));
+      Result.push_back(fillScratchMemory(llvm::X86::MOV64mi32, 0, kImmValue));
+      Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
+      return Result;
+    }
     return {};
   }
 
   std::unique_ptr
   createLatencyBenchmarkRunner(const LLVMState &State) const override {
-    return llvm::make_unique>(
-        State);
+    return llvm::make_unique>(State);
   }
 
   std::unique_ptr
@@ -189,41 +176,58 @@
     // value that has set bits for all byte values and is a normal float/
     // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
     // interpreted as a float.
-    constexpr const uint64_t kImmValue = 0x40404040ull;
+    constexpr const uint32_t kImmValue = 0x40404040u;
     std::vector Result;
-    // Allocate scratch memory on the stack.
-    Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
-                         .addReg(llvm::X86::RSP)
-                         .addReg(llvm::X86::RSP)
-                         .addImm(RegSizeBytes));
-    // Fill scratch memory.
+    Result.push_back(allocateStackSpace(RegSizeBytes));
     for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
-      Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
-                           // Address = ESP
-                           .addReg(llvm::X86::RSP) // BaseReg
-                           .addImm(1)              // ScaleAmt
-                           .addReg(0)              // IndexReg
-                           .addImm(Disp)           // Disp
-                           .addReg(0)              // Segment
-                           // Immediate.
-                           .addImm(kImmValue));
+      Result.push_back(fillScratchMemory(llvm::X86::MOV32mi, Disp, kImmValue));
     }
-    // Load Reg from scratch memory.
-    Result.push_back(llvm::MCInstBuilder(RMOpcode)
-                         .addReg(Reg)
-                         // Address = ESP
-                         .addReg(llvm::X86::RSP) // BaseReg
-                         .addImm(1)              // ScaleAmt
-                         .addReg(0)              // IndexReg
-                         .addImm(0)              // Disp
-                         .addReg(0));            // Segment
-    // Release scratch memory.
-    Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
-                         .addReg(llvm::X86::RSP)
-                         .addReg(llvm::X86::RSP)
-                         .addImm(RegSizeBytes));
+    Result.push_back(loadToReg(Reg, RMOpcode));
+    Result.push_back(releaseStackSpace(RegSizeBytes));
     return Result;
   }
+
+  // Allocates scratch memory on the stack.
+  static llvm::MCInst allocateStackSpace(unsigned Bytes) {
+    return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
+        .addReg(llvm::X86::RSP)
+        .addReg(llvm::X86::RSP)
+        .addImm(Bytes);
+  }
+
+  // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
+  static llvm::MCInst fillScratchMemory(unsigned MovOpcode,
+                                        unsigned OffsetBytes, uint64_t Imm) {
+    return llvm::MCInstBuilder(MovOpcode)
+        // Address = RSP
+        .addReg(llvm::X86::RSP) // BaseReg
+        .addImm(1)              // ScaleAmt
+        .addReg(0)              // IndexReg
+        .addImm(OffsetBytes)    // Disp
+        .addReg(0)              // Segment
+        // Immediate.
+        .addImm(Imm);
+  }
+
+  // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
+  static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
+    return llvm::MCInstBuilder(RMOpcode)
+        .addReg(Reg)
+        // Address = RSP
+        .addReg(llvm::X86::RSP) // BaseReg
+        .addImm(1)              // ScaleAmt
+        .addReg(0)              // IndexReg
+        .addImm(0)              // Disp
+        .addReg(0);             // Segment
+  }
+
+  // Releases scratch memory.
+  static llvm::MCInst releaseStackSpace(unsigned Bytes) {
+    return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
+        .addReg(llvm::X86::RSP)
+        .addReg(llvm::X86::RSP)
+        .addImm(Bytes);
+  }
 };
 
 } // namespace
Index: unittests/tools/llvm-exegesis/X86/TargetTest.cpp
===================================================================
--- unittests/tools/llvm-exegesis/X86/TargetTest.cpp
+++ unittests/tools/llvm-exegesis/X86/TargetTest.cpp
@@ -37,7 +37,7 @@
 
 TEST_F(X86TargetTest, SetRegToConstantXMM) {
   const auto Insts = Target_->setRegToConstant(llvm::X86::XMM1);
-  EXPECT_THAT(Insts, SizeIs(Gt(0U)));
+  EXPECT_THAT(Insts, SizeIs(Gt(4U)));
 }
 
 } // namespace