Index: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
===================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -70,8 +70,13 @@
   const LLVMState &State;
   const RegisterAliasingTrackerCache RATC;
 
+  // Generates a single instruction prototype that has a self-dependency.
   llvm::Expected<SnippetPrototype>
   generateSelfAliasingPrototype(const Instruction &Instr) const;
+  // Generates a single instruction prototype without assignment constraints.
+  llvm::Expected<SnippetPrototype>
+  generateUnconstrainedPrototype(const Instruction &Instr,
+                                 llvm::StringRef Msg) const;
 
 private:
   // API to be implemented by subclasses.
Index: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
===================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -196,8 +196,8 @@
   return ResultPath.str();
 }
 
-llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
-    const Instruction &Instr) const {
+llvm::Expected<SnippetPrototype>
+BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   if (SelfAliasing.empty()) {
     return llvm::make_error<BenchmarkFailure>("empty self aliasing");
@@ -217,4 +217,13 @@
   return std::move(Prototype);
 }
 
+llvm::Expected<SnippetPrototype>
+BenchmarkRunner::generateUnconstrainedPrototype(const Instruction &Instr,
+                                                llvm::StringRef Msg) const {
+  SnippetPrototype Prototype;
+  Prototype.Explanation =
+      llvm::formatv("{0}, repeating an unconstrained assignment", Msg);
+  Prototype.Snippet.emplace_back(Instr);
+  return std::move(Prototype);
+}
 } // namespace exegesis
Index: llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp
===================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp
+++ llvm/trunk/tools/llvm-exegesis/lib/Uops.cpp
@@ -139,16 +139,10 @@
   const Instruction Instr(InstrDesc, RATC);
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   if (SelfAliasing.empty()) {
-    SnippetPrototype Prototype;
-    Prototype.Explanation = "instruction is parallel, repeating a random one.";
-    Prototype.Snippet.emplace_back(Instr);
-    return std::move(Prototype);
+    return generateUnconstrainedPrototype(Instr, "instruction is parallel");
   }
   if (SelfAliasing.hasImplicitAliasing()) {
-    SnippetPrototype Prototype;
-    Prototype.Explanation = "instruction is serial, repeating a random one.";
-    Prototype.Snippet.emplace_back(Instr);
-    return std::move(Prototype);
+    return generateUnconstrainedPrototype(Instr, "instruction is serial");
   }
   const auto TiedVariables = getTiedVariables(Instr);
   if (!TiedVariables.empty()) {
Index: llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
===================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
+++ llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -44,9 +44,9 @@
     case llvm::X86II::NotFP:
       break;
     case llvm::X86II::ZeroArgFP:
-      return Impl::handleZeroArgFP(Instr);
+      return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
    case llvm::X86II::OneArgFP:
-      return Impl::handleOneArgFP(Instr); // fstp ST(0)
+      return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP: {
      // These are instructions like
@@ -61,7 +61,7 @@
    case llvm::X86II::CondMovFP:
      return Impl::handleCondMovFP(Instr);
    case llvm::X86II::SpecialFP:
-      return Impl::handleSpecialFP(Instr);
+      return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
llvm::make_error("Unsupported x87 SpecialFP"); default: llvm_unreachable("Unknown FP Type!"); } @@ -76,14 +76,6 @@ using Base = LatencyBenchmarkRunner; using Base::Base; llvm::Expected - handleZeroArgFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 ZeroArgFP"); - } - llvm::Expected - handleOneArgFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 OneArgFP"); - } - llvm::Expected handleCompareFP(const Instruction &Instr) const { return llvm::make_error("Unsupported x87 CompareFP"); } @@ -91,35 +83,23 @@ handleCondMovFP(const Instruction &Instr) const { return llvm::make_error("Unsupported x87 CondMovFP"); } - llvm::Expected - handleSpecialFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 SpecialFP"); - } }; class X86UopsImpl : public UopsBenchmarkRunner { protected: using Base = UopsBenchmarkRunner; using Base::Base; - llvm::Expected - handleZeroArgFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 ZeroArgFP"); - } - llvm::Expected - handleOneArgFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 OneArgFP"); - } + // We can compute uops for any FP instruction that does not grow or shrink the + // stack (either do not touch the stack or push as much as they pop). llvm::Expected handleCompareFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 CompareFP"); + return generateUnconstrainedPrototype( + Instr, "instruction does not grow/shrink the FP stack"); } llvm::Expected handleCondMovFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 CondMovFP"); - } - llvm::Expected - handleSpecialFP(const Instruction &Instr) const { - return llvm::make_error("Unsupported x87 SpecialFP"); + return generateUnconstrainedPrototype( + Instr, "instruction does not grow/shrink the FP stack"); } }; @@ -163,6 +143,15 @@ llvm::X86::RFP64RegClass.contains(Reg) || llvm::X86::RFP80RegClass.contains(Reg)) return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m); + if (Reg == llvm::X86::EFLAGS) { + // Set all flags to 0 but the bits that are "reserved and set to 1". + constexpr const uint32_t kImmValue = 0x00007002u; + std::vector Result; + Result.push_back(allocateStackSpace(8)); + Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue)); + Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops. + return Result; + } return {}; } @@ -193,41 +182,59 @@ // value that has set bits for all byte values and is a normal float/ // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when // interpreted as a float. - constexpr const uint64_t kImmValue = 0x40404040ull; + constexpr const uint32_t kImmValue = 0x40404040u; std::vector Result; - // Allocate scratch memory on the stack. - Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8) - .addReg(llvm::X86::RSP) - .addReg(llvm::X86::RSP) - .addImm(RegSizeBytes)); - // Fill scratch memory. - for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) { - Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi) - // Address = ESP - .addReg(llvm::X86::RSP) // BaseReg - .addImm(1) // ScaleAmt - .addReg(0) // IndexReg - .addImm(Disp) // Disp - .addReg(0) // Segment - // Immediate. - .addImm(kImmValue)); - } - // Load Reg from scratch memory. 
-    Result.push_back(llvm::MCInstBuilder(RMOpcode)
-                         .addReg(Reg)
-                         // Address = ESP
-                         .addReg(llvm::X86::RSP) // BaseReg
-                         .addImm(1)              // ScaleAmt
-                         .addReg(0)              // IndexReg
-                         .addImm(0)              // Disp
-                         .addReg(0));            // Segment
-    // Release scratch memory.
-    Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
-                         .addReg(llvm::X86::RSP)
-                         .addReg(llvm::X86::RSP)
-                         .addImm(RegSizeBytes));
+    Result.push_back(allocateStackSpace(RegSizeBytes));
+    constexpr const unsigned kMov32NumBytes = 4;
+    for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
+      Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
+    }
+    Result.push_back(loadToReg(Reg, RMOpcode));
+    Result.push_back(releaseStackSpace(RegSizeBytes));
     return Result;
   }
+
+  // Allocates scratch memory on the stack.
+  static llvm::MCInst allocateStackSpace(unsigned Bytes) {
+    return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
+        .addReg(llvm::X86::RSP)
+        .addReg(llvm::X86::RSP)
+        .addImm(Bytes);
+  }
+
+  // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
+  static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
+                                     uint64_t Imm) {
+    return llvm::MCInstBuilder(MovOpcode)
+        // Address = ESP
+        .addReg(llvm::X86::RSP) // BaseReg
+        .addImm(1)              // ScaleAmt
+        .addReg(0)              // IndexReg
+        .addImm(OffsetBytes)    // Disp
+        .addReg(0)              // Segment
+        // Immediate.
+        .addImm(Imm);
+  }
+
+  // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
+  static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
+    return llvm::MCInstBuilder(RMOpcode)
+        .addReg(Reg)
+        // Address = ESP
+        .addReg(llvm::X86::RSP) // BaseReg
+        .addImm(1)              // ScaleAmt
+        .addReg(0)              // IndexReg
+        .addImm(0)              // Disp
+        .addReg(0);             // Segment
+  }
+
+  // Releases scratch memory.
+  static llvm::MCInst releaseStackSpace(unsigned Bytes) {
+    return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
+        .addReg(llvm::X86::RSP)
+        .addReg(llvm::X86::RSP)
+        .addImm(Bytes);
+  }
 };
 
 } // namespace
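
Side note, not part of the patch: the comment kept in setVectorRegToConstant states that the 0x40 fill byte yields a "normal" value, roughly 3.0f as a float and ~32.5 as a double. A minimal standalone C++ sketch to check that claim (the file name and program are made up for illustration, only the bit patterns come from the patch):

// check_imm.cpp - standalone sketch, not part of the patch above.
// Reinterprets the 0x40-byte fill pattern used by setVectorRegToConstant
// as a float and as a double to confirm the values quoted in the comment.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint32_t Bits32 = 0x40404040u;            // kImmValue from the patch.
  const uint64_t Bits64 = 0x4040404040404040ull;  // Same byte repeated 8 times.
  float F;
  double D;
  std::memcpy(&F, &Bits32, sizeof(F)); // Bit-cast the 4-byte pattern.
  std::memcpy(&D, &Bits64, sizeof(D)); // Bit-cast the 8-byte pattern.
  std::printf("float: %f double: %f\n", F, D); // ~3.003922 and ~32.501961.
  return 0;
}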