Index: tools/llvm-exegesis/lib/Assembler.h =================================================================== --- tools/llvm-exegesis/lib/Assembler.h +++ tools/llvm-exegesis/lib/Assembler.h @@ -39,12 +39,13 @@ // convention and target machine). llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM); -// Creates a temporary `void foo()` function containing the provided +// Creates a temporary `void foo(char*)` function containing the provided // Instructions. Runs a set of llvm Passes to provide correct prologue and // epilogue. Once the MachineFunction is ready, it is assembled for TM to // AsmStream, the temporary function is eventually discarded. void assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, + llvm::ArrayRef LiveIns, llvm::ArrayRef RegsToDef, llvm::ArrayRef Instructions, llvm::raw_pwrite_stream &AsmStream); @@ -59,7 +60,7 @@ llvm::object::OwningBinary getObjectFromFile(llvm::StringRef Filename); -// Consumes an ObjectFile containing a `void foo()` function and make it +// Consumes an ObjectFile containing a `void foo(char*)` function and make it // executable. struct ExecutableFunction { explicit ExecutableFunction( @@ -70,7 +71,9 @@ llvm::StringRef getFunctionBytes() const { return FunctionBytes; } // Executes the function. - void operator()() const { ((void (*)())(intptr_t)FunctionBytes.data())(); } + void operator()(char *Memory) const { + ((void (*)(char *))(intptr_t)FunctionBytes.data())(Memory); + } std::unique_ptr Context; std::unique_ptr ExecEngine; Index: tools/llvm-exegesis/lib/Assembler.cpp =================================================================== --- tools/llvm-exegesis/lib/Assembler.cpp +++ tools/llvm-exegesis/lib/Assembler.cpp @@ -66,12 +66,16 @@ return false; } -// Creates a void MachineFunction with no argument. +// Creates a void(int8*) MachineFunction. 
static llvm::MachineFunction & -createVoidVoidMachineFunction(llvm::StringRef FunctionID, llvm::Module *Module, - llvm::MachineModuleInfo *MMI) { +createVoidVoidPtrMachineFunction(llvm::StringRef FunctionID, + llvm::Module *Module, + llvm::MachineModuleInfo *MMI) { llvm::Type *const ReturnType = llvm::Type::getInt32Ty(Module->getContext()); - llvm::FunctionType *FunctionType = llvm::FunctionType::get(ReturnType, false); + llvm::Type *const MemParamType = llvm::PointerType::get( + llvm::Type::getInt8Ty(Module->getContext()), 0 /*default address space*/); + llvm::FunctionType *FunctionType = + llvm::FunctionType::get(ReturnType, {MemParamType}, false); llvm::Function *const F = llvm::Function::Create( FunctionType, llvm::GlobalValue::InternalLinkage, FunctionID, Module); // Making sure we can create a MachineFunction out of this Function even if it @@ -81,9 +85,12 @@ } static void fillMachineFunction(llvm::MachineFunction &MF, + llvm::ArrayRef LiveIns, llvm::ArrayRef Instructions) { llvm::MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); MF.push_back(MBB); + for (const unsigned Reg : LiveIns) + MBB->addLiveIn(Reg); const llvm::MCInstrInfo *MCII = MF.getTarget().getMCInstrInfo(); llvm::DebugLoc DL; for (const llvm::MCInst &Inst : Instructions) { @@ -134,13 +141,14 @@ std::unique_ptr MMI = llvm::make_unique(&TM); llvm::MachineFunction &MF = - createVoidVoidMachineFunction(FunctionID, Module.get(), MMI.get()); + createVoidVoidPtrMachineFunction(FunctionID, Module.get(), MMI.get()); // Saving reserved registers for client. 
return MF.getSubtarget().getRegisterInfo()->getReservedRegs(MF); } void assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, + llvm::ArrayRef LiveIns, llvm::ArrayRef RegsToDef, llvm::ArrayRef Instructions, llvm::raw_pwrite_stream &AsmStream) { @@ -151,13 +159,17 @@ std::unique_ptr MMI = llvm::make_unique(TM.get()); llvm::MachineFunction &MF = - createVoidVoidMachineFunction(FunctionID, Module.get(), MMI.get()); + createVoidVoidPtrMachineFunction(FunctionID, Module.get(), MMI.get()); // We need to instruct the passes that we're done with SSA and virtual // registers. auto &Properties = MF.getProperties(); Properties.set(llvm::MachineFunctionProperties::Property::NoVRegs); Properties.reset(llvm::MachineFunctionProperties::Property::IsSSA); + + for (const unsigned Reg : LiveIns) + MF.getRegInfo().addLiveIn(Reg); + bool IsSnippetSetupComplete = false; std::vector SnippetWithSetup = generateSnippetSetupCode(RegsToDef, ET, *TM, IsSnippetSetupComplete); @@ -176,7 +188,7 @@ MF.getRegInfo().freezeReservedRegs(MF); // Fill the MachineFunction from the instructions. - fillMachineFunction(MF, Instructions); + fillMachineFunction(MF, LiveIns, Instructions); // We create the pass manager, run the passes to populate AsmBuffer. llvm::MCContext &MCContext = MMI->getContext(); Index: tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.h +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -23,6 +23,8 @@ #include "RegisterAliasing.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Error.h" +#include +#include #include namespace exegesis { @@ -40,6 +42,7 @@ // measurement it should be as short as possible. It is usually used to setup // the content of the Registers. struct Setup { + std::vector LiveIns; // The registers that are live on entry. 
std::vector RegsToDef; }; Setup SnippetSetup; @@ -66,6 +69,23 @@ std::vector computeRegsToDef(const std::vector &Snippet) const; + // Scratch space to run instructions that touch memory. + struct ScratchSpace { + static constexpr const size_t kAlignment = 1024; + static constexpr const size_t kSize = 1 << 20; // 1MB. + ScratchSpace() + : UnalignedPtr(llvm::make_unique(kSize + kAlignment)), + AlignedPtr( + UnalignedPtr.get() + kAlignment - + (reinterpret_cast(UnalignedPtr.get()) % kAlignment)) {} + char *ptr() const { return AlignedPtr; } + void clear() { std::memset(ptr(), 0, kSize); } + + private: + const std::unique_ptr UnalignedPtr; + char *const AlignedPtr; + }; + protected: const LLVMState &State; const RegisterAliasingTrackerCache RATC; @@ -79,7 +99,7 @@ generatePrototype(unsigned Opcode) const = 0; virtual std::vector - runMeasurements(const ExecutableFunction &EF, + runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const = 0; // Internal helpers. 
@@ -96,6 +116,8 @@ llvm::ArrayRef Code) const; const InstructionBenchmark::ModeE Mode; + + const std::unique_ptr Scratch; }; } // namespace exegesis Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -30,7 +30,7 @@ InstructionBenchmark::ModeE Mode) : State(State), RATC(State.getRegInfo(), getFunctionReservedRegs(State.getTargetMachine())), - Mode(Mode) {} + Mode(Mode), Scratch(llvm::make_unique()) {} BenchmarkRunner::~BenchmarkRunner() = default; @@ -119,7 +119,7 @@ << *ObjectFilePath << "\n"; const ExecutableFunction EF(State.createTargetMachine(), getObjectFromFile(*ObjectFilePath)); - InstrBenchmark.Measurements = runMeasurements(EF, NumRepetitions); + InstrBenchmark.Measurements = runMeasurements(EF, *Scratch, NumRepetitions); return InstrBenchmark; } @@ -132,9 +132,14 @@ BenchmarkConfiguration Configuration; Configuration.Info = Prototype.Explanation; for (InstructionInstance &II : Prototype.Snippet) { - II.randomizeUnsetVariables(); + II.randomizeUnsetVariables( + Prototype.ScratchSpaceReg + ? RATC.getRegister(Prototype.ScratchSpaceReg).aliasedBits() + : RATC.emptyRegisters()); Configuration.Snippet.push_back(II.build()); } + if (Prototype.ScratchSpaceReg) + Configuration.SnippetSetup.LiveIns.push_back(Prototype.ScratchSpaceReg); Configuration.SnippetSetup.RegsToDef = computeRegsToDef(Prototype.Snippet); return std::vector{Configuration}; } else @@ -144,6 +149,7 @@ std::vector BenchmarkRunner::computeRegsToDef( const std::vector &Snippet) const { // Collect all register uses and create an assignment for each of them. + // Ignore memory operands which are handled separately. // Loop invariant: DefinedRegs[i] is true iif it has been set at least once // before the current instruction. 
llvm::BitVector DefinedRegs = RATC.emptyRegisters(); @@ -152,11 +158,12 @@ // Returns the register that this Operand sets or uses, or 0 if this is not // a register. const auto GetOpReg = [&II](const Operand &Op) -> unsigned { - if (Op.ImplicitReg) { + if (Op.IsMem) + return 0; + if (Op.ImplicitReg) return *Op.ImplicitReg; - } else if (Op.IsExplicit && II.getValueFor(Op).isReg()) { + if (Op.IsExplicit && II.getValueFor(Op).isReg()) return II.getValueFor(Op).getReg(); - } return 0; }; // Collect used registers that have never been def'ed. @@ -173,9 +180,8 @@ for (const Operand &Op : II.Instr.Operands) { if (Op.IsDef) { const unsigned Reg = GetOpReg(Op); - if (Reg > 0) { + if (Reg > 0) DefinedRegs.set(Reg); - } } } } @@ -192,12 +198,12 @@ return std::move(E); llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - Setup.RegsToDef, Code, OFS); + Setup.LiveIns, Setup.RegsToDef, Code, OFS); return ResultPath.str(); } -llvm::Expected BenchmarkRunner::generateSelfAliasingPrototype( - const Instruction &Instr) const { +llvm::Expected +BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const { const AliasingConfigurations SelfAliasing(Instr, Instr); if (SelfAliasing.empty()) { return llvm::make_error("empty self aliasing"); Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- tools/llvm-exegesis/lib/Latency.h +++ tools/llvm-exegesis/lib/Latency.h @@ -36,7 +36,7 @@ const Instruction &Instr) const; std::vector - runMeasurements(const ExecutableFunction &EF, + runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const override; virtual const char *getCounterName() const; Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -108,6 +108,7 
@@ std::vector LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, + ScratchSpace &Scratch, const unsigned NumRepetitions) const { // Cycle measurements include some overhead from the kernel. Repeat the // measure several times and take the minimum value. @@ -121,8 +122,9 @@ llvm::report_fatal_error("invalid perf event"); for (size_t I = 0; I < NumMeasurements; ++I) { pfm::Counter Counter(CyclesPerfEvent); + Scratch.clear(); Counter.start(); - Function(); + Function(Scratch.ptr()); Counter.stop(); const int64_t Value = Counter.read(); if (Value < MinLatency) Index: tools/llvm-exegesis/lib/MCInstrDescView.h =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.h +++ tools/llvm-exegesis/lib/MCInstrDescView.h @@ -58,6 +58,7 @@ struct Operand { unsigned Index = 0; bool IsDef = false; + bool IsMem = false; bool IsExplicit = false; const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op. const llvm::MCOperandInfo *Info = nullptr; // Set for Explicit Op. @@ -72,6 +73,8 @@ Instruction(const llvm::MCInstrDesc &MCInstrDesc, const RegisterAliasingTrackerCache &ATC); + bool hasMemoryOperands() const; + const llvm::MCInstrDesc *Description; // Never nullptr. llvm::SmallVector Operands; llvm::SmallVector Variables; @@ -83,11 +86,8 @@ struct InstructionInstance { InstructionInstance(const Instruction &Instr); - // No copy. - InstructionInstance(const InstructionInstance &) = delete; - InstructionInstance &operator=(const InstructionInstance &) = delete; - - // Moving is OK. + InstructionInstance(const InstructionInstance &); + InstructionInstance &operator=(const InstructionInstance &); InstructionInstance(InstructionInstance &&); InstructionInstance &operator=(InstructionInstance &&); @@ -99,7 +99,8 @@ bool hasImmediateVariables() const; // Assigns a Random Value to all Variables that are still Invalid. 
- void randomizeUnsetVariables(); + // Do not use any of the registers in `ForbiddenRegs`. + void randomizeUnsetVariables(const llvm::BitVector &ForbiddenRegs); // Returns the instance as an llvm::MCInst. The InstructionInstance must be // fully allocated (no invalid variables). @@ -125,6 +126,9 @@ SnippetPrototype &operator=(SnippetPrototype &&); std::string Explanation; + // If the prototype uses the provided scratch memory, the register in which + // the pointer to this memory is passed in to the function. + unsigned ScratchSpaceReg = 0; std::vector Snippet; }; Index: tools/llvm-exegesis/lib/MCInstrDescView.cpp =================================================================== --- tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -26,6 +26,7 @@ Operand Operand; Operand.Index = OpIndex; Operand.IsDef = (OpIndex < MCInstrDesc.getNumDefs()); + Operand.IsMem = OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY; Operand.IsExplicit = true; // TODO(gchatelet): Handle isLookupPtrRegClass. if (OpInfo.RegClass >= 0) @@ -83,6 +84,11 @@ } } +bool Instruction::hasMemoryOperands() const { + return std::any_of(Operands.begin(), Operands.end(), + [](const Operand &Op) { return Op.IsMem; }); +} + InstructionInstance::InstructionInstance(const Instruction &Instr) : Instr(Instr), VariableValues(Instr.Variables.size()) {} @@ -91,6 +97,11 @@ InstructionInstance &InstructionInstance:: operator=(InstructionInstance &&) = default; +InstructionInstance::InstructionInstance(const InstructionInstance &) = default; + +InstructionInstance &InstructionInstance:: +operator=(const InstructionInstance &) = default; + unsigned InstructionInstance::getOpcode() const { return Instr.Description->getOpcode(); } @@ -117,7 +128,8 @@ // forward declaration. 
static void randomize(const Instruction &Instr, const Variable &Var, - llvm::MCOperand &AssignedValue); + llvm::MCOperand &AssignedValue, + const llvm::BitVector &ForbiddenRegs); bool InstructionInstance::hasImmediateVariables() const { return llvm::any_of(Instr.Variables, [this](const Variable &Var) { @@ -129,11 +141,12 @@ }); } -void InstructionInstance::randomizeUnsetVariables() { +void InstructionInstance::randomizeUnsetVariables( + const llvm::BitVector &ForbiddenRegs) { for (const Variable &Var : Instr.Variables) { llvm::MCOperand &AssignedValue = getValueFor(Var); if (!AssignedValue.isValid()) - randomize(Instr, Var, AssignedValue); + randomize(Instr, Var, AssignedValue, ForbiddenRegs); } } @@ -222,7 +235,8 @@ } static void randomize(const Instruction &Instr, const Variable &Var, - llvm::MCOperand &AssignedValue) { + llvm::MCOperand &AssignedValue, + const llvm::BitVector &ForbiddenRegs) { assert(!Var.TiedOperands.empty()); const Operand &Op = Instr.Operands[Var.TiedOperands.front()]; assert(Op.Info != nullptr); @@ -234,8 +248,11 @@ break; case llvm::MCOI::OperandType::OPERAND_REGISTER: { assert(Op.Tracker); - const auto &Registers = Op.Tracker->sourceBits(); - AssignedValue = llvm::MCOperand::createReg(randomBit(Registers)); + auto AllowedRegs = Op.Tracker->sourceBits(); + assert(AllowedRegs.size() == ForbiddenRegs.size()); + for (auto I : ForbiddenRegs.set_bits()) + AllowedRegs.reset(I); + AssignedValue = llvm::MCOperand::createReg(randomBit(AllowedRegs)); break; } default: Index: tools/llvm-exegesis/lib/RegisterAliasing.h =================================================================== --- tools/llvm-exegesis/lib/RegisterAliasing.h +++ tools/llvm-exegesis/lib/RegisterAliasing.h @@ -62,6 +62,7 @@ private: RegisterAliasingTracker(const llvm::MCRegisterInfo &RegInfo); + RegisterAliasingTracker(const RegisterAliasingTracker &) = delete; void FillOriginAndAliasedBits(const llvm::MCRegisterInfo &RegInfo, const llvm::BitVector &OriginalBits); Index: 
tools/llvm-exegesis/lib/Target.h =================================================================== --- tools/llvm-exegesis/lib/Target.h +++ tools/llvm-exegesis/lib/Target.h @@ -22,6 +22,7 @@ #include "LlvmState.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -39,6 +40,27 @@ return {}; } + // Returns the register pointing to scratch memory, or 0 if this target does + // not support memory operands. The benchmark function uses the default + // calling convention. + virtual unsigned getScratchMemoryRegister(const llvm::Triple &) const { + return 0; + } + + // Fills memory operands with references to the address at [Reg] + Offset. + virtual void fillMemoryOperands(InstructionInstance &II, unsigned Reg, + unsigned Offset) const { + llvm_unreachable( + "fillMemoryOperands() requires getScratchMemoryRegister() > 0"); + } + + // Returns the maximum number of bytes a load/store instruction can access at + // once. This is typically the size of the largest register available on the + // processor. Note that this is only used as a hint to generate independent + // load/stores to/from memory, so the exact returned value does not really + // matter as long as it's large enough. + virtual unsigned getMaxMemoryAccessSize() const { return 0; } + // Creates a benchmark runner for the given mode. std::unique_ptr createBenchmarkRunner(InstructionBenchmark::ModeE Mode, Index: tools/llvm-exegesis/lib/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/Target.cpp +++ tools/llvm-exegesis/lib/Target.cpp @@ -30,9 +30,8 @@ FirstTarget = Target; return; } - assert(Target->Next == nullptr && "target has already been registered"); if (Target->Next != nullptr) - return; + return; // Already registered. 
Target->Next = FirstTarget; FirstTarget = Target; } Index: tools/llvm-exegesis/lib/Uops.h =================================================================== --- tools/llvm-exegesis/lib/Uops.h +++ tools/llvm-exegesis/lib/Uops.h @@ -28,12 +28,41 @@ llvm::Expected generatePrototype(unsigned Opcode) const override; + static constexpr const size_t kMinNumDifferentAddresses = 6; + private: llvm::Error isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const; std::vector - runMeasurements(const ExecutableFunction &EF, + runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const override; + + // Instantiates memory operands within a snippet. + // To make computations as parallel as possible, we generate independent + // memory locations for instructions that load and store. If there are fewer + // than kMinNumDifferentAddresses instructions in the original snippet, we duplicate + // instructions until there are this number of instructions. + // For example, assuming kMinNumDifferentAddresses=5 and + // getMaxMemoryAccessSize()=64, if the original snippet is: + // mov eax, [memory] + // we might generate: + // mov eax, [rdi] + // mov eax, [rdi + 64] + // mov eax, [rdi + 128] + // mov eax, [rdi + 192] + // mov eax, [rdi + 256] + // If the original snippet is: + // mov eax, [memory] + // add eax, [memory] + // we might generate: + // mov eax, [rdi] + // add eax, [rdi + 64] + // mov eax, [rdi + 128] + // add eax, [rdi + 192] + // mov eax, [rdi + 256] + void + instantiateMemoryOperands(unsigned ScratchSpaceReg, + std::vector &Snippet) const; }; } // namespace exegesis Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -13,6 +13,7 @@ #include "BenchmarkRunner.h" #include "MCInstrDescView.h" #include "PerfHelper.h" +#include "Target.h" // FIXME: Load constants into registers (e.g. 
with fld1) to not break // instructions like x87. @@ -84,19 +85,11 @@ return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN; } -// FIXME: Handle memory, see PR36905. -static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) { - return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY; -} - llvm::Error UopsBenchmarkRunner::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const { if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) return llvm::make_error( "Infeasible : has unknown operands"); - if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand)) - return llvm::make_error( - "Infeasible : has memory operands"); return llvm::Error::success(); } @@ -131,23 +124,69 @@ UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; +void UopsBenchmarkRunner::instantiateMemoryOperands( + const unsigned ScratchSpaceReg, + std::vector &Snippet) const { + if (ScratchSpaceReg == 0) + return; // no memory operands. + const auto &ET = State.getExegesisTarget(); + const unsigned MemStep = ET.getMaxMemoryAccessSize(); + const size_t OriginalSnippetSize = Snippet.size(); + size_t I = 0; + for (InstructionInstance &II : Snippet) { + ET.fillMemoryOperands(II, ScratchSpaceReg, I * MemStep); + ++I; + } + + while (Snippet.size() < kMinNumDifferentAddresses) { + InstructionInstance II = Snippet[I % OriginalSnippetSize]; + ET.fillMemoryOperands(II, ScratchSpaceReg, I * MemStep); + ++I; + Snippet.push_back(std::move(II)); + } + assert(I * MemStep < ScratchSpace::kSize && "not enough scratch space"); +} + llvm::Expected UopsBenchmarkRunner::generatePrototype(unsigned Opcode) const { const auto &InstrDesc = State.getInstrInfo().get(Opcode); if (auto E = isInfeasible(InstrDesc)) return std::move(E); const Instruction Instr(InstrDesc, RATC); + const auto &ET = State.getExegesisTarget(); + SnippetPrototype Prototype; + + const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr; + if (Instr.hasMemoryOperands()) { + Prototype.ScratchSpaceReg = + 
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple()); + if (Prototype.ScratchSpaceReg == 0) + return llvm::make_error( + "Infeasible : target does not support memory instructions"); + ScratchSpaceAliasedRegs = + &RATC.getRegister(Prototype.ScratchSpaceReg).aliasedBits(); + // If the instruction implicitly writes to ScratchSpaceReg , abort. + // FIXME: We could make a copy of the scratch register. + for (const auto &Op : Instr.Operands) { + if (Op.IsDef && Op.ImplicitReg && + ScratchSpaceAliasedRegs->test(*Op.ImplicitReg)) + return llvm::make_error( + "Infeasible : memory instruction uses scratch memory register"); + } + } + const AliasingConfigurations SelfAliasing(Instr, Instr); + InstructionInstance II(Instr); if (SelfAliasing.empty()) { - SnippetPrototype Prototype; Prototype.Explanation = "instruction is parallel, repeating a random one."; - Prototype.Snippet.emplace_back(Instr); + Prototype.Snippet.push_back(std::move(II)); + instantiateMemoryOperands(Prototype.ScratchSpaceReg, Prototype.Snippet); return std::move(Prototype); } if (SelfAliasing.hasImplicitAliasing()) { - SnippetPrototype Prototype; Prototype.Explanation = "instruction is serial, repeating a random one."; - Prototype.Snippet.emplace_back(Instr); + Prototype.Snippet.push_back(std::move(II)); + instantiateMemoryOperands(Prototype.ScratchSpaceReg, Prototype.Snippet); return std::move(Prototype); } const auto TiedVariables = getTiedVariables(Instr); @@ -161,23 +200,27 @@ assert(!Var->TiedOperands.empty()); const Operand &Op = Instr.Operands[Var->TiedOperands.front()]; assert(Op.Tracker); - SnippetPrototype Prototype; Prototype.Explanation = "instruction has tied variables using static renaming."; for (const llvm::MCPhysReg Reg : Op.Tracker->sourceBits().set_bits()) { - Prototype.Snippet.emplace_back(Instr); - Prototype.Snippet.back().getValueFor(*Var) = - llvm::MCOperand::createReg(Reg); + if (ScratchSpaceAliasedRegs && ScratchSpaceAliasedRegs->test(Reg)) + continue; // Do not 
use the scratch memory address register. + InstructionInstance TmpII = II; + TmpII.getValueFor(*Var) = llvm::MCOperand::createReg(Reg); + Prototype.Snippet.push_back(std::move(TmpII)); } + instantiateMemoryOperands(Prototype.ScratchSpaceReg, Prototype.Snippet); return std::move(Prototype); } - InstructionInstance II(Instr); // No tied variables, we pick random values for defs. llvm::BitVector Defs(State.getRegInfo().getNumRegs()); for (const auto &Op : Instr.Operands) { - if (Op.Tracker && Op.IsExplicit && Op.IsDef) { + if (Op.Tracker && Op.IsExplicit && Op.IsDef && !Op.IsMem) { auto PossibleRegisters = Op.Tracker->sourceBits(); remove(PossibleRegisters, RATC.reservedRegisters()); + // Do not use the scratch memory address register. + if (ScratchSpaceAliasedRegs) + remove(PossibleRegisters, *ScratchSpaceAliasedRegs); assert(PossibleRegisters.any() && "No register left to choose from"); const auto RandomReg = randomBit(PossibleRegisters); Defs.set(RandomReg); @@ -187,24 +230,28 @@ // And pick random use values that are not reserved and don't alias with defs. const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs); for (const auto &Op : Instr.Operands) { - if (Op.Tracker && Op.IsExplicit && !Op.IsDef) { + if (Op.Tracker && Op.IsExplicit && !Op.IsDef && !Op.IsMem) { auto PossibleRegisters = Op.Tracker->sourceBits(); remove(PossibleRegisters, RATC.reservedRegisters()); + // Do not use the scratch memory address register. 
+ if (ScratchSpaceAliasedRegs) + remove(PossibleRegisters, *ScratchSpaceAliasedRegs); remove(PossibleRegisters, DefAliases); assert(PossibleRegisters.any() && "No register left to choose from"); const auto RandomReg = randomBit(PossibleRegisters); II.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg); } } - SnippetPrototype Prototype; Prototype.Explanation = "instruction has no tied variables picking Uses different from defs"; Prototype.Snippet.push_back(std::move(II)); + instantiateMemoryOperands(Prototype.ScratchSpaceReg, Prototype.Snippet); return std::move(Prototype); } std::vector UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, + ScratchSpace &Scratch, const unsigned NumRepetitions) const { const auto &SchedModel = State.getSubtargetInfo().getSchedModel(); @@ -226,8 +273,9 @@ llvm::report_fatal_error( llvm::Twine("invalid perf event ").concat(PfmCounters)); pfm::Counter Counter(UopPerfEvent); + Scratch.clear(); Counter.start(); - Function(); + Function(Scratch.ptr()); Counter.stop(); CounterValue += Counter.read(); } @@ -238,4 +286,6 @@ return Result; } +constexpr const size_t UopsBenchmarkRunner::kMinNumDifferentAddresses; + } // namespace exegesis Index: tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- tools/llvm-exegesis/lib/X86/Target.cpp +++ tools/llvm-exegesis/lib/X86/Target.cpp @@ -129,6 +129,48 @@ PM.add(llvm::createX86FloatingPointStackifierPass()); } + unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override { + if (!TT.isArch64Bit()) { + // FIXME: This would require popping from the stack, so we would have to + // add some additional setup code. + return 0; + } + return TT.isOSWindows() ? 
llvm::X86::RCX : llvm::X86::RDI; + } + + unsigned getMaxMemoryAccessSize() const override { return 64; } + + void fillMemoryOperands(InstructionInstance &II, unsigned Reg, + unsigned Offset) const override { + // FIXME: For instructions that read AND write to memory, we use the same + // value for input and output. + for (size_t I = 0, E = II.Instr.Operands.size(); I < E; ++I) { + const Operand *Op = &II.Instr.Operands[I]; + if (Op->IsExplicit && Op->IsMem) { + // Case 1: 5-op memory. + assert((I + 5 <= E) && "x86 memory references are always 5 ops"); + II.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg + Op = &II.Instr.Operands[++I]; + assert(Op->IsMem); + assert(Op->IsExplicit); + II.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt + Op = &II.Instr.Operands[++I]; + assert(Op->IsMem); + assert(Op->IsExplicit); + II.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg + Op = &II.Instr.Operands[++I]; + assert(Op->IsMem); + assert(Op->IsExplicit); + II.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp + Op = &II.Instr.Operands[++I]; + assert(Op->IsMem); + assert(Op->IsExplicit); + II.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment + // Case2: segment:index addressing. We assume that ES is 0. + } + } + } + std::vector setRegToConstant(const llvm::MCSubtargetInfo &STI, unsigned Reg) const override { // GPR. Index: unittests/tools/llvm-exegesis/BenchmarkRunnerTest.cpp =================================================================== --- /dev/null +++ unittests/tools/llvm-exegesis/BenchmarkRunnerTest.cpp @@ -0,0 +1,31 @@ +//===-- BenchmarkRunnerTest.cpp ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "BenchmarkRunner.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace exegesis { + +namespace { + +TEST(ScratchSpaceTest, Works) { + BenchmarkRunner::ScratchSpace Space; + EXPECT_EQ(reinterpret_cast(Space.ptr()) % + BenchmarkRunner::ScratchSpace::kAlignment, + 0u); + Space.ptr()[0] = 42; + Space.ptr()[BenchmarkRunner::ScratchSpace::kSize - 1] = 43; + Space.clear(); + EXPECT_EQ(Space.ptr()[0], 0); + EXPECT_EQ(Space.ptr()[BenchmarkRunner::ScratchSpace::kSize - 1], 0); +} + +} // namespace +} // namespace exegesis Index: unittests/tools/llvm-exegesis/CMakeLists.txt =================================================================== --- unittests/tools/llvm-exegesis/CMakeLists.txt +++ unittests/tools/llvm-exegesis/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_unittest(LLVMExegesisTests BenchmarkResultTest.cpp + BenchmarkRunnerTest.cpp ClusteringTest.cpp PerfHelperTest.cpp ) Index: unittests/tools/llvm-exegesis/Common/AssemblerUtils.h =================================================================== --- unittests/tools/llvm-exegesis/Common/AssemblerUtils.h +++ unittests/tools/llvm-exegesis/Common/AssemblerUtils.h @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Assembler.h" +#include "BenchmarkRunner.h" #include "Target.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -36,21 +37,19 @@ } } - template inline void Check(llvm::MCInst MCInst, Bs... Bytes) { - CheckWithSetup(ExegesisTarget::getDefault(), {}, MCInst, Bytes...); - } - template - inline void CheckWithSetup(const ExegesisTarget &ET, - llvm::ArrayRef RegsToDef, - llvm::MCInst MCInst, Bs... Bytes) { + inline void Check(const ExegesisTarget &ET, + llvm::ArrayRef RegsToDef, llvm::MCInst MCInst, + Bs... Bytes) { ExecutableFunction Function = (MCInst.getOpcode() == 0) ? 
assembleToFunction(ET, RegsToDef, {}) : assembleToFunction(ET, RegsToDef, {MCInst}); ASSERT_THAT(Function.getFunctionBytes().str(), testing::ElementsAre(Bytes...)); - if (CanExecute) - Function(); + if (CanExecute) { + BenchmarkRunner::ScratchSpace Scratch; + Function(Scratch.ptr()); + } } private: @@ -73,7 +72,8 @@ llvm::ArrayRef Instructions) { llvm::SmallString<256> Buffer; llvm::raw_svector_ostream AsmStream(Buffer); - assembleToStream(ET, createTargetMachine(), RegsToDef, Instructions, + assembleToStream(ET, createTargetMachine(), /*LiveIns=*/{}, + RegsToDef, Instructions, AsmStream); return ExecutableFunction(createTargetMachine(), getObjectFromBuffer(AsmStream.str())); Index: unittests/tools/llvm-exegesis/X86/AssemblerTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/AssemblerTest.cpp +++ unittests/tools/llvm-exegesis/X86/AssemblerTest.cpp @@ -11,6 +11,9 @@ #include "X86InstrInfo.h" namespace exegesis { + +void InitializeX86ExegesisTarget(); + namespace { using llvm::MCInstBuilder; @@ -31,25 +34,39 @@ LLVMInitializeX86TargetMC(); LLVMInitializeX86Target(); LLVMInitializeX86AsmPrinter(); + InitializeX86ExegesisTarget(); } }; TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunction) { - Check(llvm::MCInst(), 0xc3); + Check(ExegesisTarget::getDefault(), {}, llvm::MCInst(), 0xc3); } -TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunctionXOR32rr) { - Check(MCInstBuilder(XOR32rr).addReg(EAX).addReg(EAX).addReg(EAX), 0x31, 0xc0, +TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunctionXOR32rr_Default) { + Check(ExegesisTarget::getDefault(), {EAX}, + MCInstBuilder(XOR32rr).addReg(EAX).addReg(EAX).addReg(EAX), 0x31, 0xc0, 0xc3); } +TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunctionXOR32rr_X86) { + const auto *ET = ExegesisTarget::lookup(llvm::Triple("x86_64-unknown-linux")); + ASSERT_NE(ET, nullptr); + Check(*ET, {EAX}, MCInstBuilder(XOR32rr).addReg(EAX).addReg(EAX).addReg(EAX), 
+ // mov eax, 1 + 0xb8, 0x01, 0x00, 0x00, 0x00, + // xor eax, eax + 0x31, 0xc0, 0xc3); +} + TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunctionMOV64ri) { - Check(MCInstBuilder(MOV64ri32).addReg(RAX).addImm(42), 0x48, 0xc7, 0xc0, 0x2a, + Check(ExegesisTarget::getDefault(), {}, + MCInstBuilder(MOV64ri32).addReg(RAX).addImm(42), 0x48, 0xc7, 0xc0, 0x2a, 0x00, 0x00, 0x00, 0xc3); } TEST_F(X86MachineFunctionGeneratorTest, DISABLED_JitFunctionMOV32ri) { - Check(MCInstBuilder(MOV32ri).addReg(EAX).addImm(42), 0xb8, 0x2a, 0x00, 0x00, + Check(ExegesisTarget::getDefault(), {}, + MCInstBuilder(MOV32ri).addReg(EAX).addImm(42), 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3); } Index: unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -18,6 +18,9 @@ #include namespace exegesis { + +void InitializeX86ExegesisTarget(); + namespace { using testing::AnyOf; @@ -41,6 +44,7 @@ LLVMInitializeX86TargetMC(); LLVMInitializeX86Target(); LLVMInitializeX86AsmPrinter(); + InitializeX86ExegesisTarget(); } const LLVMState State; @@ -215,6 +219,30 @@ EXPECT_THAT(II.VariableValues[3], IsInvalid()); } +TEST_F(UopsSnippetGeneratorTest, MemoryUse) { + // Mov32rm reads from memory. 
+ const unsigned Opcode = llvm::X86::MOV32rm; + const SnippetPrototype Proto = checkAndGetConfigurations(Opcode); + EXPECT_THAT(Proto.Explanation, HasSubstr("no tied variables")); + ASSERT_THAT(Proto.Snippet, + SizeIs(UopsBenchmarkRunner::kMinNumDifferentAddresses)); + const InstructionInstance &II = Proto.Snippet[0]; + EXPECT_THAT(II.getOpcode(), Opcode); + ASSERT_THAT(II.VariableValues, SizeIs(6)); + EXPECT_EQ(II.VariableValues[2].getImm(), 1); + EXPECT_EQ(II.VariableValues[3].getReg(), 0u); + EXPECT_EQ(II.VariableValues[4].getImm(), 0); + EXPECT_EQ(II.VariableValues[5].getReg(), 0u); +} + +TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) { + // MOVSB writes to scratch memory register. + const unsigned Opcode = llvm::X86::MOVSB; + auto Error = Runner.generatePrototype(Opcode).takeError(); + EXPECT_TRUE((bool)Error); + llvm::consumeError(std::move(Error)); +} + class FakeBenchmarkRunner : public BenchmarkRunner { public: FakeBenchmarkRunner(const LLVMState &State) @@ -232,7 +260,7 @@ } std::vector - runMeasurements(const ExecutableFunction &EF, + runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, const unsigned NumRepetitions) const override { return {}; }