Index: llvm/trunk/test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/inverse_throughput-by-opcode-name.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr | FileCheck %s +# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: inverse_throughput Index: llvm/trunk/test/tools/llvm-exegesis/X86/latency-CMOV32rr.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/latency-CMOV32rr.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/latency-CMOV32rr.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency Index: llvm/trunk/test/tools/llvm-exegesis/X86/latency-SBB8rr.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/latency-SBB8rr.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/latency-SBB8rr.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency Index: llvm/trunk/test/tools/llvm-exegesis/X86/latency-by-opcode-name.s =================================================================== --- 
llvm/trunk/test/tools/llvm-exegesis/X86/latency-by-opcode-name.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/latency-by-opcode-name.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mi8.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mi8.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mi8.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mr.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mr.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32mr.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32rm.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32rm.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-ADD32rm.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=duplicate | FileCheck %s +# 
RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-BEXTR32rm.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-BEXTR32rm.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-BEXTR32rm.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-BSF16rm.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-BSF16rm.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-BSF16rm.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-BTR64mr.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-BTR64mr.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-BTR64mr.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s 
+++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-XCHG64rr.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-XCHG64rr.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-XCHG64rr.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/test/tools/llvm-exegesis/X86/uops-by-opcode-name.s =================================================================== --- llvm/trunk/test/tools/llvm-exegesis/X86/uops-by-opcode-name.s +++ llvm/trunk/test/tools/llvm-exegesis/X86/uops-by-opcode-name.s @@ -1,4 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: Index: llvm/trunk/tools/llvm-exegesis/lib/Assembler.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Assembler.h +++ llvm/trunk/tools/llvm-exegesis/lib/Assembler.h @@ -40,6 +40,49 @@ // convention and target machine). llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM); +// Helper to fill in a basic block. 
+class BasicBlockFiller { +public: + BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB, + const MCInstrInfo *MCII); + + void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc()); + void addInstructions(ArrayRef Insts, + const DebugLoc &DL = DebugLoc()); + + void addReturn(const DebugLoc &DL = DebugLoc()); + + MachineFunction &MF; + MachineBasicBlock *const MBB; + const MCInstrInfo *const MCII; +}; + +// Helper to fill in a function. +class FunctionFiller { +public: + FunctionFiller(MachineFunction &MF, std::vector RegistersSetUp); + + // Adds a basic block to the function. + BasicBlockFiller addBasicBlock(); + + // Returns the function entry point. + BasicBlockFiller getEntry() { return Entry; } + + MachineFunction &MF; + const MCInstrInfo *const MCII; + + // Returns the set of registers in the snippet setup code. + ArrayRef getRegistersSetUp() const; + +private: + BasicBlockFiller Entry; + // The set of registers that are set up in the basic block. + std::vector RegistersSetUp; +}; + +// A callback that fills a function. +using FillFunction = std::function; + // Creates a temporary `void foo(char*)` function containing the provided // Instructions. Runs a set of llvm Passes to provide correct prologue and // epilogue. Once the MachineFunction is ready, it is assembled for TM to @@ -48,7 +91,7 @@ std::unique_ptr TM, llvm::ArrayRef LiveIns, llvm::ArrayRef RegisterInitialValues, - llvm::ArrayRef Instructions, + const FillFunction &Fill, llvm::raw_pwrite_stream &AsmStream); // Creates an ObjectFile in the format understood by the host. @@ -81,6 +124,12 @@ llvm::StringRef FunctionBytes; }; +// Creates a void(int8*) MachineFunction. 
+llvm::MachineFunction & +createVoidVoidPtrMachineFunction(llvm::StringRef FunctionID, + llvm::Module *Module, + llvm::MachineModuleInfo *MMI); + } // namespace exegesis } // namespace llvm Index: llvm/trunk/tools/llvm-exegesis/lib/Assembler.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Assembler.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/Assembler.cpp @@ -8,6 +8,7 @@ #include "Assembler.h" +#include "SnippetRepetitor.h" #include "Target.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -28,21 +29,22 @@ static constexpr const char ModuleID[] = "ExegesisInfoTest"; static constexpr const char FunctionID[] = "foo"; -static std::vector +// Fills the given basic block with register setup code, and returns true if +// all registers could be setup correctly. +static bool generateSnippetSetupCode(const ExegesisTarget &ET, const llvm::MCSubtargetInfo *const MSI, llvm::ArrayRef RegisterInitialValues, - bool &IsSnippetSetupComplete) { - IsSnippetSetupComplete = true; - std::vector Result; + BasicBlockFiller &BBF) { + bool IsSnippetSetupComplete = true; for (const RegisterValue &RV : RegisterInitialValues) { // Load a constant in the register. const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); if (SetRegisterCode.empty()) IsSnippetSetupComplete = false; - Result.insert(Result.end(), SetRegisterCode.begin(), SetRegisterCode.end()); + BBF.addInstructions(SetRegisterCode); } - return Result; + return IsSnippetSetupComplete; } // Small utility function to add named passes. @@ -67,8 +69,7 @@ return false; } -// Creates a void(int8*) MachineFunction. 
-static llvm::MachineFunction & +llvm::MachineFunction & createVoidVoidPtrMachineFunction(llvm::StringRef FunctionID, llvm::Module *Module, llvm::MachineModuleInfo *MMI) { @@ -85,38 +86,43 @@ return MMI->getOrCreateMachineFunction(*F); } -static void fillMachineFunction(llvm::MachineFunction &MF, - llvm::ArrayRef LiveIns, - llvm::ArrayRef Instructions) { - llvm::MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); - MF.push_back(MBB); - for (const unsigned Reg : LiveIns) - MBB->addLiveIn(Reg); - const llvm::MCInstrInfo *MCII = MF.getTarget().getMCInstrInfo(); - llvm::DebugLoc DL; - for (const llvm::MCInst &Inst : Instructions) { - const unsigned Opcode = Inst.getOpcode(); - const llvm::MCInstrDesc &MCID = MCII->get(Opcode); - llvm::MachineInstrBuilder Builder = llvm::BuildMI(MBB, DL, MCID); - for (unsigned OpIndex = 0, E = Inst.getNumOperands(); OpIndex < E; - ++OpIndex) { - const llvm::MCOperand &Op = Inst.getOperand(OpIndex); - if (Op.isReg()) { - const bool IsDef = OpIndex < MCID.getNumDefs(); - unsigned Flags = 0; - const llvm::MCOperandInfo &OpInfo = MCID.operands().begin()[OpIndex]; - if (IsDef && !OpInfo.isOptionalDef()) - Flags |= llvm::RegState::Define; - Builder.addReg(Op.getReg(), Flags); - } else if (Op.isImm()) { - Builder.addImm(Op.getImm()); - } else if (!Op.isValid()) { - llvm_unreachable("Operand is not set"); - } else { - llvm_unreachable("Not yet implemented"); - } +BasicBlockFiller::BasicBlockFiller(llvm::MachineFunction &MF, + llvm::MachineBasicBlock *MBB, + const llvm::MCInstrInfo *MCII) + : MF(MF), MBB(MBB), MCII(MCII) {} + +void BasicBlockFiller::addInstruction(const llvm::MCInst &Inst, + const llvm::DebugLoc &DL) { + const unsigned Opcode = Inst.getOpcode(); + const llvm::MCInstrDesc &MCID = MCII->get(Opcode); + llvm::MachineInstrBuilder Builder = llvm::BuildMI(MBB, DL, MCID); + for (unsigned OpIndex = 0, E = Inst.getNumOperands(); OpIndex < E; + ++OpIndex) { + const llvm::MCOperand &Op = Inst.getOperand(OpIndex); + if (Op.isReg()) { + 
const bool IsDef = OpIndex < MCID.getNumDefs(); + unsigned Flags = 0; + const llvm::MCOperandInfo &OpInfo = MCID.operands().begin()[OpIndex]; + if (IsDef && !OpInfo.isOptionalDef()) + Flags |= llvm::RegState::Define; + Builder.addReg(Op.getReg(), Flags); + } else if (Op.isImm()) { + Builder.addImm(Op.getImm()); + } else if (!Op.isValid()) { + llvm_unreachable("Operand is not set"); + } else { + llvm_unreachable("Not yet implemented"); } } +} + +void BasicBlockFiller::addInstructions(ArrayRef Insts, + const llvm::DebugLoc &DL) { + for (const MCInst &Inst : Insts) + addInstruction(Inst, DL); +} + +void BasicBlockFiller::addReturn(const llvm::DebugLoc &DL) { // Insert the return code. const llvm::TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (TII->getReturnOpcode() < TII->getNumOpcodes()) { @@ -128,6 +134,21 @@ } } +FunctionFiller::FunctionFiller(llvm::MachineFunction &MF, + std::vector RegistersSetUp) + : MF(MF), MCII(MF.getTarget().getMCInstrInfo()), Entry(addBasicBlock()), + RegistersSetUp(std::move(RegistersSetUp)) {} + +BasicBlockFiller FunctionFiller::addBasicBlock() { + llvm::MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); + MF.push_back(MBB); + return BasicBlockFiller(MF, MBB, MCII); +} + +ArrayRef FunctionFiller::getRegistersSetUp() const { + return RegistersSetUp; +} + static std::unique_ptr createModule(const std::unique_ptr &Context, const llvm::DataLayout DL) { @@ -155,7 +176,7 @@ std::unique_ptr TM, llvm::ArrayRef LiveIns, llvm::ArrayRef RegisterInitialValues, - llvm::ArrayRef Instructions, + const FillFunction &Fill, llvm::raw_pwrite_stream &AsmStream) { std::unique_ptr Context = std::make_unique(); @@ -171,29 +192,34 @@ auto &Properties = MF.getProperties(); Properties.set(llvm::MachineFunctionProperties::Property::NoVRegs); Properties.reset(llvm::MachineFunctionProperties::Property::IsSSA); + Properties.set(llvm::MachineFunctionProperties::Property::NoPHIs); for (const unsigned Reg : LiveIns) MF.getRegInfo().addLiveIn(Reg); - bool 
IsSnippetSetupComplete; - std::vector Code = - generateSnippetSetupCode(ET, TM->getMCSubtargetInfo(), - RegisterInitialValues, IsSnippetSetupComplete); + std::vector RegistersSetUp; + for (const auto &InitValue : RegisterInitialValues) { + RegistersSetUp.push_back(InitValue.Register); + } + FunctionFiller Sink(MF, std::move(RegistersSetUp)); + auto Entry = Sink.getEntry(); + for (const unsigned Reg : LiveIns) + Entry.MBB->addLiveIn(Reg); - Code.insert(Code.end(), Instructions.begin(), Instructions.end()); + const bool IsSnippetSetupComplete = generateSnippetSetupCode( + ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry); // If the snippet setup is not complete, we disable liveliness tracking. This // means that we won't know what values are in the registers. if (!IsSnippetSetupComplete) Properties.reset(llvm::MachineFunctionProperties::Property::TracksLiveness); + Fill(Sink); + // prologue/epilogue pass needs the reserved registers to be frozen, this // is usually done by the SelectionDAGISel pass. MF.getRegInfo().freezeReservedRegs(MF); - // Fill the MachineFunction from the instructions. - fillMachineFunction(MF, LiveIns, Code); - // We create the pass manager, run the passes to populate AsmBuffer. llvm::MCContext &MCContext = MMI->getContext(); llvm::legacy::PassManager PM; Index: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkResult.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkResult.h +++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -66,6 +66,8 @@ // The number of instructions inside the repeated snippet. For example, if a // snippet of 3 instructions is repeated 4 times, this is 12. int NumRepetitions = 0; + enum RepetitionModeE { Duplicate, Loop }; + RepetitionModeE RepetitionMode; // Note that measurements are per instruction. 
std::vector Measurements; std::string Error; Index: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -20,6 +20,7 @@ #include "BenchmarkResult.h" #include "LlvmState.h" #include "MCInstrDescView.h" +#include "SnippetRepetitor.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Error.h" #include @@ -46,6 +47,7 @@ InstructionBenchmark runConfiguration(const BenchmarkCode &Configuration, unsigned NumRepetitions, + const SnippetRepetitor &Repetitor, bool DumpObjectToDisk) const; // Scratch space to run instructions that touch memory. @@ -84,7 +86,7 @@ llvm::Expected writeObjectFile(const BenchmarkCode &Configuration, - llvm::ArrayRef Code) const; + const FillFunction &Fill) const; const std::unique_ptr Scratch; }; Index: llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -33,17 +33,6 @@ BenchmarkRunner::~BenchmarkRunner() = default; -// Repeat the snippet until there are at least MinInstructions in the resulting -// code. 
-static std::vector -GenerateInstructions(const BenchmarkCode &BC, const size_t MinInstructions) { - if (BC.Instructions.empty()) - return {}; - std::vector Code = BC.Instructions; - for (int I = 0; Code.size() < MinInstructions; ++I) - Code.push_back(BC.Instructions[I % BC.Instructions.size()]); - return Code; -} namespace { class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { @@ -95,10 +84,9 @@ }; } // namespace -InstructionBenchmark -BenchmarkRunner::runConfiguration(const BenchmarkCode &BC, - unsigned NumRepetitions, - bool DumpObjectToDisk) const { +InstructionBenchmark BenchmarkRunner::runConfiguration( + const BenchmarkCode &BC, unsigned NumRepetitions, + const SnippetRepetitor &Repetitor, bool DumpObjectToDisk) const { InstructionBenchmark InstrBenchmark; InstrBenchmark.Mode = Mode; InstrBenchmark.CpuName = State.getTargetMachine().getTargetCPU(); @@ -119,9 +107,10 @@ { llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); - assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, - GenerateInstructions(BC, kMinInstructionsForSnippet), OS); + assembleToStream( + State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, + BC.RegisterInitialValues, + Repetitor.Repeat(BC.Instructions, kMinInstructionsForSnippet), OS); const ExecutableFunction EF(State.createTargetMachine(), getObjectFromBuffer(OS.str())); const auto FnBytes = EF.getFunctionBytes(); @@ -130,11 +119,12 @@ // Assemble NumRepetitions instructions repetitions of the snippet for // measurements. 
- const auto Code = GenerateInstructions(BC, InstrBenchmark.NumRepetitions); + const auto Filler = + Repetitor.Repeat(BC.Instructions, InstrBenchmark.NumRepetitions); llvm::object::OwningBinary ObjectFile; if (DumpObjectToDisk) { - auto ObjectFilePath = writeObjectFile(BC, Code); + auto ObjectFilePath = writeObjectFile(BC, Filler); if (llvm::Error E = ObjectFilePath.takeError()) { InstrBenchmark.Error = llvm::toString(std::move(E)); return InstrBenchmark; @@ -146,7 +136,7 @@ llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, Code, OS); + BC.LiveIns, BC.RegisterInitialValues, Filler, OS); ObjectFile = getObjectFromBuffer(OS.str()); } @@ -172,7 +162,7 @@ llvm::Expected BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC, - llvm::ArrayRef Code) const { + const FillFunction &FillFunction) const { int ResultFD = 0; llvm::SmallString<256> ResultPath; if (llvm::Error E = llvm::errorCodeToError(llvm::sys::fs::createTemporaryFile( @@ -180,7 +170,7 @@ return std::move(E); llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, Code, OFS); + BC.LiveIns, BC.RegisterInitialValues, FillFunction, OFS); return ResultPath.str(); } Index: llvm/trunk/tools/llvm-exegesis/lib/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/CMakeLists.txt +++ llvm/trunk/tools/llvm-exegesis/lib/CMakeLists.txt @@ -31,6 +31,7 @@ RegisterValue.cpp SchedClassResolution.cpp SnippetGenerator.cpp + SnippetRepetitor.cpp Target.cpp Uops.cpp ) Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -57,7 
+57,8 @@ // Calls generateCodeTemplate and expands it into one or more BenchmarkCode. llvm::Expected> - generateConfigurations(const Instruction &Instr) const; + generateConfigurations(const Instruction &Instr, + const llvm::BitVector &ExtraForbiddenRegs) const; // Given a snippet, computes which registers the setup code needs to define. std::vector computeRegisterInitialValues( Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -37,9 +37,10 @@ SnippetGenerator::~SnippetGenerator() = default; llvm::Expected> -SnippetGenerator::generateConfigurations(const Instruction &Instr) const { +SnippetGenerator::generateConfigurations( + const Instruction &Instr, const llvm::BitVector &ExtraForbiddenRegs) const { llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters(); - + ForbiddenRegs |= ExtraForbiddenRegs; // If the instruction has memory registers, prevent the generator from // using the scratch register and its aliasing registers. if (Instr.hasMemoryOperands()) { Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.h +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.h @@ -0,0 +1,53 @@ +//===-- SnippetRepetitor.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines helpers to fill functions with repetitions of a snippet. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_FUNCTIONFILLER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_FUNCTIONFILLER_H + +#include "Assembler.h" +#include "BenchmarkResult.h" +#include "LlvmState.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Object/Binary.h" + +namespace llvm { +namespace exegesis { + +class SnippetRepetitor { +public: + static std::unique_ptr + Create(InstructionBenchmark::RepetitionModeE Mode, const LLVMState &State); + + virtual ~SnippetRepetitor(); + + // Returns the set of registers that are reserved by the repetitor. + virtual BitVector getReservedRegs() const = 0; + + // Returns a functor that repeats `Instructions` so that the function executes + // at least `MinInstructions` instructions. + virtual FillFunction Repeat(ArrayRef Instructions, + unsigned MinInstructions) const = 0; + + explicit SnippetRepetitor(const LLVMState &State) : State(State) {} + +protected: + const LLVMState &State; +}; + +} // namespace exegesis +} // namespace llvm + +#endif Index: llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/SnippetRepetitor.cpp @@ -0,0 +1,116 @@ +//===-- SnippetRepetitor.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "SnippetRepetitor.h" +#include "Target.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +namespace llvm { +namespace exegesis { +namespace { + +class DuplicateSnippetRepetitor : public SnippetRepetitor { +public: + using SnippetRepetitor::SnippetRepetitor; + + // Repeats the snippet until there are at least MinInstructions in the + // resulting code. + FillFunction Repeat(ArrayRef Instructions, + unsigned MinInstructions) const override { + return [Instructions, MinInstructions](FunctionFiller &Filler) { + auto Entry = Filler.getEntry(); + if (!Instructions.empty()) { + // Add the whole snippet at least once. + Entry.addInstructions(Instructions); + for (unsigned I = Instructions.size(); I < MinInstructions; ++I) { + Entry.addInstruction(Instructions[I % Instructions.size()]); + } + } + Entry.addReturn(); + }; + } + + BitVector getReservedRegs() const override { + // We're using no additional registers. + return State.getRATC().emptyRegisters(); + } +}; + +class LoopSnippetRepetitor : public SnippetRepetitor { +public: + explicit LoopSnippetRepetitor(const LLVMState &State) + : SnippetRepetitor(State), + LoopCounter(State.getExegesisTarget().getLoopCounterRegister( + State.getTargetMachine().getTargetTriple())) {} + + // Loops over the snippet ceil(MinInstructions / Instructions.size()) times. 
+ FillFunction Repeat(ArrayRef Instructions, + unsigned MinInstructions) const override { + return [this, Instructions, MinInstructions](FunctionFiller &Filler) { + const auto &ET = State.getExegesisTarget(); + auto Entry = Filler.getEntry(); + auto Loop = Filler.addBasicBlock(); + auto Exit = Filler.addBasicBlock(); + + // Set loop counter to the right value: + const APInt LoopCount(32, (MinInstructions + Instructions.size() - 1) / + Instructions.size()); + for (const MCInst &Inst : + ET.setRegTo(State.getSubtargetInfo(), LoopCounter, LoopCount)) + Entry.addInstruction(Inst); + + // Set up the loop basic block. + Entry.MBB->addSuccessor(Loop.MBB, llvm::BranchProbability::getOne()); + Loop.MBB->addSuccessor(Loop.MBB, llvm::BranchProbability::getOne()); + // The live ins are: the loop counter, the registers that were setup by + // the entry block, and entry block live ins. + Loop.MBB->addLiveIn(LoopCounter); + for (unsigned Reg : Filler.getRegistersSetUp()) + Loop.MBB->addLiveIn(Reg); + for (const auto &LiveIn : Entry.MBB->liveins()) + Loop.MBB->addLiveIn(LiveIn); + Loop.addInstructions(Instructions); + ET.decrementLoopCounterAndLoop(*Loop.MBB, State.getInstrInfo()); + + // Set up the exit basic block. + Loop.MBB->addSuccessor(Exit.MBB, llvm::BranchProbability::getZero()); + Exit.addReturn(); + }; + } + + BitVector getReservedRegs() const override { + // We're using a single loop counter, but we have to reserve all aliasing + // registers. 
+ return State.getRATC().getRegister(LoopCounter).aliasedBits(); + } + +private: + const unsigned LoopCounter; +}; + +} // namespace + +SnippetRepetitor::~SnippetRepetitor() {} + +std::unique_ptr +SnippetRepetitor::Create(InstructionBenchmark::RepetitionModeE Mode, + const LLVMState &State) { + switch (Mode) { + case InstructionBenchmark::Duplicate: + return std::make_unique(State); + case InstructionBenchmark::Loop: + return std::make_unique(State); + } +} + +} // namespace exegesis +} // namespace llvm Index: llvm/trunk/tools/llvm-exegesis/lib/Target.h =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/Target.h +++ llvm/trunk/tools/llvm-exegesis/lib/Target.h @@ -85,11 +85,22 @@ // Fills memory operands with references to the address at [Reg] + Offset. virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, unsigned Offset) const { - llvm_unreachable( "fillMemoryOperands() requires getScratchMemoryRegister() > 0"); } + // Returns a register usable as a loop counter, or 0 if the target has none. + virtual unsigned getLoopCounterRegister(const llvm::Triple &) const { + return 0; + } + + // Adds the code to decrement the loop counter and jump back to MBB while it is non-zero. + virtual void decrementLoopCounterAndLoop(MachineBasicBlock &MBB, + const llvm::MCInstrInfo &MII) const { + llvm_unreachable("decrementLoopCounterAndBranch() requires " + "getLoopCounterRegister() > 0"); + } + // Returns a list of unavailable registers. // Targets can use this to prevent some registers to be automatically selected // for use in snippets. 
Index: llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp +++ llvm/trunk/tools/llvm-exegesis/lib/X86/Target.cpp @@ -436,6 +436,8 @@ unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override; + unsigned getLoopCounterRegister(const llvm::Triple &) const override; + unsigned getMaxMemoryAccessSize() const override { return 64; } void randomizeMCOperand(const Instruction &Instr, const Variable &Var, @@ -445,6 +447,9 @@ void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, unsigned Offset) const override; + void decrementLoopCounterAndLoop(MachineBasicBlock &MBB, + const llvm::MCInstrInfo &MII) const override; + std::vector setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg, const llvm::APInt &Value) const override; @@ -476,6 +481,12 @@ // prefix. const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH, X86::CH, X86::DH}; + +// We're using one of R8-R15 because these registers are never hardcoded in +// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have fewer +// conflicts. +constexpr const unsigned kLoopCounterReg = X86::R8; + } // namespace void ExegesisX86Target::addTargetSpecificPasses( @@ -494,6 +505,14 @@ return TT.isOSWindows() ? 
llvm::X86::RCX : llvm::X86::RDI; } +unsigned +ExegesisX86Target::getLoopCounterRegister(const llvm::Triple &TT) const { + if (!TT.isArch64Bit()) { + return 0; + } + return kLoopCounterReg; +} + void ExegesisX86Target::randomizeMCOperand( const Instruction &Instr, const Variable &Var, llvm::MCOperand &AssignedValue, @@ -538,6 +557,17 @@ SetOp(MemOpIdx + 4, MCOperand::createReg(0)); // Segment } +void ExegesisX86Target::decrementLoopCounterAndLoop( + MachineBasicBlock &MBB, const llvm::MCInstrInfo &MII) const { + BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8)) + .addDef(kLoopCounterReg) + .addUse(kLoopCounterReg) + .addImm(-1); + BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1)) + .addMBB(&MBB) + .addImm(X86::COND_NE); +} + std::vector ExegesisX86Target::setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg, const llvm::APInt &Value) const { Index: llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp +++ llvm/trunk/tools/llvm-exegesis/llvm-exegesis.cpp @@ -17,6 +17,7 @@ #include "lib/Clustering.h" #include "lib/LlvmState.h" #include "lib/PerfHelper.h" +#include "lib/SnippetRepetitor.h" #include "lib/Target.h" #include "lib/TargetSelect.h" #include "llvm/ADT/StringExtras.h" @@ -80,6 +81,14 @@ clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis", "Analysis"))); +static cl::opt RepetitionMode( + "repetition-mode", cl::desc("how to repeat the instruction snippet"), + cl::cat(BenchmarkOptions), + cl::values(clEnumValN(exegesis::InstructionBenchmark::Duplicate, + "duplicate", "Duplicate the snippet"), + clEnumValN(exegesis::InstructionBenchmark::Loop, "loop", + "Loop over the snippet"))); + static cl::opt NumRepetitions("num-repetitions", cl::desc("number of time to repeat the asm snippet"), @@ -192,7 +201,8 @@ // Generates code snippets for opcode `Opcode`. 
static llvm::Expected> -generateSnippets(const LLVMState &State, unsigned Opcode) { +generateSnippets(const LLVMState &State, unsigned Opcode, + const llvm::BitVector &ForbiddenRegs) { const Instruction &Instr = State.getIC().getInstr(Opcode); const llvm::MCInstrDesc &InstrDesc = *Instr.Description; // Ignore instructions that we cannot run. @@ -209,7 +219,7 @@ State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); if (!Generator) llvm::report_fatal_error("cannot create snippet generator"); - return Generator->generateConfigurations(Instr); + return Generator->generateConfigurations(Instr, ForbiddenRegs); } namespace { @@ -372,6 +382,8 @@ const LLVMState State(CpuName); const auto Opcodes = getOpcodesOrDie(State.getInstrInfo()); + const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State); + std::vector Configurations; if (!Opcodes.empty()) { for (const unsigned Opcode : Opcodes) { @@ -383,7 +395,8 @@ << ": ignoring instruction without sched class\n"; continue; } - auto ConfigsForInstr = generateSnippets(State, Opcode); + auto ConfigsForInstr = + generateSnippets(State, Opcode, Repetitor->getReservedRegs()); if (!ConfigsForInstr) { llvm::logAllUnhandledErrors( ConfigsForInstr.takeError(), llvm::errs(), @@ -411,8 +424,8 @@ BenchmarkFile = "-"; for (const BenchmarkCode &Conf : Configurations) { - InstructionBenchmark Result = - Runner->runConfiguration(Conf, NumRepetitions, DumpObjectToDisk); + InstructionBenchmark Result = Runner->runConfiguration( + Conf, NumRepetitions, *Repetitor, DumpObjectToDisk); ExitOnErr(Result.writeYaml(State, BenchmarkFile)); } exegesis::pfm::pfmTerminate(); Index: llvm/trunk/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h =================================================================== --- llvm/trunk/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h +++ llvm/trunk/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h @@ -43,12 +43,15 @@ } template - inline void Check(llvm::ArrayRef 
RegisterInitialValues, - llvm::MCInst MCInst, Bs... Bytes) { + inline void Check(ArrayRef RegisterInitialValues, MCInst Inst, + Bs... Bytes) { ExecutableFunction Function = - (MCInst.getOpcode() == 0) - ? assembleToFunction(RegisterInitialValues, {}) - : assembleToFunction(RegisterInitialValues, {MCInst}); + (Inst.getOpcode() == 0) + ? assembleToFunction(RegisterInitialValues, [](FunctionFiller &) {}) + : assembleToFunction(RegisterInitialValues, + [Inst](FunctionFiller &Filler) { + Filler.getEntry().addInstruction(Inst); + }); ASSERT_THAT(Function.getFunctionBytes().str(), testing::ElementsAre(Bytes...)); if (CanExecute) { @@ -73,11 +76,11 @@ ExecutableFunction assembleToFunction(llvm::ArrayRef RegisterInitialValues, - llvm::ArrayRef Instructions) { + FillFunction Fill) { llvm::SmallString<256> Buffer; llvm::raw_svector_ostream AsmStream(Buffer); assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{}, - RegisterInitialValues, Instructions, AsmStream); + RegisterInitialValues, Fill, AsmStream); return ExecutableFunction(createTargetMachine(), getObjectFromBuffer(AsmStream.str())); } Index: llvm/trunk/unittests/tools/llvm-exegesis/X86/CMakeLists.txt =================================================================== --- llvm/trunk/unittests/tools/llvm-exegesis/X86/CMakeLists.txt +++ llvm/trunk/unittests/tools/llvm-exegesis/X86/CMakeLists.txt @@ -19,6 +19,7 @@ RegisterAliasingTest.cpp SchedClassResolutionTest.cpp SnippetGeneratorTest.cpp + SnippetRepetitorTest.cpp TargetTest.cpp ) target_link_libraries(LLVMExegesisX86Tests PRIVATE Index: llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp =================================================================== --- llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -387,7 +387,9 @@ // - hasAliasingRegisters const unsigned Opcode = llvm::X86::MOVSB; const Instruction &Instr = State.getIC().getInstr(Opcode); 
- auto Error = Generator.generateConfigurations(Instr).takeError(); + auto Error = + Generator.generateConfigurations(Instr, State.getRATC().emptyRegisters()) + .takeError(); EXPECT_TRUE((bool)Error); llvm::consumeError(std::move(Error)); } Index: llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp =================================================================== --- llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ llvm/trunk/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -0,0 +1,107 @@ +//===-- SnippetRepetitorTest.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../Common/AssemblerUtils.h" +#include "Latency.h" +#include "LlvmState.h" +#include "MCInstrDescView.h" +#include "RegisterAliasing.h" +#include "Uops.h" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { +namespace exegesis { + +void InitializeX86ExegesisTarget(); + +namespace { + +using testing::ElementsAre; +using testing::Eq; +using testing::Field; +using testing::Property; +using testing::UnorderedElementsAre; + +class X86SnippetRepetitorTest : public ::testing::Test { +protected: + X86SnippetRepetitorTest() : State("x86_64-unknown-linux", "haswell") {} + + static void SetUpTestCase() { + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86Target(); + LLVMInitializeX86AsmPrinter(); + InitializeX86ExegesisTarget(); + } + + void SetUp() { + TM = State.createTargetMachine(); + Context = std::make_unique(); + Module = + std::make_unique("X86SnippetRepetitorTest", *Context); + Module->setDataLayout(TM->createDataLayout()); + MMI = std::make_unique(TM.get()); + MF = 
&createVoidVoidPtrMachineFunction("TestFn", Module.get(), MMI.get()); + } + + void TestCommon(InstructionBenchmark::RepetitionModeE RepetitionMode) { + const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State); + const std::vector Instructions = {MCInstBuilder(X86::NOOP)}; + FunctionFiller Sink(*MF, {X86::EAX}); + const auto Fill = Repetitor->Repeat(Instructions, kMinInstructions); + Fill(Sink); + } + + static constexpr const unsigned kMinInstructions = 3; + + const LLVMState State; + std::unique_ptr TM; + std::unique_ptr Context; + std::unique_ptr Module; + std::unique_ptr MMI; + MachineFunction *MF = nullptr; +}; + +static auto HasOpcode = [](unsigned Opcode) { + return Property(&MachineInstr::getOpcode, Eq(Opcode)); +}; + +static auto LiveReg = [](unsigned Reg) { + return Field(&MachineBasicBlock::RegisterMaskPair::PhysReg, Eq(Reg)); +}; + +TEST_F(X86SnippetRepetitorTest, Duplicate) { + TestCommon(InstructionBenchmark::Duplicate); + // Duplicating creates a single basic block that repeats the instructions. + ASSERT_EQ(MF->getNumBlockIDs(), 1u); + EXPECT_THAT(MF->getBlockNumbered(0)->instrs(), + ElementsAre(HasOpcode(X86::NOOP), HasOpcode(X86::NOOP), + HasOpcode(X86::NOOP), HasOpcode(X86::RETQ))); +} + +TEST_F(X86SnippetRepetitorTest, Loop) { + TestCommon(InstructionBenchmark::Loop); + // Duplicating creates an entry block, a loop body and a ret block. + ASSERT_EQ(MF->getNumBlockIDs(), 3u); + const auto &LoopBlock = *MF->getBlockNumbered(1); + EXPECT_THAT(LoopBlock.instrs(), + ElementsAre(HasOpcode(X86::NOOP), HasOpcode(X86::ADD64ri8), + HasOpcode(X86::JCC_1))); + EXPECT_THAT(LoopBlock.liveins(), + UnorderedElementsAre( + LiveReg(X86::EAX), + LiveReg(State.getExegesisTarget().getLoopCounterRegister( + State.getTargetMachine().getTargetTriple())))); + EXPECT_THAT(MF->getBlockNumbered(2)->instrs(), + ElementsAre(HasOpcode(X86::RETQ))); +} + +} // namespace +} // namespace exegesis +} // namespace llvm