diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s @@ -0,0 +1,13 @@ +# REQUIRES: exegesis-can-execute-x86_64, exegesis-can-measure-latency, x86_64-linux + +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess -repetition-mode=loop | FileCheck %s + +# CHECK: measurements: +# CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}} + +# LLVM-EXEGESIS-MEM-DEF test1 4096 2147483647 +# LLVM-EXEGESIS-MEM-MAP test1 8192 + +movq $8192, %rax +movq (%rax), %rdi diff --git a/llvm/test/tools/llvm-exegesis/X86/memory-annotations-livein.test b/llvm/test/tools/llvm-exegesis/X86/memory-annotations-livein.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/memory-annotations-livein.test @@ -0,0 +1,13 @@ +# REQUIRES: exegesis-can-execute-x86_64, exegesis-can-measure-latency, x86_64-linux + +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s + +# CHECK: measurements: +# CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}} + +# LLVM-EXEGESIS-MEM-DEF test1 4096 2147483647 +# LLVM-EXEGESIS-MEM-MAP test1 8192 +# LLVM-EXEGESIS-LIVEIN R14 + +movq $8192, %rax +movq %r14, (%rax) diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h --- a/llvm/tools/llvm-exegesis/lib/Assembler.h +++ b/llvm/tools/llvm-exegesis/lib/Assembler.h @@ -50,7 +50,8 @@ void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc()); void addInstructions(ArrayRef Insts, const DebugLoc &DL = DebugLoc()); - void addReturn(const DebugLoc &DL = DebugLoc()); + void addReturn(const ExegesisTarget &ET, 
bool SubprocessCleanup, + const DebugLoc &DL = DebugLoc()); MachineFunction &MF; MachineBasicBlock *const MBB; @@ -91,7 +92,9 @@ std::unique_ptr TM, ArrayRef LiveIns, ArrayRef RegisterInitialValues, - const FillFunction &Fill, raw_pwrite_stream &AsmStream); + const FillFunction &Fill, raw_pwrite_stream &AsmStream, + const BenchmarkKey &Key, + bool GenerateMemoryInstructions); // Creates an ObjectFile in the format understood by the host. // Note: the resulting object keeps a copy of Buffer so it can be discarded once diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -9,6 +9,7 @@ #include "Assembler.h" #include "SnippetRepetitor.h" +#include "SubprocessMemory.h" #include "Target.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -28,6 +29,10 @@ #include "llvm/Support/Alignment.h" #include "llvm/Support/MemoryBuffer.h" +#ifdef HAVE_LIBPFM +#include "perfmon/perf_event.h" +#endif // HAVE_LIBPFM + namespace llvm { namespace exegesis { @@ -39,8 +44,20 @@ // all registers could be setup correctly. 
static bool generateSnippetSetupCode( const ExegesisTarget &ET, const MCSubtargetInfo *const MSI, - ArrayRef RegisterInitialValues, BasicBlockFiller &BBF) { + ArrayRef RegisterInitialValues, BasicBlockFiller &BBF, + const BenchmarkKey &Key, bool GenerateMemoryInstructions) { bool IsSnippetSetupComplete = true; + if (GenerateMemoryInstructions) { + BBF.addInstructions(ET.generateMemoryInitialSetup()); + for (const MemoryMapping &MM : Key.MemoryMappings) { + BBF.addInstructions(ET.generateMmap( + MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes, + ET.getAuxiliaryMemoryStartAddress() + + sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index + + SubprocessMemory::AuxiliaryMemoryOffset))); + } + BBF.addInstructions(ET.setStackRegisterToAuxMem()); + } for (const RegisterValue &RV : RegisterInitialValues) { // Load a constant in the register. const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); @@ -48,6 +65,11 @@ IsSnippetSetupComplete = false; BBF.addInstructions(SetRegisterCode); } + if (GenerateMemoryInstructions) { +#ifdef HAVE_LIBPFM + BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true)); +#endif // HAVE_LIBPFM + } return IsSnippetSetupComplete; } @@ -122,7 +144,17 @@ addInstruction(Inst, DL); } -void BasicBlockFiller::addReturn(const DebugLoc &DL) { +void BasicBlockFiller::addReturn(const ExegesisTarget &ET, + bool SubprocessCleanup, const DebugLoc &DL) { + // Insert cleanup code + if (SubprocessCleanup) { +#ifdef HAVE_LIBPFM + addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_DISABLE, false)); +#endif // HAVE_LIBPFM +#ifdef __linux__ + addInstructions(ET.generateExitSyscall(0)); +#endif // __linux__ + } // Insert the return code. 
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (TII->getReturnOpcode() < TII->getNumOpcodes()) { @@ -176,7 +208,9 @@ std::unique_ptr TM, ArrayRef LiveIns, ArrayRef RegisterInitialValues, - const FillFunction &Fill, raw_pwrite_stream &AsmStream) { + const FillFunction &Fill, raw_pwrite_stream &AsmStream, + const BenchmarkKey &Key, + bool GenerateMemoryInstructions) { auto Context = std::make_unique(); std::unique_ptr Module = createModule(Context, TM->createDataLayout()); @@ -195,17 +229,37 @@ for (const unsigned Reg : LiveIns) MF.getRegInfo().addLiveIn(Reg); + if (GenerateMemoryInstructions) { + for (const unsigned Reg : ET.getArgumentRegisters()) + MF.getRegInfo().addLiveIn(Reg); + // Add a live in for registers that need saving so that the machine verifier + // doesn't fail if the register is never defined. + for (const unsigned Reg : ET.getRegistersNeedSaving()) + MF.getRegInfo().addLiveIn(Reg); + } + std::vector RegistersSetUp; for (const auto &InitValue : RegisterInitialValues) { RegistersSetUp.push_back(InitValue.Register); } FunctionFiller Sink(MF, std::move(RegistersSetUp)); auto Entry = Sink.getEntry(); + for (const unsigned Reg : LiveIns) Entry.MBB->addLiveIn(Reg); + if (GenerateMemoryInstructions) { + for (const unsigned Reg : ET.getArgumentRegisters()) + Entry.MBB->addLiveIn(Reg); + // Add a live in for registers that need saving so that the machine verifier + // doesn't fail if the register is never defined. + for (const unsigned Reg : ET.getRegistersNeedSaving()) + Entry.MBB->addLiveIn(Reg); + } + const bool IsSnippetSetupComplete = generateSnippetSetupCode( - ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry); + ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry, Key, + GenerateMemoryInstructions); // If the snippet setup is not complete, we disable liveliness tracking. This // means that we won't know what values are in the registers. 
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -113,10 +113,10 @@ virtual Expected> runMeasurements(const FunctionExecutor &Executor) const = 0; - Expected> assembleSnippet(const BenchmarkCode &BC, - const SnippetRepetitor &Repetitor, - unsigned MinInstructions, - unsigned LoopBodySize) const; + Expected> + assembleSnippet(const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, + unsigned MinInstructions, unsigned LoopBodySize, + bool GenerateMemoryInstructions) const; Expected writeObjectFile(StringRef Buffer, StringRef FileName) const; diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -15,6 +15,7 @@ #include "Error.h" #include "MCInstrDescView.h" #include "PerfHelper.h" +#include "SubprocessMemory.h" #include "Target.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" @@ -36,6 +37,16 @@ #include #include #include + +#ifdef __GLIBC__ +#if __GLIBC_MINOR__ >= 35 +#define GLIBC_INITS_RSEQ +#endif // __GLIBC_MINOR__ >= 35 +#endif // __GLIBC__ + +#ifdef GLIBC_INITS_RSEQ +#include +#endif // GLIBC_INITS_RSEQ #endif // __linux__ namespace llvm { @@ -161,7 +172,11 @@ private: enum ChildProcessExitCodeE { CounterFDReadFailed = 1, - TranslatingCounterFDFailed + TranslatingCounterFDFailed, + RSeqDisableFailed, + FunctionDataMappingFailed, + AuxiliaryMemorySetupFailed + }; StringRef childProcessExitCodeToString(int ExitCode) const { @@ -173,6 +188,12 @@ "the child process failed. 
This might be due running an older " "Linux kernel that doesn't support the pidfd_getfd system call " "(anything before Linux 5.6)."; + case ChildProcessExitCodeE::RSeqDisableFailed: + return "Disabling restartable sequences failed"; + case ChildProcessExitCodeE::FunctionDataMappingFailed: + return "Failed to map memory for assembled snippet"; + case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: + return "Failed to setup auxiliary memory"; default: return "Child process returned with unknown exit code"; } @@ -188,6 +210,16 @@ "llvm-exegesis and the benchmarking subprocess"); } + SubprocessMemory SPMemory; + Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid()); + if (MemoryInitError) + return MemoryInitError; + + Error AddMemDefError = + SPMemory.addMemoryDefinition(Key.MemoryValues, getpid()); + if (AddMemDefError) + return AddMemDefError; + pid_t ParentOrChildPID = fork(); if (ParentOrChildPID == 0) { // We are in the child process, close the write end of the pipe @@ -285,13 +317,37 @@ exit(ChildProcessExitCodeE::TranslatingCounterFDFailed); } -#ifdef HAVE_LIBPFM - ioctl(CounterFileDescriptor, PERF_EVENT_IOC_RESET); -#endif - this->Function(nullptr); -#ifdef HAVE_LIBPFM - ioctl(CounterFileDescriptor, PERF_EVENT_IOC_DISABLE); -#endif +// Glibc versions 2.35 and newer automatically call rseq during +// initialization. 
Unmapping the region that glibc sets up for this causes +// segfaults in the program. Unregister the rseq region so that we can safely +// unmap it later. +#ifdef GLIBC_INITS_RSEQ + long RseqDisableOutput = + syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset, + __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); + if (RseqDisableOutput != 0) + exit(ChildProcessExitCodeE::RSeqDisableFailed); +#endif // GLIBC_INITS_RSEQ + + size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); + char *FunctionDataCopy = + (char *)mmap(nullptr, FunctionDataCopySize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (FunctionDataCopy == MAP_FAILED) + exit(ChildProcessExitCodeE::FunctionDataMappingFailed); + + memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(), + FunctionDataCopySize); + mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC); + + Expected AuxMemFDOrError = + SubprocessMemory::setupAuxiliaryMemoryInSubprocess( + Key.MemoryValues, ParentPID, CounterFileDescriptor); + if (!AuxMemFDOrError) + exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); + + ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize, + *AuxMemFDOrError); exit(0); } @@ -318,14 +374,17 @@ Expected> BenchmarkRunner::assembleSnippet( const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, - unsigned MinInstructions, unsigned LoopBodySize) const { + unsigned MinInstructions, unsigned LoopBodySize, + bool GenerateMemoryInstructions) const { const std::vector &Instructions = BC.Key.Instructions; SmallString<0> Buffer; raw_svector_ostream OS(Buffer); if (Error E = assembleToStream( State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, BC.Key.RegisterInitialValues, - Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize), OS)) { + Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, + GenerateMemoryInstructions), + OS, BC.Key, GenerateMemoryInstructions)) { return 
std::move(E); } return Buffer; @@ -347,6 +406,8 @@ const std::vector &Instructions = BC.Key.Instructions; + bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; + InstrBenchmark.Key = BC.Key; // Assemble at least kMinInstructionsForSnippet instructions by repeating @@ -355,8 +416,9 @@ if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { const int MinInstructionsForSnippet = 4 * Instructions.size(); const int LoopBodySizeForSnippet = 2 * Instructions.size(); - auto Snippet = assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, - LoopBodySizeForSnippet); + auto Snippet = + assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, + LoopBodySizeForSnippet, GenerateMemoryInstructions); if (Error E = Snippet.takeError()) return std::move(E); const ExecutableFunction EF(State.createTargetMachine(), @@ -367,9 +429,10 @@ // Assemble NumRepetitions instructions repetitions of the snippet for // measurements. - if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { + if (BenchmarkPhaseSelector > + BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { auto Snippet = assembleSnippet(BC, Repetitor, InstrBenchmark.NumRepetitions, - LoopBodySize); + LoopBodySize, GenerateMemoryInstructions); if (Error E = Snippet.takeError()) return std::move(E); RC.ObjectFile = getObjectFromBuffer(*Snippet); diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h --- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h @@ -39,8 +39,8 @@ // Returns a functor that repeats `Instructions` so that the function executes // at least `MinInstructions` instructions. 
virtual FillFunction Repeat(ArrayRef Instructions, - unsigned MinInstructions, - unsigned LoopBodySize) const = 0; + unsigned MinInstructions, unsigned LoopBodySize, + bool CleanupMemory) const = 0; explicit SnippetRepetitor(const LLVMState &State) : State(State) {} diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp --- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp @@ -26,8 +26,10 @@ // Repeats the snippet until there are at least MinInstructions in the // resulting code. FillFunction Repeat(ArrayRef Instructions, unsigned MinInstructions, - unsigned LoopBodySize) const override { - return [Instructions, MinInstructions](FunctionFiller &Filler) { + unsigned LoopBodySize, + bool CleanupMemory) const override { + return [this, Instructions, MinInstructions, + CleanupMemory](FunctionFiller &Filler) { auto Entry = Filler.getEntry(); if (!Instructions.empty()) { // Add the whole snippet at least once. @@ -36,7 +38,7 @@ Entry.addInstruction(Instructions[I % Instructions.size()]); } } - Entry.addReturn(); + Entry.addReturn(State.getExegesisTarget(), CleanupMemory); }; } @@ -55,9 +57,10 @@ // Loop over the snippet ceil(MinInstructions / Instructions.Size()) times. FillFunction Repeat(ArrayRef Instructions, unsigned MinInstructions, - unsigned LoopBodySize) const override { - return [this, Instructions, MinInstructions, - LoopBodySize](FunctionFiller &Filler) { + unsigned LoopBodySize, + bool CleanupMemory) const override { + return [this, Instructions, MinInstructions, LoopBodySize, + CleanupMemory](FunctionFiller &Filler) { const auto &ET = State.getExegesisTarget(); auto Entry = Filler.getEntry(); @@ -67,7 +70,7 @@ const MCInstrDesc &MCID = Filler.MCII->get(Opcode); if (!MCID.isTerminator()) continue; - Entry.addReturn(); + Entry.addReturn(State.getExegesisTarget(), CleanupMemory); return; } @@ -112,7 +115,7 @@ // Set up the exit basic block. 
Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero()); - Exit.addReturn(); + Exit.addReturn(State.getExegesisTarget(), CleanupMemory); }; } diff --git a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h --- a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h +++ b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h @@ -77,8 +77,11 @@ FillFunction Fill) { SmallString<256> Buffer; raw_svector_ostream AsmStream(Buffer); + BenchmarkKey Key; + Key.RegisterInitialValues = RegisterInitialValues; EXPECT_FALSE(assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{}, - RegisterInitialValues, Fill, AsmStream)); + RegisterInitialValues, Fill, AsmStream, Key, + false)); return ExecutableFunction(createTargetMachine(), getObjectFromBuffer(AsmStream.str())); } diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -43,7 +43,7 @@ const std::vector Instructions = {MCInstBuilder(X86::NOOP)}; FunctionFiller Sink(*MF, {X86::EAX}); const auto Fill = - Repetitor->Repeat(Instructions, kMinInstructions, kLoopBodySize); + Repetitor->Repeat(Instructions, kMinInstructions, kLoopBodySize, false); Fill(Sink); }