diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
@@ -0,0 +1,13 @@
+# REQUIRES: exegesis-can-execute-x86_64, exegesis-can-measure-latency, x86_64-linux
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess -repetition-mode=loop | FileCheck %s
+
+# CHECK: measurements:
+# CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}}
+
+# LLVM-EXEGESIS-MEM-DEF test1 4096 2147483647
+# LLVM-EXEGESIS-MEM-MAP test1 8192
+
+movq $8192, %rax
+movq (%rax), %rdi
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h
--- a/llvm/tools/llvm-exegesis/lib/Assembler.h
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.h
@@ -50,7 +50,8 @@
   void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc());
   void addInstructions(ArrayRef<MCInst> Insts, const DebugLoc &DL = DebugLoc());
 
-  void addReturn(const DebugLoc &DL = DebugLoc());
+  void addReturn(const ExegesisTarget &ET, bool SubprocessCleanup,
+                 const DebugLoc &DL = DebugLoc());
 
   MachineFunction &MF;
   MachineBasicBlock *const MBB;
@@ -91,7 +92,9 @@
                        std::unique_ptr<LLVMTargetMachine> TM,
                        ArrayRef<unsigned> LiveIns,
                        ArrayRef<RegisterValue> RegisterInitialValues,
-                       const FillFunction &Fill, raw_pwrite_stream &AsmStream);
+                       const FillFunction &Fill, raw_pwrite_stream &AsmStream,
+                       const BenchmarkKey &Key,
+                       bool GenerateMemoryInstructions);
 
 // Creates an ObjectFile in the format understood by the host.
 // Note: the resulting object keeps a copy of Buffer so it can be discarded once
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -9,6 +9,7 @@
 #include "Assembler.h"
 
 #include "SnippetRepetitor.h"
+#include "SubprocessMemory.h"
 #include "Target.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -28,6 +29,10 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/MemoryBuffer.h"
 
+#ifdef __linux__
+#include "perfmon/perf_event.h"
+#endif // __linux__
+
 namespace llvm {
 namespace exegesis {
 
@@ -39,8 +44,20 @@
 // all registers could be setup correctly.
 static bool generateSnippetSetupCode(
     const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
-    ArrayRef<RegisterValue> RegisterInitialValues, BasicBlockFiller &BBF) {
+    ArrayRef<RegisterValue> RegisterInitialValues, BasicBlockFiller &BBF,
+    const BenchmarkKey &Key, bool GenerateMemoryInstructions) {
   bool IsSnippetSetupComplete = true;
+  if (GenerateMemoryInstructions) {
+    BBF.addInstructions(ET.generateMemoryInitialSetup());
+    for (const MemoryMapping &MM : Key.MemoryMappings) {
+      const auto &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
+      BBF.addInstructions(ET.generateMmap(
+          MM.Address, MemVal.SizeBytes,
+          ET.getAuxiliaryMemoryStartAddress() +
+              sizeof(int) * (MemVal.Index + AuxiliaryMemoryOffset)));
+    }
+    BBF.addInstructions(ET.setStackRegisterToAuxMem());
+  }
   for (const RegisterValue &RV : RegisterInitialValues) {
     // Load a constant in the register.
     const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
@@ -48,6 +65,11 @@
       IsSnippetSetupComplete = false;
     BBF.addInstructions(SetRegisterCode);
   }
+  if (GenerateMemoryInstructions) {
+#ifdef __linux__
+    BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET));
+#endif // __linux__
+  }
   return IsSnippetSetupComplete;
 }
 
@@ -122,7 +144,15 @@
     addInstruction(Inst, DL);
 }
 
-void BasicBlockFiller::addReturn(const DebugLoc &DL) {
+void BasicBlockFiller::addReturn(const ExegesisTarget &ET,
+                                 bool SubprocessCleanup, const DebugLoc &DL) {
+  // Insert cleanup code
+  if (SubprocessCleanup) {
+#ifdef __linux__
+    addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_DISABLE));
+    addInstructions(ET.generateExitSyscall(0));
+#endif // __linux__
+  }
   // Insert the return code.
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   if (TII->getReturnOpcode() < TII->getNumOpcodes()) {
@@ -176,7 +206,9 @@
                        std::unique_ptr<LLVMTargetMachine> TM,
                        ArrayRef<unsigned> LiveIns,
                        ArrayRef<RegisterValue> RegisterInitialValues,
-                       const FillFunction &Fill, raw_pwrite_stream &AsmStream) {
+                       const FillFunction &Fill, raw_pwrite_stream &AsmStream,
+                       const BenchmarkKey &Key,
+                       bool GenerateMemoryInstructions) {
   auto Context = std::make_unique<LLVMContext>();
   std::unique_ptr<Module> Module =
       createModule(Context, TM->createDataLayout());
@@ -192,8 +224,14 @@
   Properties.reset(MachineFunctionProperties::Property::IsSSA);
   Properties.set(MachineFunctionProperties::Property::NoPHIs);
 
-  for (const unsigned Reg : LiveIns)
-    MF.getRegInfo().addLiveIn(Reg);
+  if (GenerateMemoryInstructions) {
+    for (const unsigned Reg : ET.getArgumentRegisters()) {
+      MF.getRegInfo().addLiveIn(Reg);
+    }
+  } else {
+    for (const unsigned Reg : LiveIns)
+      MF.getRegInfo().addLiveIn(Reg);
+  }
 
   std::vector<unsigned> RegistersSetUp;
   for (const auto &InitValue : RegisterInitialValues) {
@@ -201,11 +239,18 @@
   }
   FunctionFiller Sink(MF, std::move(RegistersSetUp));
   auto Entry = Sink.getEntry();
-  for (const unsigned Reg : LiveIns)
-    Entry.MBB->addLiveIn(Reg);
+  if (GenerateMemoryInstructions) {
+    for (const unsigned Reg : ET.getArgumentRegisters())
+      Entry.MBB->addLiveIn(Reg);
+  } else {
+    for (const unsigned Reg : LiveIns) {
+      Entry.MBB->addLiveIn(Reg);
+    }
+  }
 
   const bool IsSnippetSetupComplete = generateSnippetSetupCode(
-      ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry);
+      ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry, Key,
+      GenerateMemoryInstructions);
 
   // If the snippet setup is not complete, we disable liveliness tracking. This
   // means that we won't know what values are in the registers.
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -113,10 +113,10 @@
   virtual Expected<std::vector<BenchmarkMeasure>>
   runMeasurements(const FunctionExecutor &Executor) const = 0;
 
-  Expected<SmallString<0>> assembleSnippet(const BenchmarkCode &BC,
-                                           const SnippetRepetitor &Repetitor,
-                                           unsigned MinInstructions,
-                                           unsigned LoopBodySize) const;
+  Expected<SmallString<0>>
+  assembleSnippet(const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
+                  unsigned MinInstructions, unsigned LoopBodySize,
+                  bool GenerateMemoryInstructions) const;
 
   Expected<std::string> writeObjectFile(StringRef Buffer,
                                         StringRef FileName) const;
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -15,6 +15,7 @@
 #include "Error.h"
 #include "MCInstrDescView.h"
 #include "PerfHelper.h"
+#include "SubprocessMemory.h"
 #include "Target.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
@@ -36,6 +37,12 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <unistd.h>
+
+#ifdef __GLIBC__
+#if __GLIBC_MINOR__ >= 35
+#include <sys/rseq.h>
+#endif // __GLIBC_MINOR__ >= 35
+#endif // __GLIBC__
 #endif // __linux__
 
 namespace llvm {
@@ -161,7 +168,11 @@
 private:
   enum ChildProcessExitCodeE {
     CounterFDReadFailed = 1,
-    TranslatingCounterFDFailed
+    TranslatingCounterFDFailed,
+    RSeqDisableFailed,
+    FunctionDataMappingFailed,
+    AuxiliaryMemorySetupFailed
+
   };
 
   StringRef childProcessExitCodeToString(int ExitCode) const {
@@ -171,6 +182,12 @@
     case ChildProcessExitCodeE::TranslatingCounterFDFailed:
       return "Translating counter file descriptor into a file descriptor in "
              "the child process failed";
+    case ChildProcessExitCodeE::RSeqDisableFailed:
+      return "Disabling restartable sequences failed";
+    case ChildProcessExitCodeE::FunctionDataMappingFailed:
+      return "Failed to map memory for assembled snippet";
+    case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
+      return "Failed to setup auxiliary memory";
     default:
       return "Child process returned with unknown exit code";
     }
@@ -186,6 +203,16 @@
           "llvm-exegesis and the benchmarking subprocess");
     }
 
+    SubprocessMemory SPMemory;
+    Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
+    if (MemoryInitError)
+      return MemoryInitError;
+
+    Error AddMemDefError =
+        SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
+    if (AddMemDefError)
+      return AddMemDefError;
+
     pid_t ParentOrChildPID = fork();
     if (ParentOrChildPID == 0) {
       // We are in the child process, close the write end of the pipe
@@ -278,14 +305,40 @@
       exit(ChildProcessExitCodeE::TranslatingCounterFDFailed);
     }
 
-#ifdef HAVE_LIBPFM
-    ioctl(CounterFileDescriptor, PERF_EVENT_IOC_RESET);
+// Glibc 2.35+ registers an rseq region during initialization; unmapping it
+// later segfaults the program, so unregister it here to make that safe.
+#ifdef __GLIBC__
+#if __GLIBC_MINOR__ >= 35
+    long RseqDisableOutput =
+        syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
+                __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+    if (RseqDisableOutput != 0)
+      exit(ChildProcessExitCodeE::RSeqDisableFailed);
 #endif
-    this->Function(nullptr);
-#ifdef HAVE_LIBPFM
-    ioctl(CounterFileDescriptor, PERF_EVENT_IOC_DISABLE);
 #endif
 
+    size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
+    char *FunctionDataCopy =
+        (char *)mmap(nullptr, FunctionDataCopySize, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (FunctionDataCopy == MAP_FAILED)
+      exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
+
+    memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
+           FunctionDataCopySize);
+    if (mprotect(FunctionDataCopy, FunctionDataCopySize,
+                 PROT_READ | PROT_EXEC) != 0)
+      exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
+
+    Expected<int> AuxMemFDOrError =
+        SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
+            Key.MemoryValues, ParentPID, CounterFileDescriptor);
+    if (!AuxMemFDOrError)
+      exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
+
+    ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
+                                                        *AuxMemFDOrError);
+
     exit(0);
   }
 
@@ -311,14 +364,17 @@
 
 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
     const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
-    unsigned MinInstructions, unsigned LoopBodySize) const {
+    unsigned MinInstructions, unsigned LoopBodySize,
+    bool GenerateMemoryInstructions) const {
   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
   SmallString<0> Buffer;
   raw_svector_ostream OS(Buffer);
   if (Error E = assembleToStream(
           State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
           BC.Key.RegisterInitialValues,
-          Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize), OS)) {
+          Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
+                           GenerateMemoryInstructions),
+          OS, BC.Key, GenerateMemoryInstructions)) {
     return std::move(E);
   }
   return Buffer;
@@ -340,6 +396,8 @@
 
   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
 
+  bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
+
   InstrBenchmark.Key = BC.Key;
 
   // Assemble at least kMinInstructionsForSnippet instructions by repeating
@@ -348,8 +406,9 @@
   if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
     const int MinInstructionsForSnippet = 4 * Instructions.size();
     const int LoopBodySizeForSnippet = 2 * Instructions.size();
-    auto Snippet = assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
-                                   LoopBodySizeForSnippet);
+    auto Snippet =
+        assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
+                        LoopBodySizeForSnippet, GenerateMemoryInstructions);
     if (Error E = Snippet.takeError())
       return std::move(E);
     const ExecutableFunction EF(State.createTargetMachine(),
@@ -360,9 +419,10 @@
 
   // Assemble NumRepetitions instructions repetitions of the snippet for
   // measurements.
-  if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
+  if (BenchmarkPhaseSelector >
+      BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
     auto Snippet = assembleSnippet(BC, Repetitor, InstrBenchmark.NumRepetitions,
-                                   LoopBodySize);
+                                   LoopBodySize, GenerateMemoryInstructions);
     if (Error E = Snippet.takeError())
       return std::move(E);
     RC.ObjectFile = getObjectFromBuffer(*Snippet);
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h
@@ -39,8 +39,8 @@
   // Returns a functor that repeats `Instructions` so that the function executes
   // at least `MinInstructions` instructions.
   virtual FillFunction Repeat(ArrayRef<MCInst> Instructions,
-                              unsigned MinInstructions,
-                              unsigned LoopBodySize) const = 0;
+                              unsigned MinInstructions, unsigned LoopBodySize,
+                              bool CleanupMemory) const = 0;
 
   explicit SnippetRepetitor(const LLVMState &State) : State(State) {}
 
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -26,8 +26,10 @@
   // Repeats the snippet until there are at least MinInstructions in the
   // resulting code.
   FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
-                      unsigned LoopBodySize) const override {
-    return [Instructions, MinInstructions](FunctionFiller &Filler) {
+                      unsigned LoopBodySize,
+                      bool CleanupMemory) const override {
+    return [this, Instructions, MinInstructions,
+            CleanupMemory](FunctionFiller &Filler) {
       auto Entry = Filler.getEntry();
       if (!Instructions.empty()) {
         // Add the whole snippet at least once.
@@ -36,7 +38,7 @@
           Entry.addInstruction(Instructions[I % Instructions.size()]);
         }
       }
-      Entry.addReturn();
+      Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
     };
   }
 
@@ -55,9 +57,10 @@
 
   // Loop over the snippet ceil(MinInstructions / Instructions.Size()) times.
   FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
-                      unsigned LoopBodySize) const override {
-    return [this, Instructions, MinInstructions,
-            LoopBodySize](FunctionFiller &Filler) {
+                      unsigned LoopBodySize,
+                      bool CleanupMemory) const override {
+    return [this, Instructions, MinInstructions, LoopBodySize,
+            CleanupMemory](FunctionFiller &Filler) {
       const auto &ET = State.getExegesisTarget();
       auto Entry = Filler.getEntry();
 
@@ -67,7 +70,7 @@
         const MCInstrDesc &MCID = Filler.MCII->get(Opcode);
         if (!MCID.isTerminator())
           continue;
-        Entry.addReturn();
+        Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
         return;
       }
 
@@ -112,7 +115,7 @@
 
       // Set up the exit basic block.
       Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
-      Exit.addReturn();
+      Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
     };
   }
 
diff --git a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
--- a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
+++ b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
@@ -77,8 +77,11 @@
                      FillFunction Fill) {
     SmallString<256> Buffer;
     raw_svector_ostream AsmStream(Buffer);
+    BenchmarkKey Key;
+    Key.RegisterInitialValues = RegisterInitialValues;
     EXPECT_FALSE(assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{},
-                                  RegisterInitialValues, Fill, AsmStream));
+                                  RegisterInitialValues, Fill, AsmStream, Key,
+                                  false));
     return ExecutableFunction(createTargetMachine(),
                               getObjectFromBuffer(AsmStream.str()));
   }
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
@@ -43,7 +43,7 @@
     const std::vector<MCInst> Instructions = {MCInstBuilder(X86::NOOP)};
     FunctionFiller Sink(*MF, {X86::EAX});
     const auto Fill =
-        Repetitor->Repeat(Instructions, kMinInstructions, kLoopBodySize);
+        Repetitor->Repeat(Instructions, kMinInstructions, kLoopBodySize, false);
     Fill(Sink);
   }