diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h
--- a/lldb/include/lldb/Target/Trace.h
+++ b/lldb/include/lldb/Target/Trace.h
@@ -433,15 +433,15 @@
   GetLiveProcessBinaryData(llvm::StringRef kind);
 
   /// Get the size of the data returned by \a GetLiveThreadBinaryData
-  llvm::Optional<size_t> GetLiveThreadBinaryDataSize(lldb::tid_t tid,
-                                                     llvm::StringRef kind);
+  llvm::Optional<uint64_t> GetLiveThreadBinaryDataSize(lldb::tid_t tid,
+                                                       llvm::StringRef kind);
 
   /// Get the size of the data returned by \a GetLiveCoreBinaryData
-  llvm::Optional<size_t> GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
-                                                   llvm::StringRef kind);
+  llvm::Optional<uint64_t> GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
+                                                     llvm::StringRef kind);
 
   /// Get the size of the data returned by \a GetLiveProcessBinaryData
-  llvm::Optional<size_t> GetLiveProcessBinaryDataSize(llvm::StringRef kind);
+  llvm::Optional<uint64_t> GetLiveProcessBinaryDataSize(llvm::StringRef kind);
 
   /// Constructor for post mortem processes
   Trace(llvm::ArrayRef<lldb::ProcessSP> postmortem_processes,
@@ -513,14 +513,14 @@
   /// \{
 
   /// tid -> data kind -> size
-  llvm::DenseMap<lldb::tid_t, std::unordered_map<std::string, size_t>>
+  llvm::DenseMap<lldb::tid_t, std::unordered_map<std::string, uint64_t>>
       m_live_thread_data;
 
   /// core id -> data kind -> size
-  llvm::DenseMap<lldb::core_id_t, std::unordered_map<std::string, size_t>>
+  llvm::DenseMap<lldb::core_id_t, std::unordered_map<std::string, uint64_t>>
       m_live_core_data;
   /// data kind -> size
-  std::unordered_map<std::string, size_t> m_live_process_data;
+  std::unordered_map<std::string, uint64_t> m_live_process_data;
   /// \}
 
   /// The list of cores being traced. Might be \b None depending on the plug-in.
diff --git a/lldb/include/lldb/Utility/TraceGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceGDBRemotePackets.h
--- a/lldb/include/lldb/Utility/TraceGDBRemotePackets.h
+++ b/lldb/include/lldb/Utility/TraceGDBRemotePackets.h
@@ -154,7 +154,7 @@
   /// Optional tid if the data is related to a thread.
   llvm::Optional<lldb::tid_t> tid;
   /// Optional core id if the data is related to a cpu core.
-  llvm::Optional<lldb::tid_t> core_id;
+  llvm::Optional<lldb::core_id_t> core_id;
   /// Offset in bytes from where to start reading the data.
   uint64_t offset;
   /// Number of bytes to read.
diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
--- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
+++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h
@@ -80,7 +80,9 @@
   ///
   /// \return
   ///   Nanosecond wall time.
-  std::chrono::nanoseconds ToNanos(uint64_t tsc);
+  std::chrono::nanoseconds ToNanos(uint64_t tsc) const;
+
+  uint64_t ToTSC(std::chrono::nanoseconds nanos) const;
 
   uint32_t time_mult;
   uint16_t time_shift;
diff --git a/lldb/source/Plugins/Process/Linux/Perf.cpp b/lldb/source/Plugins/Process/Linux/Perf.cpp
--- a/lldb/source/Plugins/Process/Linux/Perf.cpp
+++ b/lldb/source/Plugins/Process/Linux/Perf.cpp
@@ -231,7 +231,7 @@
                                      "buffer but only {1} are available",
                                      size, output.size()));
 
-  return data;
+  return output;
 }
 
 Expected<std::vector<uint8_t>>
@@ -282,7 +282,7 @@
                                      "buffer but only {1} are available",
                                      size, output.size()));
 
-  return data;
+  return output;
 }
 
 Error PerfEvent::DisableWithIoctl() {
@@ -313,7 +313,7 @@
 
 size_t PerfEvent::GetEffectiveDataBufferSize() const {
   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
-  if (mmap_metadata.data_head <= mmap_metadata.data_size)
+  if (mmap_metadata.data_head < mmap_metadata.data_size)
     return mmap_metadata.data_head;
   else
     return mmap_metadata.data_size; // The buffer has wrapped.
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -2697,8 +2697,8 @@
     packet.Printf("%" PRIx64, tid);
 
   StringExtractorGDBRemote response;
-  if (SendPacketAndWaitForResponse(packet.GetString(), response) 
-      == PacketResult::Success) {
+  if (SendPacketAndWaitForResponse(packet.GetString(), response) ==
+      PacketResult::Success) {
     if (response.IsOKResponse())
       return {{pid, tid}};
 
@@ -3701,9 +3701,6 @@
       GDBRemoteCommunication::PacketResult::Success) {
     if (response.IsErrorResponse())
       return response.GetStatus().ToError();
-    if (response.IsUnsupportedResponse())
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "jLLDBTraceGetBinaryData is unsupported");
     std::string data;
     response.GetEscapedBinaryData(data);
     return std::vector<uint8_t>(data.begin(), data.end());
diff --git a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt
--- a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt
+++ b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt
@@ -22,6 +22,7 @@
   TraceCursorIntelPT.cpp
   TraceIntelPT.cpp
   TraceIntelPTJSONStructs.cpp
+  TraceIntelPTMultiCoreDecoder.cpp
   TraceIntelPTSessionFileParser.cpp
   TraceIntelPTSessionSaver.cpp
 
diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h
--- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h
@@ -31,9 +31,9 @@
 
     llvm::ArrayRef<OptionDefinition> GetDefinitions() override;
 
-    size_t m_trace_buffer_size;
+    uint64_t m_trace_buffer_size;
     bool m_enable_tsc;
-    llvm::Optional<size_t> m_psb_period;
+    llvm::Optional<uint64_t> m_psb_period;
   };
 
   CommandObjectThreadTraceStartIntelPT(TraceIntelPT &trace,
@@ -74,10 +74,10 @@
 
     llvm::ArrayRef<OptionDefinition> GetDefinitions() override;
 
-    size_t m_trace_buffer_size;
-    size_t m_process_buffer_size_limit;
+    uint64_t m_trace_buffer_size;
+    uint64_t m_process_buffer_size_limit;
     bool m_enable_tsc;
-    llvm::Optional<size_t> m_psb_period;
+    llvm::Optional<uint64_t> m_psb_period;
     bool m_per_core_tracing;
   };
 
diff --git a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h
--- a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h
@@ -22,8 +22,8 @@
 namespace trace_intel_pt {
 
 /// Class used to track the duration of long running tasks related to a single
-/// thread for reporting.
-class ThreadTaskTimer {
+/// scope for reporting.
+class ScopedTaskTimer {
 public:
   /// Execute the given \p task and record its duration.
   ///
@@ -63,10 +63,15 @@
 public:
   /// \return
   ///     The timer object for the given thread.
-  ThreadTaskTimer &ForThread(lldb::tid_t tid);
+  ScopedTaskTimer &ForThread(lldb::tid_t tid);
+
+  /// \return
+  ///     The timer object for global tasks.
+  ScopedTaskTimer &ForGlobal();
 
 private:
-  llvm::DenseMap<lldb::tid_t, ThreadTaskTimer> m_thread_timers;
+  llvm::DenseMap<lldb::tid_t, ScopedTaskTimer> m_thread_timers;
+  ScopedTaskTimer m_global_timer;
 };
 
 } // namespace trace_intel_pt
diff --git a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp
@@ -5,7 +5,7 @@
 using namespace lldb_private::trace_intel_pt;
 using namespace llvm;
 
-void ThreadTaskTimer::ForEachTimedTask(
+void ScopedTaskTimer::ForEachTimedTask(
     std::function<void(const std::string &event,
                        std::chrono::milliseconds duration)>
         callback) {
@@ -14,9 +14,11 @@
   }
 }
 
-ThreadTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) {
+ScopedTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) {
   auto it = m_thread_timers.find(tid);
   if (it == m_thread_timers.end())
-    it = m_thread_timers.try_emplace(tid, ThreadTaskTimer{}).first;
+    it = m_thread_timers.try_emplace(tid, ScopedTaskTimer{}).first;
   return it->second;
 }
+
+ScopedTaskTimer &TaskTimer::ForGlobal() { return m_global_timer; }
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
@@ -11,7 +11,9 @@
 
 #include "TaskTimer.h"
 #include "ThreadDecoder.h"
+#include "TraceIntelPTMultiCoreDecoder.h"
 #include "TraceIntelPTSessionFileParser.h"
+
 #include "lldb/Utility/FileSpec.h"
 #include "lldb/lldb-types.h"
 #include "llvm/Support/raw_ostream.h"
@@ -73,7 +75,7 @@
 
   void DumpTraceInfo(Thread &thread, Stream &s, bool verbose) override;
 
-  llvm::Expected<size_t> GetRawTraceSize(Thread &thread);
+  llvm::Expected<llvm::Optional<uint64_t>> GetRawTraceSize(Thread &thread);
 
   llvm::Error DoRefreshLiveProcessState(TraceGetStateResponse state,
                                         llvm::StringRef json_response) override;
@@ -106,8 +108,9 @@
   /// \return
   ///     \a llvm::Error::success if the operation was successful, or
   ///     \a llvm::Error otherwise.
-  llvm::Error Start(size_t trace_buffer_size, size_t total_buffer_size_limit,
-                    bool enable_tsc, llvm::Optional<size_t> psb_period,
+  llvm::Error Start(uint64_t trace_buffer_size,
+                    uint64_t total_buffer_size_limit, bool enable_tsc,
+                    llvm::Optional<uint64_t> psb_period,
                     bool m_per_core_tracing);
 
   /// \copydoc Trace::Start
@@ -134,8 +137,9 @@
   /// \return
   ///     \a llvm::Error::success if the operation was successful, or
   ///     \a llvm::Error otherwise.
-  llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids, size_t trace_buffer_size,
-                    bool enable_tsc, llvm::Optional<size_t> psb_period);
+  llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids,
+                    uint64_t trace_buffer_size, bool enable_tsc,
+                    llvm::Optional<uint64_t> psb_period);
 
   /// \copydoc Trace::Start
   llvm::Error Start(llvm::ArrayRef<lldb::tid_t> tids,
@@ -195,6 +199,8 @@
   /// It is provided by either a session file or a live process' "cpuInfo"
   /// binary data.
   llvm::Optional<pt_cpu> m_cpu_info;
+  llvm::Optional<TraceIntelPTMultiCoreDecoder> m_multicore_decoder;
+  /// These decoders are used for the non-per-core case
   std::map<lldb::tid_t, std::unique_ptr<ThreadDecoder>> m_thread_decoders;
   /// Helper variable used to track long running operations for telemetry.
   TaskTimer m_task_timer;
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -89,13 +89,24 @@
     }
   }
   if (session.cores) {
+    std::vector<core_id_t> cores;
+
     for (const JSONCore &core : *session.cores) {
       SetPostMortemCoreDataFile(core.core_id, IntelPTDataKinds::kTraceBuffer,
                                 FileSpec(core.trace_buffer));
       SetPostMortemCoreDataFile(core.core_id,
                                 IntelPTDataKinds::kPerfContextSwitchTrace,
                                 FileSpec(core.context_switch_trace));
+      cores.push_back(core.core_id);
     }
+
+    std::vector<tid_t> tids;
+    for (const JSONProcess &process : session.processes)
+      for (const JSONThread &thread : process.threads)
+        tids.push_back(thread.tid);
+
+    m_multicore_decoder.emplace(*this, cores, tids,
+                                *session.tsc_perf_zero_conversion);
   }
 }
 
@@ -105,6 +116,9 @@
         thread.shared_from_this(),
         createStringError(inconvertibleErrorCode(), error));
 
+  if (m_multicore_decoder)
+    return m_multicore_decoder->Decode(thread);
+
   auto it = m_thread_decoders.find(thread.GetID());
   if (it == m_thread_decoders.end())
     return std::make_shared<DecodedThread>(
@@ -126,60 +140,98 @@
   }
   s << "\n";
 
-  Expected<size_t> raw_size = GetRawTraceSize(thread);
-  if (!raw_size) {
-    s.Format("  {0}\n", toString(raw_size.takeError()));
+  Expected<Optional<uint64_t>> raw_size_or_error = GetRawTraceSize(thread);
+  if (!raw_size_or_error) {
+    s.Format("  {0}\n", toString(raw_size_or_error.takeError()));
     return;
   }
+  Optional<uint64_t> raw_size = *raw_size_or_error;
 
   DecodedThreadSP decoded_trace_sp = Decode(thread);
-  size_t insn_len = decoded_trace_sp->GetInstructionsCount();
-  size_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
-
-  s.Format("  Total number of instructions: {0}\n", insn_len);
-
-  s << "\n  Memory usage:\n";
-  s.Format("    Raw trace size: {0} KiB\n", *raw_size / 1024);
-  s.Format(
-      "    Total approximate memory usage (excluding raw trace): {0:2} KiB\n",
-      (double)mem_used / 1024);
-  if (insn_len != 0)
-    s.Format("    Average memory usage per instruction (excluding raw trace): "
-             "{0:2} bytes\n",
-             (double)mem_used / insn_len);
-
-  s << "\n  Timing:\n";
-  GetTimer().ForThread(tid).ForEachTimedTask(
-      [&](const std::string &name, std::chrono::milliseconds duration) {
-        s.Format("    {0}: {1:2}s\n", name, duration.count() / 1000.0);
-      });
-
-  const DecodedThread::EventsStats &events_stats =
-      decoded_trace_sp->GetEventsStats();
-  s << "\n  Events:\n";
-  s.Format("    Number of instructions with events: {0}\n",
-           events_stats.total_instructions_with_events);
-  s.Format("    Number of individual events: {0}\n", events_stats.total_count);
-  for (const auto &event_to_count : events_stats.events_counts) {
-    s.Format("      {0}: {1}\n",
-             trace_event_utils::EventToDisplayString(event_to_count.first),
-             event_to_count.second);
+
+  /// Instruction stats
+  {
+    uint64_t insn_len = decoded_trace_sp->GetInstructionsCount();
+    uint64_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
+
+    s.Format("  Total number of instructions: {0}\n", insn_len);
+
+    s << "\n  Memory usage:\n";
+    if (raw_size)
+      s.Format("    Raw trace size: {0} KiB\n", *raw_size / 1024);
+
+    s.Format(
+        "    Total approximate memory usage (excluding raw trace): {0:2} KiB\n",
+        (double)mem_used / 1024);
+    if (insn_len != 0)
+      s.Format(
+          "    Average memory usage per instruction (excluding raw trace): "
+          "{0:2} bytes\n",
+          (double)mem_used / insn_len);
+  }
+
+  // Timing
+  {
+    s << "\n  Timing for this thread:\n";
+    auto print_duration = [&](const std::string &name,
+                              std::chrono::milliseconds duration) {
+      s.Format("    {0}: {1:2}s\n", name, duration.count() / 1000.0);
+    };
+    GetTimer().ForThread(tid).ForEachTimedTask(print_duration);
+
+    s << "\n  Timing for global tasks:\n";
+    GetTimer().ForGlobal().ForEachTimedTask(print_duration);
+  }
+
+  // Instruction events stats
+  {
+    const DecodedThread::EventsStats &events_stats =
+        decoded_trace_sp->GetEventsStats();
+    s << "\n  Events:\n";
+    s.Format("    Number of instructions with events: {0}\n",
+             events_stats.total_instructions_with_events);
+    s.Format("    Number of individual events: {0}\n",
+             events_stats.total_count);
+    for (const auto &event_to_count : events_stats.events_counts) {
+      s.Format("      {0}: {1}\n",
+               trace_event_utils::EventToDisplayString(event_to_count.first),
+               event_to_count.second);
+    }
   }
 
-  s << "\n  Errors:\n";
-  const DecodedThread::LibiptErrorsStats &tsc_errors_stats =
-      decoded_trace_sp->GetTscErrorsStats();
-  s.Format("    Number of TSC decoding errors: {0}\n",
-           tsc_errors_stats.total_count);
-  for (const auto &error_message_to_count :
-       tsc_errors_stats.libipt_errors_counts) {
-    s.Format("      {0}: {1}\n", error_message_to_count.first,
-             error_message_to_count.second);
+  // Multicode decoding stats
+  if (m_multicore_decoder) {
+    s << "\n  Multi-core decoding:\n";
+    s.Format("    Total number of continuous executions found: {0}\n",
+             m_multicore_decoder->GetTotalContinuousExecutionsCount());
+    s.Format("    Number of continuous executions for this thread: {0}\n",
+             m_multicore_decoder->GetNumContinuousExecutionsForThread(tid));
+  }
+
+  // Errors
+  {
+    s << "\n  Errors:\n";
+    const DecodedThread::LibiptErrorsStats &tsc_errors_stats =
+        decoded_trace_sp->GetTscErrorsStats();
+    s.Format("    Number of TSC decoding errors: {0}\n",
+             tsc_errors_stats.total_count);
+    for (const auto &error_message_to_count :
+         tsc_errors_stats.libipt_errors_counts) {
+      s.Format("      {0}: {1}\n", error_message_to_count.first,
+               error_message_to_count.second);
+    }
   }
 }
 
-llvm::Expected<size_t> TraceIntelPT::GetRawTraceSize(Thread &thread) {
-  size_t size;
+llvm::Expected<Optional<uint64_t>>
+TraceIntelPT::GetRawTraceSize(Thread &thread) {
+  if (m_multicore_decoder)
+    return None; // TODO: calculate the amount of intel pt raw trace associated
+                 // with the given thread.
+  if (GetLiveProcess())
+    return GetLiveThreadBinaryDataSize(thread.GetID(),
+                                       IntelPTDataKinds::kTraceBuffer);
+  uint64_t size;
   auto callback = [&](llvm::ArrayRef<uint8_t> data) {
     size = data.size();
     return Error::success();
@@ -262,13 +314,8 @@
 Error TraceIntelPT::DoRefreshLiveProcessState(TraceGetStateResponse state,
                                               StringRef json_response) {
   m_thread_decoders.clear();
-
-  for (const TraceThreadState &thread_state : state.traced_threads) {
-    ThreadSP thread_sp =
-        GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid);
-    m_thread_decoders.emplace(
-        thread_state.tid, std::make_unique<ThreadDecoder>(thread_sp, *this));
-  }
+  m_tsc_conversion.reset();
+  m_multicore_decoder.reset();
 
   Expected<TraceIntelPTGetStateResponse> intelpt_state =
       json::parse<TraceIntelPTGetStateResponse>(json_response,
@@ -276,6 +323,29 @@
   if (!intelpt_state)
     return intelpt_state.takeError();
 
+  if (!intelpt_state->cores) {
+    for (const TraceThreadState &thread_state : state.traced_threads) {
+      ThreadSP thread_sp =
+          GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid);
+      m_thread_decoders.emplace(
+          thread_state.tid, std::make_unique<ThreadDecoder>(thread_sp, *this));
+    }
+  } else {
+    std::vector<core_id_t> cores;
+    for (const TraceCoreState &core : *intelpt_state->cores)
+      cores.push_back(core.core_id);
+
+    std::vector<tid_t> tids;
+    for (const TraceThreadState &thread : intelpt_state->traced_threads)
+      tids.push_back(thread.tid);
+
+    if (!intelpt_state->tsc_perf_zero_conversion)
+      return createStringError(inconvertibleErrorCode(),
+                               "Missing perf time_zero conversion values");
+    m_multicore_decoder.emplace(*this, cores, tids,
+                                *intelpt_state->tsc_perf_zero_conversion);
+  }
+
   m_tsc_conversion = intelpt_state->tsc_perf_zero_conversion;
   if (m_tsc_conversion) {
     Log *log = GetLog(LLDBLog::Target);
@@ -286,6 +356,8 @@
 
 bool TraceIntelPT::IsTraced(lldb::tid_t tid) {
   RefreshLiveProcessState();
+  if (m_multicore_decoder)
+    return m_multicore_decoder->TracesThread(tid);
   return m_thread_decoders.count(tid);
 }
 
@@ -322,25 +394,25 @@
   return message->c_str();
 }
 
-Error TraceIntelPT::Start(size_t trace_buffer_size,
-                          size_t total_buffer_size_limit, bool enable_tsc,
-                          Optional<size_t> psb_period, bool per_core_tracing) {
+Error TraceIntelPT::Start(uint64_t trace_buffer_size,
+                          uint64_t total_buffer_size_limit, bool enable_tsc,
+                          Optional<uint64_t> psb_period,
+                          bool per_core_tracing) {
   TraceIntelPTStartRequest request;
   request.trace_buffer_size = trace_buffer_size;
   request.process_buffer_size_limit = total_buffer_size_limit;
   request.enable_tsc = enable_tsc;
-  request.psb_period =
-      psb_period.map([](size_t val) { return static_cast<uint64_t>(val); });
+  request.psb_period = psb_period;
   request.type = GetPluginName().str();
   request.per_core_tracing = per_core_tracing;
   return Trace::Start(toJSON(request));
 }
 
 Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
-  size_t trace_buffer_size = kDefaultTraceBufferSize;
-  size_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit;
+  uint64_t trace_buffer_size = kDefaultTraceBufferSize;
+  uint64_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit;
   bool enable_tsc = kDefaultEnableTscValue;
-  Optional<size_t> psb_period = kDefaultPsbPeriod;
+  Optional<uint64_t> psb_period = kDefaultPsbPeriod;
   bool per_core_tracing = kDefaultPerCoreTracing;
 
   if (configuration) {
@@ -362,13 +434,12 @@
 }
 
 llvm::Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids,
-                                size_t trace_buffer_size, bool enable_tsc,
-                                Optional<size_t> psb_period) {
+                                uint64_t trace_buffer_size, bool enable_tsc,
+                                Optional<uint64_t> psb_period) {
   TraceIntelPTStartRequest request;
   request.trace_buffer_size = trace_buffer_size;
   request.enable_tsc = enable_tsc;
-  request.psb_period =
-      psb_period.map([](size_t val) { return static_cast<uint64_t>(val); });
+  request.psb_period = psb_period;
   request.type = GetPluginName().str();
   request.tids.emplace();
   for (lldb::tid_t tid : tids)
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp
@@ -132,6 +132,11 @@
       !o.map("type", session.type) || !o.map("cores", session.cores) ||
       !o.map("tscPerfZeroConversion", session.tsc_perf_zero_conversion))
     return false;
+  if (session.cores && !session.tsc_perf_zero_conversion) {
+    path.report(
+        "\"tscPerfZeroConversion\" is required when \"cores\" is provided");
+    return false;
+  }
   // We have to do this because the compiler fails at doing it automatically
   // because pt_cpu is not in a namespace
   if (!fromJSON(*value.getAsObject()->get("cpuInfo"), session.cpu_info,
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h
new file mode 100644
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h
@@ -0,0 +1,161 @@
+//===-- TraceIntelPTMultiCoreDecoder.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
+#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
+
+#include "ThreadDecoder.h"
+
+namespace lldb_private {
+namespace trace_intel_pt {
+
+/// This class indicates the time interval in which a thread was running
+/// continuously on a cpu core.
+///
+/// In most cases both endpoints of the intervals can be accurately recovered
+/// from a context switch trace, but in some cases one of these endpoints might
+/// be guessed or not known at all, due to contention problems in the trace or
+/// because tracing was interrupted.
+///
+/// Note: we use the terms CPU and cores interchangeably.
+struct ThreadContinuousExecution {
+  enum class Variant {
+    /// Both endpoints are known
+    Complete,
+    /// The end is known and we have a guess for the start
+    HintedStart,
+    /// The start is known and we have a guess for the end
+    HintedEnd,
+    /// We only know the start. This might be the last entry of a core trace.
+    OnlyStart,
+    /// We only know the end. This might be the first entry or a core trace.
+    OnlyEnd,
+  } variant;
+
+  union {
+    struct {
+      uint64_t start;
+      uint64_t end;
+    } complete;
+    struct {
+      uint64_t start;
+    } only_start;
+    struct {
+      uint64_t end;
+    } only_end;
+    /// The following 'hinted' structures are useful when there are contention
+    /// problems in the trace
+    struct {
+      uint64_t hinted_start;
+      uint64_t end;
+    } hinted_start;
+    struct {
+      uint64_t start;
+      uint64_t hinted_end;
+    } hinted_end;
+  } tscs;
+
+  lldb::core_id_t core_id;
+  lldb::tid_t tid;
+
+  /// \return
+  ///   A tsc that we are certain of, either the start or the end.
+  uint64_t GetErrorFreeTSC() const;
+
+  /// Constructors for the different variants of this object
+  ///
+  /// \{
+  static ThreadContinuousExecution
+  CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+                          uint64_t start, uint64_t end);
+
+  static ThreadContinuousExecution
+  CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+                             uint64_t hinted_start, uint64_t end);
+
+  static ThreadContinuousExecution
+  CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+                           uint64_t start, uint64_t hinted_end);
+
+  static ThreadContinuousExecution
+  CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+                         uint64_t end);
+
+  static ThreadContinuousExecution
+  CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid,
+                           uint64_t start);
+  /// \}
+
+  /// Comparator by TSCs
+  bool operator<(const ThreadContinuousExecution &o) const;
+
+private:
+  ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid)
+      : core_id(core_id), tid(tid) {}
+};
+
+/// Class used to decode a multi-core Intel PT trace. It assumes that each
+/// thread could have potentially been executed on different cores. It uses a
+/// context switch trace per CPU with timestamps to identify which thread owns
+/// each Intel PT decoded instruction and in which order. It also assumes that
+/// the Intel PT data and context switches might have gaps in their traces due
+/// to contention or race conditions.
+class TraceIntelPTMultiCoreDecoder {
+public:
+  /// \param[in] core_ids
+  ///   The list of cores where the traced programs were running on.
+  ///
+  /// \param[in] tid
+  ///   The full list of tids that were traced.
+  ///
+  /// \param[in] tsc_conversion
+  ///   The conversion values for converting between nanoseconds and TSCs.
+  TraceIntelPTMultiCoreDecoder(
+      TraceIntelPT &trace, llvm::ArrayRef<lldb::core_id_t> core_ids,
+      llvm::ArrayRef<lldb::tid_t> tids,
+      const LinuxPerfZeroTscConversion &tsc_conversion);
+
+  /// \return
+  ///   A \a DecodedThread for the \p thread by decoding its instructions on all
+  ///   CPUs, sorted by TSCs.
+  DecodedThreadSP Decode(Thread &thread);
+
+  /// \return
+  ///   \b true if the given \p tid is managed by this decoder, regardless of
+  ///   whether there's tracing data associated to it or not.
+  bool TracesThread(lldb::tid_t tid) const;
+
+  /// \return
+  ///   The number of continuous executions found for the given \p tid.
+  size_t GetNumContinuousExecutionsForThread(lldb::tid_t tid) const;
+
+  /// \return
+  ///   The total number of continuous executions found across CPUs.
+  size_t GetTotalContinuousExecutionsCount() const;
+
+private:
+  /// Traverse the context switch traces and recover the continuous executions
+  /// by thread.
+  llvm::Error DecodeContextSwitchTraces();
+
+  TraceIntelPT &m_trace;
+  std::set<lldb::core_id_t> m_cores;
+  std::set<lldb::tid_t> m_tids;
+  llvm::Optional<
+      llvm::DenseMap<lldb::tid_t, std::vector<ThreadContinuousExecution>>>
+      m_continuous_executions_per_thread;
+  LinuxPerfZeroTscConversion m_tsc_conversion;
+  /// This variable will be non-None if a severe error happened during the setup
+  /// of the decoder.
+  llvm::Optional<std::string> m_setup_error;
+};
+
+} // namespace trace_intel_pt
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp
new file mode 100644
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp
@@ -0,0 +1,312 @@
+//===-- TraceIntelPTMultiCoreDecoder.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceIntelPTMultiCoreDecoder.h"
+
+#include "TraceIntelPT.h"
+
+#include "llvm/Support/Error.h"
+
+#include <linux/perf_event.h>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::trace_intel_pt;
+using namespace llvm;
+
+struct PerfContextSwitchRecord {
+  struct perf_event_header header;
+  uint32_t next_prev_pid;
+  uint32_t next_prev_tid;
+  uint32_t pid, tid;
+  uint64_t time_in_nanos;
+
+  bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; }
+};
+
+struct ContextSwitchRecord {
+  uint64_t tsc;
+  bool is_out;
+  /// A pid of 0 indicates an execution in the kernel
+  lldb::pid_t pid;
+  lldb::tid_t tid;
+
+  bool IsOut() const { return is_out; }
+
+  bool IsIn() const { return !is_out; }
+};
+
+uint64_t ThreadContinuousExecution::GetErrorFreeTSC() const {
+  switch (variant) {
+  case Variant::Complete:
+    return tscs.complete.start; // end would also work
+  case Variant::HintedStart:
+    return tscs.hinted_start.end;
+  case Variant::HintedEnd:
+    return tscs.hinted_end.start;
+  case Variant::OnlyEnd:
+    return tscs.only_end.end;
+  case Variant::OnlyStart:
+    return tscs.only_start.start;
+  }
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
+    lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, uint64_t end) {
+  ThreadContinuousExecution o(core_id, tid);
+  o.variant = Variant::Complete;
+  o.tscs.complete.start = start;
+  o.tscs.complete.end = end;
+  return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
+    lldb::core_id_t core_id, lldb::tid_t tid, uint64_t hinted_start,
+    uint64_t end) {
+  ThreadContinuousExecution o(core_id, tid);
+  o.variant = Variant::HintedStart;
+  o.tscs.hinted_start.hinted_start = hinted_start;
+  o.tscs.hinted_start.end = end;
+  return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
+    lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start,
+    uint64_t hinted_end) {
+  ThreadContinuousExecution o(core_id, tid);
+  o.variant = Variant::HintedEnd;
+  o.tscs.hinted_end.start = start;
+  o.tscs.hinted_end.hinted_end = hinted_end;
+  return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
+    lldb::core_id_t core_id, lldb::tid_t tid, uint64_t end) {
+  ThreadContinuousExecution o(core_id, tid);
+  o.variant = Variant::OnlyEnd;
+  o.tscs.only_end.end = end;
+  return o;
+}
+
+ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
+    lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start) {
+  ThreadContinuousExecution o(core_id, tid);
+  o.variant = Variant::OnlyStart;
+  o.tscs.only_start.start = start;
+  return o;
+}
+
+bool ThreadContinuousExecution::operator<(
+    const ThreadContinuousExecution &o) const {
+  // We can compare by GetErrorFreeTSC because context switches across CPUs can
+  // be sorted by any of its TSC.
+  return GetErrorFreeTSC() < o.GetErrorFreeTSC();
+}
+
+/// Tries to recover a continuous execution by analyzing two consecutive context
+/// switch records.
+static Error
+HandleContextSwitch(core_id_t core_id,
+                    const LinuxPerfZeroTscConversion &tsc_conversion,
+                    const ContextSwitchRecord &record,
+                    const Optional<ContextSwitchRecord> &prev_record,
+                    std::function<void(ThreadContinuousExecution &&execution)>
+                        on_new_thread_execution) {
+  if (!prev_record) {
+    if (record.IsOut())
+      on_new_thread_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
+          core_id, record.tid, record.tsc));
+    // The 'in' case will be handled later when we try to look for its end
+    return Error::success();
+  }
+
+  const ContextSwitchRecord &prev = *prev_record;
+  if (prev.tsc > record.tsc)
+    return createStringError(
+        inconvertibleErrorCode(),
+        formatv("A context switch record out doesn't happen after the previous "
+                "record. Previous TSC= {0}, current TSC = {1}.",
+                prev.tsc, record.tsc));
+
+  if (record.IsIn() && prev.IsIn()) {
+    // We found two consecutive ins, which means that we didn't capture
+    // the end of the previous execution.
+    on_new_thread_execution(ThreadContinuousExecution::CreateHintedEndExecution(
+        core_id, prev.tid, prev.tsc, record.tsc - 1));
+  } else if (record.IsOut() && prev.IsOut()) {
+    // We found two consecutive outs, that means that we didn't capture
+    // the beginning of the current execution.
+    on_new_thread_execution(
+        ThreadContinuousExecution::CreateHintedStartExecution(
+            core_id, record.tid, prev.tsc + 1, record.tsc));
+  } else if (record.IsOut() && prev.IsIn()) {
+    if (record.pid == prev.pid && record.tid == prev.tid) {
+      /// A complete execution
+      on_new_thread_execution(
+          ThreadContinuousExecution::CreateCompleteExecution(
+              core_id, record.tid, prev.tsc, record.tsc));
+    } else {
+      // An out after the in of a different thread. The first one doesn't
+      // have an end, and the second one doesn't have a start.
+      on_new_thread_execution(
+          ThreadContinuousExecution::CreateHintedEndExecution(
+              core_id, prev.tid, prev.tsc, record.tsc - 1));
+      on_new_thread_execution(
+          ThreadContinuousExecution::CreateHintedStartExecution(
+              core_id, record.tid, prev.tsc + 1, record.tsc));
+    }
+  }
+  return Error::success();
+}
+
+/// Decodes a context switch trace gotten with perf_event_open.
+///
+/// \param[in] data
+///   The context switch trace in binary format.
+///
+/// \param[i] core_id
+///   The core_id where the trace were gotten from.
+///
+/// \param[in] tsc_conversion
+///   The conversion values used to confert nanoseconds to TSC.
+///
+/// \param[in] on_new_thread_execution
+///   Callback to be invoked whenever a continuous execution is recovered from
+///   the trace.
+static Error DecodePerfContextSwitchTrace(
+    ArrayRef<uint8_t> data, core_id_t core_id,
+    const LinuxPerfZeroTscConversion &tsc_conversion,
+    std::function<void(ThreadContinuousExecution &&execution)>
+        on_new_thread_execution) {
+  auto CreateError = [&](size_t offset, auto error) -> Error {
+    return createStringError(inconvertibleErrorCode(),
+                             formatv("Malformed perf context switch trace for "
+                                     "cpu {0} at offset {1}. {2}",
+                                     core_id, offset, error));
+  };
+
+  Optional<ContextSwitchRecord> prev_record;
+  for (size_t offset = 0; offset < data.size();) {
+    const PerfContextSwitchRecord &perf_record =
+        *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
+                                                           offset);
+    // A record of 1000 uint64_t's or more should mean that the data is wrong
+    if (perf_record.header.size == 0 ||
+        perf_record.header.size > sizeof(uint64_t) * 1000)
+      return CreateError(offset, formatv("A record of {0} bytes was found.",
+                                         perf_record.header.size));
+
+    // We add + 100 to this record because some systems might have custom
+    // records. In any case, we are looking only for abnormal data.
+    if (perf_record.header.type >= PERF_RECORD_MAX + 100)
+      return CreateError(offset, formatv("Invalid record type {0} was found.",
+                                         perf_record.header.type));
+
+    if (perf_record.header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+      ContextSwitchRecord record{tsc_conversion.ToTSC(std::chrono::nanoseconds(
+                                     perf_record.time_in_nanos)),
+                                 perf_record.IsOut(),
+                                 static_cast<lldb::pid_t>(perf_record.pid),
+                                 static_cast<lldb::tid_t>(perf_record.tid)};
+
+      if (Error err = HandleContextSwitch(core_id, tsc_conversion, record,
+                                          prev_record, on_new_thread_execution))
+        return CreateError(offset, toString(std::move(err)));
+
+      prev_record = record;
+    }
+    offset += perf_record.header.size;
+  }
+
+  // We might have an incomplete last record
+  if (prev_record && prev_record->IsIn())
+    on_new_thread_execution(ThreadContinuousExecution::CreateOnlyStartExecution(
+        core_id, prev_record->tid, prev_record->tsc));
+
+  return Error::success();
+}
+
+TraceIntelPTMultiCoreDecoder::TraceIntelPTMultiCoreDecoder(
+    TraceIntelPT &trace, ArrayRef<core_id_t> core_ids, ArrayRef<tid_t> tids,
+    const LinuxPerfZeroTscConversion &tsc_conversion)
+    : m_trace(trace), m_cores(core_ids.begin(), core_ids.end()),
+      m_tids(tids.begin(), tids.end()), m_tsc_conversion(tsc_conversion) {}
+
+bool TraceIntelPTMultiCoreDecoder::TracesThread(lldb::tid_t tid) const {
+  return m_tids.count(tid);
+}
+
+DecodedThreadSP TraceIntelPTMultiCoreDecoder::Decode(Thread &thread) {
+  if (Error err = DecodeContextSwitchTraces())
+    return std::make_shared<DecodedThread>(thread.shared_from_this(),
+                                           std::move(err));
+
+  return std::make_shared<DecodedThread>(
+      thread.shared_from_this(),
+      createStringError(inconvertibleErrorCode(), "unimplemented"));
+}
+
+Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() {
+  if (m_setup_error)
+    return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
+
+  if (m_continuous_executions_per_thread)
+    return Error::success();
+
+  m_continuous_executions_per_thread.emplace();
+
+  auto do_decode = [&]() -> Error {
+    // We'll decode all context switch traces, identify continuous executions
+    // and group them by thread.
+    for (core_id_t core_id : m_cores) {
+      Error err = m_trace.OnCoreBinaryDataRead(
+          core_id, IntelPTDataKinds::kPerfContextSwitchTrace,
+          [&](ArrayRef<uint8_t> data) -> Error {
+            return DecodePerfContextSwitchTrace(
+                data, core_id, m_tsc_conversion,
+                [&](const ThreadContinuousExecution &execution) {
+                  (*m_continuous_executions_per_thread)[execution.tid]
+                      .push_back(execution);
+                });
+          });
+      if (err) {
+        m_setup_error = toString(std::move(err));
+        return createStringError(inconvertibleErrorCode(),
+                                 m_setup_error->c_str());
+      }
+    }
+    // We now sort the executions of each to have them ready for instruction
+    // decoding
+    for (auto &tid_executions : *m_continuous_executions_per_thread)
+      std::sort(tid_executions.second.begin(), tid_executions.second.end());
+
+    return Error::success();
+  };
+
+  return m_trace.GetTimer().ForGlobal().TimeTask<Error>(
+      "Context switch trace decoding", do_decode);
+}
+
+size_t TraceIntelPTMultiCoreDecoder::GetNumContinuousExecutionsForThread(
+    lldb::tid_t tid) const {
+  if (!m_continuous_executions_per_thread)
+    return 0;
+  auto it = m_continuous_executions_per_thread->find(tid);
+  if (it == m_continuous_executions_per_thread->end())
+    return 0;
+  return it->second.size();
+}
+
+size_t TraceIntelPTMultiCoreDecoder::GetTotalContinuousExecutionsCount() const {
+  if (!m_continuous_executions_per_thread)
+    return 0;
+  size_t count = 0;
+  for (const auto &kv : *m_continuous_executions_per_thread)
+    count += kv.second.size();
+  return count;
+}
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
@@ -141,6 +141,7 @@
     else
       return HandleError(parsed_process.takeError());
   }
+
   return parsed_processes;
 }
 
@@ -191,6 +192,8 @@
           // Path to the raw Intel PT buffer for this core.
       "contextSwitchTrace": string,
           // Path to the raw perf_event_open context switch trace file for this core.
+          // The perf_event must have been configured with PERF_SAMPLE_TID and
+          // PERF_SAMPLE_TIME, as well as sample_id_all = 1.
     }
   ],
   "tscPerfZeroConversion"?: {
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
@@ -34,8 +34,9 @@
                                     ArrayRef<uint8_t> data) {
   std::basic_fstream<char> out_fs = std::fstream(
       output_file.GetPath().c_str(), std::ios::out | std::ios::binary);
-  out_fs.write(reinterpret_cast<const char *>(&data[0]),
-               data.size() * sizeof(uint8_t));
+  if (!data.empty())
+    out_fs.write(reinterpret_cast<const char *>(&data[0]), data.size());
+
   out_fs.close();
   if (!out_fs)
     return createStringError(inconvertibleErrorCode(),
diff --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp
--- a/lldb/source/Target/Trace.cpp
+++ b/lldb/source/Target/Trace.cpp
@@ -114,31 +114,31 @@
   return m_live_process->TraceGetState(GetPluginName());
 }
 
-Optional<size_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
-                                                    llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
+                                                      llvm::StringRef kind) {
   auto it = m_live_thread_data.find(tid);
   if (it == m_live_thread_data.end())
     return None;
-  std::unordered_map<std::string, size_t> &single_thread_data = it->second;
+  std::unordered_map<std::string, uint64_t> &single_thread_data = it->second;
   auto single_thread_data_it = single_thread_data.find(kind.str());
   if (single_thread_data_it == single_thread_data.end())
     return None;
   return single_thread_data_it->second;
 }
 
-Optional<size_t> Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
-                                                  llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id,
+                                                    llvm::StringRef kind) {
   auto it = m_live_core_data.find(core_id);
   if (it == m_live_core_data.end())
     return None;
-  std::unordered_map<std::string, size_t> &single_core_data = it->second;
+  std::unordered_map<std::string, uint64_t> &single_core_data = it->second;
   auto single_thread_data_it = single_core_data.find(kind.str());
   if (single_thread_data_it == single_core_data.end())
     return None;
   return single_thread_data_it->second;
 }
 
-Optional<size_t> Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) {
+Optional<uint64_t> Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) {
   auto data_it = m_live_process_data.find(kind.str());
   if (data_it == m_live_process_data.end())
     return None;
@@ -150,7 +150,7 @@
   if (!m_live_process)
     return createStringError(inconvertibleErrorCode(),
                              "Tracing requires a live process.");
-  llvm::Optional<size_t> size = GetLiveThreadBinaryDataSize(tid, kind);
+  llvm::Optional<uint64_t> size = GetLiveThreadBinaryDataSize(tid, kind);
   if (!size)
     return createStringError(
         inconvertibleErrorCode(),
@@ -167,7 +167,7 @@
   if (!m_live_process)
     return createStringError(inconvertibleErrorCode(),
                              "Tracing requires a live process.");
-  llvm::Optional<size_t> size = GetLiveCoreBinaryDataSize(core_id, kind);
+  llvm::Optional<uint64_t> size = GetLiveCoreBinaryDataSize(core_id, kind);
   if (!size)
     return createStringError(
         inconvertibleErrorCode(),
@@ -185,7 +185,7 @@
   if (!m_live_process)
     return createStringError(inconvertibleErrorCode(),
                              "Tracing requires a live process.");
-  llvm::Optional<size_t> size = GetLiveProcessBinaryDataSize(kind);
+  llvm::Optional<uint64_t> size = GetLiveProcessBinaryDataSize(kind);
   if (!size)
     return createStringError(
         inconvertibleErrorCode(),
diff --git a/lldb/source/Utility/TraceGDBRemotePackets.cpp b/lldb/source/Utility/TraceGDBRemotePackets.cpp
--- a/lldb/source/Utility/TraceGDBRemotePackets.cpp
+++ b/lldb/source/Utility/TraceGDBRemotePackets.cpp
@@ -120,7 +120,7 @@
 bool fromJSON(const json::Value &value, TraceCoreState &packet,
               json::Path path) {
   ObjectMapper o(value, path);
-  int64_t core_id;
+  uint64_t core_id;
   if (!o || !o.map("coreId", core_id) ||
       !o.map("binaryData", packet.binary_data))
     return false;
@@ -148,9 +148,15 @@
 bool fromJSON(const json::Value &value, TraceGetBinaryDataRequest &packet,
               Path path) {
   ObjectMapper o(value, path);
-  return o && o.map("type", packet.type) && o.map("kind", packet.kind) &&
-         o.map("tid", packet.tid) && o.map("offset", packet.offset) &&
-         o.map("size", packet.size) && o.map("coreId", packet.core_id);
+  Optional<uint64_t> core_id;
+  if (!o || !o.map("type", packet.type) || !o.map("kind", packet.kind) ||
+      !o.map("tid", packet.tid) || !o.map("offset", packet.offset) ||
+      !o.map("size", packet.size) || !o.map("coreId", core_id))
+    return false;
+
+  if (core_id)
+    packet.core_id = static_cast<lldb::core_id_t>(*core_id);
+  return true;
 }
 /// \}
 
diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
--- a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
+++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp
@@ -50,7 +50,8 @@
   return base;
 }
 
-std::chrono::nanoseconds LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) {
+std::chrono::nanoseconds
+LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) const {
   uint64_t quot = tsc >> time_shift;
   uint64_t rem_flag = (((uint64_t)1 << time_shift) - 1);
   uint64_t rem = tsc & rem_flag;
@@ -58,6 +59,14 @@
                                   ((rem * time_mult) >> time_shift)};
 }
 
+uint64_t
+LinuxPerfZeroTscConversion::ToTSC(std::chrono::nanoseconds nanos) const {
+  uint64_t time = nanos.count() - time_zero;
+  uint64_t quot = time / time_mult;
+  uint64_t rem = time % time_mult;
+  return (quot << time_shift) + (rem << time_shift) / time_mult;
+}
+
 json::Value toJSON(const LinuxPerfZeroTscConversion &packet) {
   return json::Value(json::Object{
       {"timeMult", packet.time_mult},
diff --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
--- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py
+++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py
@@ -45,8 +45,8 @@
     Total approximate memory usage (excluding raw trace): 1.27 KiB
     Average memory usage per instruction (excluding raw trace): 61.76 bytes
 
-  Timing:
-    Decoding instructions: ''', '''s
+  Timing for this thread:
+    Decoding instructions: ''', '''
 
   Events:
     Number of instructions with events: 1
diff --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py
--- a/lldb/test/API/commands/trace/TestTraceLoad.py
+++ b/lldb/test/API/commands/trace/TestTraceLoad.py
@@ -43,8 +43,8 @@
     Total approximate memory usage (excluding raw trace): 1.27 KiB
     Average memory usage per instruction (excluding raw trace): 61.76 bytes
 
-  Timing:
-    Decoding instructions: ''', '''s
+  Timing for this thread:
+    Decoding instructions: ''', '''
 
   Events:
     Number of instructions with events: 1
diff --git a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py
--- a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py
+++ b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py
@@ -233,5 +233,6 @@
         # We must have captured the context switch of when the target resumed
         self.assertTrue(found_non_empty_context_switch)
 
+        self.expect("thread trace dump instructions", substrs=['unimplemented'])
 
         self.traceStopProcess()