[compiler-rt][XRay] re-submitting r276117, with fixes for build breakage due to extraneous and missing dependencies and attempts to build on unsupported OSes

deanberris · deanberris · commit 938c5031ab99 · 2016-07-21T07:39:55.000Z
Summary: This is a fixed-up version of D21612, to address failure identified post-commit. Original commit description: This patch implements the initialisation and patching routines for the XRay runtime, along with the necessary trampolines for function entry/exit handling. For now we only define the basic hooks for allowing an implementation to define a handler that gets run on function entry/exit. We expose a minimal API for controlling the behaviour of the runtime (patching, cleanup, and setting the handler to invoke when instrumenting). Fixes include: - Gating XRay build to only Linux x86_64 and with the right dependencies in case it is the only library being built - Including <cstddef> to fix std::size_t issue Reviewers: kcc, rnk, echristo Subscribers: mehdi_amini, llvm-commits Differential Revision: https://reviews.llvm.org/D22611 llvm-svn: 276251
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
@@ -37,6 +37,8 @@ option(COMPILER_RT_BUILD_BUILTINS "Build builtins" ON)
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON)
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
+option(COMPILER_RT_BUILD_XRAY "Build xray" ON)
+mark_as_advanced(COMPILER_RT_BUILD_XRAY)
 
 if (COMPILER_RT_STANDALONE_BUILD)
   if (NOT LLVM_CONFIG_PATH)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
@@ -161,6 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
@@ -350,6 +351,9 @@ if(APPLE)
   list_intersect(SCUDO_SUPPORTED_ARCH
     ALL_SCUDO_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  # XRay: intersect its candidate architectures with sanitizer_common's.
+  list_intersect(XRAY_SUPPORTED_ARCH
+    ALL_XRAY_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 else()
   # Architectures supported by compiler-rt libraries.
   filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
@@ -373,6 +377,7 @@ else()
   filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_SUPPORTED_ARCH
     ${ALL_SCUDO_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
 endif()
 
 if (MSVC)
@@ -493,3 +498,9 @@ else()
   set(COMPILER_RT_HAS_SCUDO FALSE)
 endif()
 
+# XRay is enabled only when sanitizer_common is available, at least one
+# architecture remains in XRAY_SUPPORTED_ARCH, and OS_NAME matches "Linux".
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_XRAY TRUE)
+else()
+  set(COMPILER_RT_HAS_XRAY FALSE)
+endif()
diff --git a/compiler-rt/include/CMakeLists.txt b/compiler-rt/include/CMakeLists.txt
@@ -10,11 +10,18 @@ set(SANITIZER_HEADERS
   sanitizer/msan_interface.h
   sanitizer/tsan_interface_atomic.h)
 
+set(XRAY_HEADERS
+  xray/xray_interface.h)
+
+# Every public header that is copied into the build tree below.
+set(COMPILER_RT_HEADERS
+  ${SANITIZER_HEADERS}
+  ${XRAY_HEADERS})
+
 set(output_dir ${COMPILER_RT_OUTPUT_DIR}/include)
 
 # Copy compiler-rt headers to the build tree.
 set(out_files)
-foreach( f ${SANITIZER_HEADERS} )
+foreach( f ${COMPILER_RT_HEADERS} )
   set( src ${CMAKE_CURRENT_SOURCE_DIR}/${f} )
   set( dst ${output_dir}/${f} )
   add_custom_command(OUTPUT ${dst}
@@ -32,3 +39,7 @@ set_target_properties(compiler-rt-headers PROPERTIES FOLDER "Compiler-RT Misc")
 install(FILES ${SANITIZER_HEADERS}
   PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
   DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/sanitizer)
+# Install xray headers.
+install(FILES ${XRAY_HEADERS}
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+  DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/xray)
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
@@ -0,0 +1,66 @@
+//===-- xray_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// APIs for controlling XRay functionality explicitly.
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_INTERFACE_H
+#define XRAY_XRAY_INTERFACE_H
+
+#include <cstdint>
+
+extern "C" {
+
+// Identifies which kind of instrumentation point invoked the handler.
+enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+
+// Provide a function to invoke for when instrumentation points are hit. This is
+// a user-visible control surface that overrides the default implementation. The
+// function provided should take the following arguments:
+//
+//   - function id: an identifier that indicates the id of a function; this id
+//                  is generated by xray; the mapping between the function id
+//                  and the actual function pointer is available through
+//                  __xray_table.
+//   - entry type: identifies what kind of instrumentation point was encountered
+//                 (function entry, function exit, etc.). See the enum
+//                 XRayEntryType for more details.
+//
+// Returns 1 on success, 0 on error (for example when XRay has not been
+// initialized yet).
+extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
+
+// This removes whatever the currently provided handler is. Returns 1 on
+// success, 0 on error.
+extern int __xray_remove_handler();
+
+// Status codes reported by the patching entry points below.
+enum XRayPatchingStatus {
+  NOT_INITIALIZED = 0,
+  NOTIFIED = 1,
+  ONGOING = 2,
+  FAILED = 3
+};
+
+// This tells XRay to patch the instrumentation points. This is an asynchronous
+// process, and returns the following status in specific cases:
+//
+//   - 0 : XRay is not initialized.
+//   - 1 : We've done the notification.
+//   - 2 : Patching / un-patching is on-going.
+//   - 3 : Patching failed (for example the code pages could not be made
+//         writeable).
+//
+// NOTE(review): the implementation added alongside this header carries a FIXME
+// saying patching is currently done sequentially, so "asynchronous" describes
+// the intended design rather than the present behaviour -- confirm.
+extern XRayPatchingStatus __xray_patch();
+
+// Reverses the effect of __xray_patch(). This is an asynchronous process, and
+// returns the following status in specific cases.
+//
+//   - 0 : XRay is not initialized.
+//   - 1 : We've done the notification.
+//   - 2 : Patching / un-patching is on-going.
+//
+// NOTE(review): declared to return int although the codes listed are the
+// XRayPatchingStatus values returned by __xray_patch() -- confirm whether the
+// differing return type is intentional.
+extern int __xray_unpatch();
+}
+
+#endif
diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt
@@ -4,6 +4,15 @@
 include(AddCompilerRT)
 include(SanitizerUtils)
 
+# Build sanitizer_common whenever we're building either the sanitizers or xray
+# (or both), since both of them use it.
+#
+# TODO: Refactor sanitizer_common into smaller pieces (e.g. flag parsing, utils).
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND
+    (COMPILER_RT_BUILD_SANITIZERS OR COMPILER_RT_BUILD_XRAY))
+  add_subdirectory(sanitizer_common)
+endif()
+
 if(COMPILER_RT_BUILD_BUILTINS)
   add_subdirectory(builtins)
 endif()
@@ -14,7 +23,6 @@ if(COMPILER_RT_BUILD_SANITIZERS)
   endif()
 
   if(COMPILER_RT_HAS_SANITIZER_COMMON)
-    add_subdirectory(sanitizer_common)
     add_subdirectory(stats)
     add_subdirectory(lsan)
     add_subdirectory(ubsan)
@@ -57,3 +65,7 @@ if(COMPILER_RT_BUILD_SANITIZERS)
     add_subdirectory(scudo)
   endif()
 endif()
+
+if(COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY)
+  add_subdirectory(xray)
+endif()
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Build for the XRay runtime support library.
+
+# Architecture-independent sources.
+set(XRAY_SOURCES
+  xray_init.cc
+  xray_interface.cc
+  xray_flags.cc
+)
+
+# x86_64 additionally needs the hand-written trampolines.
+set(x86_64_SOURCES
+  xray_trampoline_x86.S
+  ${XRAY_SOURCES})
+
+include_directories(..)
+include_directories(../../include)
+
+set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+
+add_compiler_rt_object_libraries(RTXray
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+  DEFS ${XRAY_COMMON_DEFINITIONS})
+
+# Umbrella target that every per-architecture XRay runtime is attached to.
+add_custom_target(xray)
+set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
+  RTSanitizerCommon
+  RTSanitizerCommonLibc)
+
+foreach (arch ${XRAY_SUPPORTED_ARCH})
+  if (CAN_TARGET_${arch})
+    add_compiler_rt_runtime(clang_rt.xray
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${${arch}_SOURCES}
+      CFLAGS ${XRAY_CFLAGS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+      PARENT_TARGET xray)
+  endif ()
+endforeach()
+
+add_dependencies(compiler-rt xray)
+
+# if(COMPILER_RT_INCLUDE_TESTS)
+#   add_subdirectory(tests)
+# endif()
diff --git a/compiler-rt/lib/xray/xray_flags.cc b/compiler-rt/lib/xray/xray_flags.cc
@@ -0,0 +1,61 @@
+//===-- xray_flags.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+Flags xray_flags_dont_use_directly; // use via flags().
+
+// Resets every flag listed in xray_flags.inc to its declared default value.
+void Flags::SetDefaults() {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+// Registers every flag listed in xray_flags.inc with the parser P, binding each
+// flag name to the matching member of F.
+static void RegisterXRayFlags(FlagParser *P, Flags *F) {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \
+  RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+// Sets the common and XRay-specific flags to their defaults, then applies any
+// overrides found in the XRAY_OPTIONS environment variable.
+void InitializeFlags() {
+  SetCommonFlagsDefaults();
+  auto *F = flags();
+  F->SetDefaults();
+
+  // A single parser handles both the XRay flags and the common flags.
+  FlagParser XRayParser;
+  RegisterXRayFlags(&XRayParser, F);
+  RegisterCommonFlags(&XRayParser);
+
+  // Override from the XRAY_OPTIONS environment variable.
+  XRayParser.ParseString(GetEnv("XRAY_OPTIONS"));
+
+  InitializeCommonFlags();
+
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+
+  if (common_flags()->help) {
+    XRayParser.PrintFlagDescriptions();
+  }
+}
+
+} // namespace __xray
diff --git a/compiler-rt/lib/xray/xray_flags.h b/compiler-rt/lib/xray/xray_flags.h
@@ -0,0 +1,37 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __xray {
+
+// Holds one member per flag declared in xray_flags.inc.
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+  // Assigns each flag the DefaultValue given in xray_flags.inc.
+  void SetDefaults();
+};
+
+// Storage for the XRay flags; access it through flags().
+extern Flags xray_flags_dont_use_directly;
+// Returns a pointer to the XRay flags object.
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+// Sets the flags to their defaults and applies overrides from the XRAY_OPTIONS
+// environment variable.
+void InitializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/compiler-rt/lib/xray/xray_flags.inc b/compiler-rt/lib/xray/xray_flags.inc
@@ -0,0 +1,18 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, true,
+          "Whether to patch instrumentation points before main.")
diff --git a/compiler-rt/lib/xray/xray_init.cc b/compiler-rt/lib/xray/xray_init.cc
diff --git a/compiler-rt/lib/xray/xray_interface.cc b/compiler-rt/lib/xray/xray_interface.cc
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86.S b/compiler-rt/lib/xray/xray_trampoline_x86.S

-Original file line number
+Diff line change
 +//===-- xray_init.cc --------------------------------------------*- C++ -*-===//
 +//
 +//                     The LLVM Compiler Infrastructure
 +//
 +// This file is distributed under the University of Illinois Open Source
 +// License. See LICENSE.TXT for details.
 +//
 +//===----------------------------------------------------------------------===//
 +//
 +// This file is a part of XRay, a dynamic runtime instrumentation system.
 +//
 +// XRay initialisation logic.
 +//===----------------------------------------------------------------------===//
++
 +#include <atomic>
 +#include <fcntl.h>
 +#include <strings.h>
 +#include <unistd.h>
++
 +#include "sanitizer_common/sanitizer_common.h"
 +#include "xray_flags.h"
 +#include "xray_interface_internal.h"
++
 +extern "C" {
 +extern void __xray_init();
 +extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
 +extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
 +}
++
 +using namespace __xray;
++
 +// Global XRay state for the current process. The sled table is located through
 +// the __start_xray_instr_map / __stop_xray_instr_map symbols declared above; no
 +// process memory map is parsed. XRayInitialized reports whether initialization
 +// completed: it stays false when the instrumentation map is missing.
 +//
 +std::atomic<bool> XRayInitialized{false};
++
 +// This should always be updated before XRayInitialized is updated.
 +// NOTE(review): XRaySledMap holds a pointer plus a size_t, so this atomic may
 +// not be lock-free on every toolchain -- confirm.
 +std::atomic<__xray::XRaySledMap> XRayInstrMap{};
++
 +// __xray_init() parses the XRay flags, records the sled table delimited by
 +// __start_xray_instr_map / __stop_xray_instr_map, marks XRay as initialized
 +// and, when the patch_premain flag is set, patches the sleds right away.
 +void __xray_init() {
 +  InitializeFlags();
++
 +  const XRaySledEntry *const FirstSled = __start_xray_instr_map;
 +  if (FirstSled == nullptr) {
 +    Report("XRay instrumentation map missing. Not initializing XRay.\n");
 +    return;
 +  }
++
 +  // Describe the sled table using the section boundary symbols, then publish
 +  // it. The map is stored before the initialized flag so that a reader which
 +  // observes the flag also observes the map.
 +  __xray::XRaySledMap Map{};
 +  Map.Entries = __stop_xray_instr_map - FirstSled;
 +  Map.Sleds = FirstSled;
 +  XRayInstrMap.store(Map, std::memory_order_release);
 +  XRayInitialized.store(true, std::memory_order_release);
++
 +  if (!flags()->patch_premain)
 +    return;
 +  __xray_patch();
 +}
++
 +__attribute__((section(".preinit_array"),
 +               used)) void (*__local_xray_preinit)(void) = __xray_init;
-Original file line number
+Diff line change
 +//===-- xray_interface.cc ---------------------------------------*- C++ -*-===//
 +//
 +//                     The LLVM Compiler Infrastructure
 +//
 +// This file is distributed under the University of Illinois Open Source
 +// License. See LICENSE.TXT for details.
 +//
 +//===----------------------------------------------------------------------===//
 +//
 +// This file is a part of XRay, a dynamic runtime instrumentation system.
 +//
 +// Implementation of the API functions.
 +//
 +//===----------------------------------------------------------------------===//
++
 +#include "xray_interface_internal.h"
 +#include <atomic>
 +#include <cstdint>
 +#include <cstdio>
 +#include <errno.h>
 +#include <limits>
 +#include <sys/mman.h>
 +#include <unistd.h>
++
 +namespace __xray {
++
 +// This is the function to call when we encounter the entry or exit sleds.
 +std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
++
 +} // namespace __xray
++
 +extern "C" {
 +// The following functions have to be defined in assembler, on a per-platform
 +// basis. See xray_trampoline_*.S files for implementations.
 +extern void __xray_FunctionEntry();
 +extern void __xray_FunctionExit();
 +}
++
 +extern std::atomic<bool> XRayInitialized;
 +extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
++
 +// Installs `entry` as the function called from the entry/exit trampolines.
 +// Returns 1 on success and 0 when XRay has not been initialized.
 +int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)) {
 +  const bool Initialized = XRayInitialized.load(std::memory_order_acquire);
 +  if (!Initialized)
 +    return 0;
++
 +  __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
 +  return 1;
 +}
++
 +std::atomic<bool> XRayPatching{false};
++
 +namespace {
++
 +// Clears the global "patching in progress" flag when it goes out of scope, so
 +// that every exit path from __xray_patch() allows a later call to proceed.
 +struct XRayPatchingReset {
 +  ~XRayPatchingReset() {
 +    XRayPatching.store(false, std::memory_order_release);
 +  }
 +};
++
 +// Rewrites the sled starting at Address into:
 +//
 +//   mov r10d, <function id>
 +//   <BranchOpCode> <relative 32bit offset to Trampoline>
 +//
 +// BranchOpCode is the call opcode for entry sleds and the jmp opcode for exit
 +// sleds. The memory at Address must already be writeable. Returns false, and
 +// leaves the sled untouched, when Trampoline cannot be reached with a signed
 +// 32-bit displacement.
 +bool patchSled(uint64_t Address, int32_t FuncId, void (*Trampoline)(),
 +               uint8_t BranchOpCode) {
 +  static constexpr uint16_t MovR10Seq = 0xba41;
 +  static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
 +  static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
++
 +  // The displacement is relative to the end of the branch instruction, which
 +  // sits 11 bytes past the start of the sled.
 +  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
 +                             (static_cast<int64_t>(Address) + 11);
 +  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset)
 +    return false;
++
 +  // We need to do this in the following order:
 +  //
 +  // 1. Put the function id first, 2 bytes from the start of the sled.
 +  // 2. Put the branch opcode 6 bytes from the start of the sled.
 +  // 3. Put the relative offset 7 bytes from the start of the sled.
 +  // 4. Do an atomic write over the first two bytes of the sled for the
 +  //    "mov r10d" opcode, which is what activates the patched sequence.
 +  *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
 +  *reinterpret_cast<uint8_t *>(Address + 6) = BranchOpCode;
 +  *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
 +  std::atomic_store_explicit(
 +      reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
 +      std::memory_order_release);
 +  return true;
 +}
++
 +} // namespace
++
 +// Patches every sled in the instrumentation map so that it calls (entry sleds)
 +// or jumps to (exit sleds) the matching trampoline with the function id in
 +// r10d.
 +//
 +// Returns:
 +//   - NOT_INITIALIZED when XRay is not initialized or the map is empty.
 +//   - ONGOING when another call is already patching.
 +//   - FAILED when the page size is unavailable or mprotect fails.
 +//   - NOTIFIED once all sleds have been visited.
 +XRayPatchingStatus __xray_patch() {
 +  // FIXME: Make this happen asynchronously. For now just do this sequentially.
 +  if (!XRayInitialized.load(std::memory_order_acquire))
 +    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
++
 +  // compare_exchange_strong overwrites its expected-value argument when it
 +  // fails, so the expected value has to be a fresh local on every call.
 +  bool NotPatching = false;
 +  if (!XRayPatching.compare_exchange_strong(NotPatching, true,
 +                                            std::memory_order_acq_rel,
 +                                            std::memory_order_acquire)) {
 +    return XRayPatchingStatus::ONGOING; // Already patching.
 +  }
++
 +  // From here on we own the patching flag; release it on every return path.
 +  XRayPatchingReset ResetOnExit;
++
 +  // Step 1: Compute the function id, as a unique identifier per function in the
 +  // instrumentation map.
 +  __xray::XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
 +  if (InstrMap.Entries == 0)
 +    return XRayPatchingStatus::NOT_INITIALIZED;
++
 +  // mprotect works on whole pages, so round sled addresses down to the start
 +  // of their page using the real page size.
 +  const long PageSize = sysconf(_SC_PAGESIZE);
 +  if (PageSize <= 0)
 +    return XRayPatchingStatus::FAILED;
 +  const uint64_t PageMask = ~(static_cast<uint64_t>(PageSize) - 1);
++
 +  int32_t FuncId = 1;
 +  static constexpr uint8_t CallOpCode = 0xe8;
 +  static constexpr uint8_t JmpOpCode = 0xe9;
 +  uint64_t CurFun = 0;
 +  for (std::size_t I = 0; I < InstrMap.Entries; I++) {
 +    auto Sled = InstrMap.Sleds[I];
 +    auto F = Sled.Function;
 +    if (CurFun == 0)
 +      CurFun = F;
 +    if (F != CurFun) {
 +      ++FuncId;
 +      CurFun = F;
 +    }
++
 +    // While we're here, we should patch the nop sled. To do that we mprotect
 +    // the page(s) containing the sled to be writeable.
 +    void *PageAlignedAddr =
 +        reinterpret_cast<void *>(Sled.Address & PageMask);
 +    std::size_t MProtectLen =
 +        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
 +    if (mprotect(PageAlignedAddr, MProtectLen,
 +                 PROT_READ | PROT_WRITE | PROT_EXEC) == -1) {
 +      printf("Failed mprotect: %d\n", errno);
 +      return XRayPatchingStatus::FAILED;
 +    }
++
 +    // Entry sleds ("jmp +9" followed by a 9 byte nop) become a call to the
 +    // entry trampoline; exit sleds ("ret" followed by a 10 byte nop) become a
 +    // jmp to the exit trampoline. A sled whose trampoline is out of range is
 +    // left as it was, but we still fall through to restore the protection.
 +    // FIXME: Print out an error when the trampoline is out of range.
 +    if (Sled.Kind == XRayEntryType::ENTRY)
 +      patchSled(Sled.Address, FuncId, __xray_FunctionEntry, CallOpCode);
 +    else if (Sled.Kind == XRayEntryType::EXIT)
 +      patchSled(Sled.Address, FuncId, __xray_FunctionExit, JmpOpCode);
++
 +    if (mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC) == -1) {
 +      printf("Failed mprotect: %d\n", errno);
 +      return XRayPatchingStatus::FAILED;
 +    }
 +  }
 +  return XRayPatchingStatus::NOTIFIED;
 +}
-Original file line number
+Diff line change
 +//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
 +//
 +//                     The LLVM Compiler Infrastructure
 +//
 +// This file is distributed under the University of Illinois Open Source
 +// License. See LICENSE.TXT for details.
 +//
 +//===----------------------------------------------------------------------===//
 +//
 +// This file is a part of XRay, a dynamic runtime instrumentation system.
 +//
 +// Implementation of the API functions. See also include/xray/xray_interface.h.
 +//
 +//===----------------------------------------------------------------------===//
 +#ifndef XRAY_INTERFACE_INTERNAL_H
 +#define XRAY_INTERFACE_INTERNAL_H
++
 +#include "xray/xray_interface.h"
 +#include <cstddef>
 +#include <cstdint>
++
 +extern "C" {
++
 +// One record of the instrumentation map; the runtime reads an array of these
 +// between __start_xray_instr_map and __stop_xray_instr_map.
 +struct XRaySledEntry {
 +  // Address of the sled; the patching code writes instructions here.
 +  uint64_t Address;
 +  // Identifies the owning function: consecutive entries with the same value
 +  // are given the same function id when patching.
 +  uint64_t Function;
 +  // Sled kind; compared against the XRayEntryType values when patching.
 +  unsigned char Kind;
 +  // NOTE(review): not read by the runtime code in this change -- presumably
 +  // set by the compiler for always-instrumented functions; confirm.
 +  unsigned char AlwaysInstrument;
 +  unsigned char Padding[14]; // Need 32 bytes
 +};
 +}
++
 +namespace __xray {
++
 +// A view over the sled table of the current binary.
 +struct XRaySledMap {
 +  // First entry of the table.
 +  const XRaySledEntry *Sleds;
 +  // Number of entries reachable from Sleds.
 +  size_t Entries;
 +};
++
 +} // namespace __xray
++
 +#endif
-Original file line number
+Diff line change
 +//===-- xray_trampoline_x86.S -----------------------------------*- ASM -*-===//
 +//
 +//                     The LLVM Compiler Infrastructure
 +//
 +// This file is distributed under the University of Illinois Open Source
 +// License. See LICENSE.TXT for details.
 +//
 +//===----------------------------------------------------------------------===//
 +//
 +// This file is a part of XRay, a dynamic runtime instrumentation system.
 +//
 +// This implements the X86-specific assembler for the trampolines.
 +//
 +//===----------------------------------------------------------------------===//
++
 +	.text
 +	.file "xray_trampoline_x86.S"
 +	.globl __xray_FunctionEntry
 +	.align 16, 0x90
 +	.type __xray_FunctionEntry,@function
++
 +// Called from a patched entry sled with the function id in %r10d. Invokes the
 +// handler stored in __xray::XRayPatchedFunction, if any, as
 +// handler(function id, 0), preserving the integer argument registers and %rax
 +// around the call.
 +// NOTE(review): vector registers are not saved here, so a handler that
 +// clobbers them would corrupt floating point arguments -- confirm.
 +__xray_FunctionEntry:
 +	.cfi_startproc
 +	// Save caller provided registers before doing any actual work.
 +	pushq	%rbp
 +	.cfi_def_cfa_offset 16
 +	subq	$72, %rsp
 +	// Keep the unwind information in sync with the stack adjustment above.
 +	.cfi_def_cfa_offset 88
 +	movq	%rdi, 64(%rsp)
 +	movq	%rax, 56(%rsp)
 +	movq	%rdx, 48(%rsp)
 +	movq	%rsi, 40(%rsp)
 +	movq	%rcx, 32(%rsp)
 +	movq	%r8, 24(%rsp)
 +	movq	%r9, 16(%rsp)
++
 +	// de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
 +	// load (on x86 is a normal mov instruction).
 +	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
 +	testq	%rax, %rax
 +	je	.Ltmp0
++
 +	// assume that %r10d has the function id.
 +	movl	%r10d, %edi
 +	// Second argument is XRayEntryType::ENTRY (0).
 +	xor	%esi,%esi
 +	callq	*%rax
 +.Ltmp0:
 +	// restore the registers
 +	movq	64(%rsp), %rdi
 +	movq	56(%rsp), %rax
 +	movq	48(%rsp), %rdx
 +	movq	40(%rsp), %rsi
 +	movq	32(%rsp), %rcx
 +	movq	24(%rsp), %r8
 +	movq	16(%rsp), %r9
 +	addq	$72, %rsp
 +	.cfi_def_cfa_offset 16
 +	popq	%rbp
 +	.cfi_def_cfa_offset 8
 +	retq
 +.Ltmp1:
 +	.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
 +	.cfi_endproc
++
 +	.globl __xray_FunctionExit
 +	.align 16, 0x90
 +	.type __xray_FunctionExit,@function
 +// Jumped to from a patched exit sled with the function id in %r10d. Invokes
 +// the handler stored in __xray::XRayPatchedFunction, if any, as
 +// handler(function id, 1), then returns to the instrumented function's caller.
 +__xray_FunctionExit:
 +	.cfi_startproc
 +	// Save the important registers first. Since we're assuming that this
 +	// function is only jumped into, we only preserve the registers for
 +	// returning.
 +	// FIXME: Figure out whether this is sufficient.
 +	pushq	%rbp
 +	.cfi_def_cfa_offset 16
 +	// We arrive here by a jmp placed where the function's ret used to be, so
 +	// %rsp is 8 bytes past a 16-byte boundary on arrival. After the push above,
 +	// reserving 32 bytes (not 24) keeps %rsp 16-byte aligned at the callq
 +	// below, as the calling convention requires.
 +	subq	$32, %rsp
 +	.cfi_def_cfa_offset 48
 +	movq	%rax, 16(%rsp)
 +	movq	%rdx, 8(%rsp)
 +	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
 +	testq	%rax,%rax
 +	je	.Ltmp2
++
 +	movl	%r10d, %edi
 +	// Second argument is XRayEntryType::EXIT (1).
 +	movl	$1, %esi
 +	callq	*%rax
 +.Ltmp2:
 +	// Restore the important registers.
 +	movq	16(%rsp), %rax
 +	movq	8(%rsp), %rdx
 +	addq	$32, %rsp
 +	.cfi_def_cfa_offset 16
 +	popq	%rbp
 +	.cfi_def_cfa_offset 8
 +	retq
 +.Ltmp3:
 +	.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
 +	.cfi_endproc