Index: lib/xray/xray_arm.cc
===================================================================
--- lib/xray/xray_arm.cc
+++ lib/xray/xray_arm.cc
@@ -19,6 +19,10 @@
 #include <atomic>
 #include <cassert>
 
+// Runtime-library routine (libgcc / compiler-rt builtins) that flushes the
+// instruction cache for the address range [start, end).
+extern "C" void __clear_cache(void* start, void* end);
+
 namespace __xray {
 
 uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
@@ -116,8 +120,10 @@
   //   B #20
 
   uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  // Declared outside the branch so the __clear_cache call after it can use
+  // CurAddress as the end of the range to flush.
+  uint32_t *CurAddress = FirstAddress + 1;
   if (Enable) {
-    uint32_t *CurAddress = FirstAddress + 1;
     CurAddress =
         Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
     CurAddress =
@@ -125,6 +131,8 @@
     *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
     CurAddress++;
     *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
+    // Advance past the last written word: CurAddress is now the range end.
+    CurAddress++;
     std::atomic_store_explicit(
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
@@ -133,6 +141,10 @@
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
   }
+  // The sled was rewritten through data stores; flush the instruction cache
+  // over [FirstAddress, CurAddress) so the new code is what gets fetched.
+  __clear_cache(reinterpret_cast<char*>(FirstAddress),
+      reinterpret_cast<char*>(CurAddress));
   return true;
 }
 
Index: test/xray/lit.cfg
===================================================================
--- test/xray/lit.cfg
+++ test/xray/lit.cfg
@@ -30,8 +30,16 @@
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
-if config.host_os not in ['Linux'] or config.host_arch.find('64') == -1:
+# XRay tests run only on Linux hosts. A host arch name without '64' is
+# accepted only when it contains 'arm' and the target cflags lack '-mthumb'.
+if config.host_os not in ['Linux']:
   config.unsupported = True
+elif '64' not in config.host_arch:
+  if 'arm' in config.host_arch:
+    if '-mthumb' in config.target_cflags:
+      config.unsupported = True
+  else:
+    config.unsupported = True
 
 # Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
 # e.g. because the test sometimes passes, sometimes fails.