diff --git a/bolt/test/X86/asm-dump.c b/bolt/test/X86/asm-dump.c
--- a/bolt/test/X86/asm-dump.c
+++ b/bolt/test/X86/asm-dump.c
@@ -1,13 +1,14 @@
 /**
  * Test for asm-dump functionality.
  *
- * REQUIRES: system-linux,bolt-runtime
+ * REQUIRES: x86_64-linux,bolt-runtime
  *
  * Compile the source
  * RUN: %clang -fPIC %s -o %t.exe -Wl,-q
  *
  * Profile collection: instrument the binary
- * RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata -o %t.instr
+ * RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata -o \
+ * RUN:   %t.instr
  *
  * Profile collection: run instrumented binary (and capture output)
  * RUN: %t.instr > %t.result
diff --git a/bolt/test/X86/bolt-address-translation-internal-call.test b/bolt/test/X86/bolt-address-translation-internal-call.test
--- a/bolt/test/X86/bolt-address-translation-internal-call.test
+++ b/bolt/test/X86/bolt-address-translation-internal-call.test
@@ -4,7 +4,7 @@
 # internal calls) might create new blocks without a mapping to an
 # input block.
 
-# REQUIRES: system-linux,bolt-runtime
+# REQUIRES: x86_64-linux,bolt-runtime
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # Delete our BB symbols so BOLT doesn't mark them as entry points
diff --git a/bolt/test/X86/internal-call-instrument.s b/bolt/test/X86/internal-call-instrument.s
--- a/bolt/test/X86/internal-call-instrument.s
+++ b/bolt/test/X86/internal-call-instrument.s
@@ -1,6 +1,6 @@
 # This reproduces a bug with instrumentation crashes on internal call
 
-# REQUIRES: system-linux,bolt-runtime
+# REQUIRES: x86_64-linux,bolt-runtime
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # Delete our BB symbols so BOLT doesn't mark them as entry points
diff --git a/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s b/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s
new file mode 100644
--- /dev/null
+++ b/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s
@@ -0,0 +1,9 @@
+  .globl main
+  .type main, %function
+main:
+	sub		sp, sp, #16 // reserve 16 bytes of stack
+	mov		w0, wzr // return value = 0
+	str		wzr, [sp, #12] // zero the 32-bit slot at sp+12
+	add		sp, sp, #16 // release the 16 bytes reserved above
+	ret
+.size main, .-main
diff --git a/bolt/test/runtime/AArch64/basic-instrumentation.test b/bolt/test/runtime/AArch64/basic-instrumentation.test
new file mode 100644
--- /dev/null
+++ b/bolt/test/runtime/AArch64/basic-instrumentation.test
@@ -0,0 +1,22 @@
+# Try to instrument a very fast test. The input binary executes no code at
+# runtime besides returning zero from main, so it is a good trivial case.
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
+RUN: llvm-bolt %t.exe -o %t --instrument \
+RUN:   --instrumentation-file=%t \
+RUN:   --instrumentation-file-append-pid
+
+# Execute program to collect profile
+RUN: rm %t.*.fdata || echo Nothing to remove
+RUN: %t
+
+# Profile should be written to %t.PID.fdata, check it
+RUN: mv %t.*.fdata %t.fdata
+RUN: cat %t.fdata | FileCheck -check-prefix=CHECK %s
+
+# Check BOLT works with this profile
+RUN: llvm-bolt %t.exe --data %t.fdata -o %t.2 --reorder-blocks=cache
+
+# The instrumented profile should at least say main was called once
+CHECK: main 0 0 1{{$}}
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
new file mode 100644
--- /dev/null
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+
+typedef int (*func_ptr)(int, int);
+
+int add(int a, int b) { return a + b; } // callee of the indirect call in main
+
+int main() {
+  func_ptr fun;
+  fun = add; // route the call below through a function pointer
+  int sum = fun(10, 20); // indirect call to 'add'
+  printf("The sum is: %d\n", sum); // 10 + 20, so this prints 30
+  return 0;
+}
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags %s -o %t.exe -Wl,-q -nopie -fpie
+
+RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
+RUN:   -o %t.instrumented
+
+# The instrumented program must run to completion and exit with status zero
+RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+# Test that the instrumented data makes sense
+RUN:  llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
+RUN:    --reorder-blocks=ext-tsp --reorder-functions=hfsort+ \
+RUN:    --print-only=main --print-finalized | FileCheck %s
+
+RUN: %t.bolted | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+CHECK-OUTPUT: The sum is: 30
+
+# Check that our indirect call has 1 hit recorded in the fdata file and that
+# this was processed correctly by BOLT
+CHECK:         blr     x8 # CallProfile: 1 (0 misses) :
+CHECK-NEXT:    { add: 1 (0 misses) }
+*/
diff --git a/bolt/test/runtime/meta-merge-fdata.test b/bolt/test/runtime/meta-merge-fdata.test
--- a/bolt/test/runtime/meta-merge-fdata.test
+++ b/bolt/test/runtime/meta-merge-fdata.test
@@ -1,7 +1,7 @@
 # Meta test using merge-fdata binary
 UNSUPPORTED: asan
-# Instrumentation currently only works on X86
-REQUIRES: x86_64-linux,bolt-runtime
+# Instrumentation needs the BOLT runtime library
+REQUIRES: bolt-runtime
 
 # Instrumentation, should test:
 # - Direct branches