Index: lib/safestack/safestack.cc
===================================================================
--- lib/safestack/safestack.cc
+++ lib/safestack/safestack.cc
@@ -18,6 +18,7 @@
 #include <pthread.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/user.h>
@@ -82,11 +83,36 @@
     "default"))) __thread void *__safestack_unsafe_stack_ptr = nullptr;
 }
 
+#if !SANITIZER_MAC
 // Per-thread unsafe stack information. It's not frequently accessed, so there
 // it can be kept out of the tcb in normal thread-local variables.
 static __thread void *unsafe_stack_start = nullptr;
 static __thread size_t unsafe_stack_size = 0;
 static __thread size_t unsafe_stack_guard = 0;
+#else
+// On OS X, store these in dynamically allocated TLS (via pthread_key_create):
+// the regular __thread variables are already destroyed by the time
+// unsafe_stack_free runs, so they cannot be read there.
+static pthread_key_t dynamic_data_key;
+struct dynamic_data_t {
+  void *unsafe_stack_start;   // free unmaps from `guard` bytes below this
+  size_t unsafe_stack_size;   // bytes unmapped from unsafe_stack_start upward
+  size_t unsafe_stack_guard;  // extra bytes unmapped below unsafe_stack_start
+};
+
+// Key destructor: stores val back so it survives further destructor iterations.
+static void dynamic_data_cleanup(void *val) {
+  pthread_setspecific(dynamic_data_key, val);
+}
+
+// Heap-allocates this thread's stack record and publishes it in dynamic TLS.
+static void dynamic_data_allocate(void *start, size_t size, size_t guard) {
+  auto *data = (struct dynamic_data_t *)malloc(sizeof(struct dynamic_data_t));
+  CHECK(data);  // malloc can fail; never write through a null pointer.
+  *data = {start, size, guard};
+  pthread_setspecific(dynamic_data_key, data);
+}
+#endif
 
 static inline void *unsafe_stack_alloc(size_t size, size_t guard) {
   CHECK_GE(size + guard, size);
@@ -102,17 +128,33 @@
   CHECK_EQ((((size_t)stack_ptr) & (kStackAlign - 1)), 0);
 
   __safestack_unsafe_stack_ptr = stack_ptr;
+
+#if !SANITIZER_MAC
   unsafe_stack_start = start;
   unsafe_stack_size = size;
   unsafe_stack_guard = guard;
+#else
+  dynamic_data_allocate(start, size, guard);
+#endif
 }
 
 static void unsafe_stack_free() {
+#if !SANITIZER_MAC
   if (unsafe_stack_start) {
     UnmapOrDie((char *)unsafe_stack_start - unsafe_stack_guard,
                unsafe_stack_size + unsafe_stack_guard);
   }
   unsafe_stack_start = nullptr;
+#else
+  // Look up this thread's record; a null result means nothing to unmap.
+  auto *info = (struct dynamic_data_t *)pthread_getspecific(dynamic_data_key);
+  if (info) {
+    char *base = (char *)info->unsafe_stack_start - info->unsafe_stack_guard;
+    UnmapOrDie(base, info->unsafe_stack_size + info->unsafe_stack_guard);
+    free(info);
+  }
+  pthread_setspecific(dynamic_data_key, nullptr);
+#endif
 }
 
 /// Thread data for the cleanup handler
@@ -199,13 +241,8 @@
   return REAL(pthread_create)(thread, attr, thread_start, tinfo);
 }
 
-extern "C" __attribute__((visibility("default")))
-#if !SANITIZER_CAN_USE_PREINIT_ARRAY
-// On ELF platforms, the constructor is invoked using .preinit_array (see below)
-__attribute__((constructor(0)))
-#endif
-void __safestack_init() {
-  // Determine the stack size for the main thread.
+static void unsafe_stack_setup_self() {
+  // Determine the stack size.
   size_t size = kDefaultUnsafeStackSize;
   size_t guard = 4096;
 
@@ -213,16 +250,59 @@
   if (getrlimit(RLIMIT_STACK, &limit) == 0 && limit.rlim_cur != RLIM_INFINITY)
     size = limit.rlim_cur;
 
-  // Allocate unsafe stack for main thread
+  // Allocate unsafe stack.
   void *addr = unsafe_stack_alloc(size, guard);
 
   unsafe_stack_setup(addr, size, guard);
+}
+
+#if SANITIZER_MAC
+// We need to use pthread introspection API to get notifications about newly
+// created GCD worker threads (which are not created via pthread_create).
+typedef void (*pthread_introspection_hook_t)(unsigned int event,
+                                             pthread_t thread, void *addr,
+                                             size_t size);
+extern "C" pthread_introspection_hook_t pthread_introspection_hook_install(
+    pthread_introspection_hook_t hook);
+static const uptr PTHREAD_INTROSPECTION_THREAD_CREATE = 1;
+static pthread_introspection_hook_t prev_pthread_introspection_hook;
+static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
+                                          void *addr, size_t size) {
+  // A create event delivered on the created thread itself is treated as a new
+  // GCD worker thread, so its unsafe stack is set up right here.
+  const bool is_new_worker = event == PTHREAD_INTROSPECTION_THREAD_CREATE &&
+                             thread == pthread_self();
+  if (is_new_worker)
+    unsafe_stack_setup_self();
+  // Chain to whichever hook was installed before ours, if any.
+  if (prev_pthread_introspection_hook)
+    prev_pthread_introspection_hook(event, thread, addr, size);
+}
+#endif
+
+extern "C" __attribute__((visibility("default")))
+#if !SANITIZER_CAN_USE_PREINIT_ARRAY
+// On ELF platforms, the constructor is invoked using .preinit_array (see below)
+__attribute__((constructor(0)))
+#endif
+void __safestack_init() {
+#if SANITIZER_MAC
+  // Must precede unsafe_stack_setup_self(), which stores into this TLS key.
+  pthread_key_create(&dynamic_data_key, dynamic_data_cleanup);
+#endif
+  // Setup the main thread's unsafe stack.
+  unsafe_stack_setup_self();
 
   // Initialize pthread interceptors for thread allocation
   INTERCEPT_FUNCTION(pthread_create);
 
   // Setup the cleanup handler
   pthread_key_create(&thread_cleanup_key, thread_cleanup_handler);
+#if SANITIZER_MAC
+  // Setup the libpthread introspection hook.
+  prev_pthread_introspection_hook =
+      pthread_introspection_hook_install(&my_pthread_introspection_hook);
+#endif
 }
 
 #if SANITIZER_CAN_USE_PREINIT_ARRAY
@@ -237,7 +317,13 @@
 
 extern "C"
     __attribute__((visibility("default"))) void *__get_unsafe_stack_start() {
+#if !SANITIZER_MAC
   return unsafe_stack_start;
+#else
+  // No record (never set up or already freed) reads as nullptr, like non-Mac.
+  auto *data = (struct dynamic_data_t *)pthread_getspecific(dynamic_data_key);
+  return data ? data->unsafe_stack_start : nullptr;
+#endif
 }
 
 extern "C"
Index: test/safestack/Darwin/gcd.mm
===================================================================
--- test/safestack/Darwin/gcd.mm
+++ test/safestack/Darwin/gcd.mm
@@ -0,0 +1,33 @@
+// RUN: %clang_safestack %s -o %t -framework Foundation
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#import <Foundation/Foundation.h>
+
+void force_use_of_unsafe_stack(char c) {
+  char text[32];  // Handed to memset/fprintf by address below.
+  memset(text, c, sizeof(text) - 1);
+  text[sizeof(text) - 1] = '\0';
+  fprintf(stderr, "%s\n", text);
+}
+
+int main() {
+  NSLog(@"Hello world.");
+  // Exercise the helper on the main thread, then from a global-queue block.
+  force_use_of_unsafe_stack('x');
+  dispatch_queue_t worker =
+      dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+  dispatch_async(worker, ^{
+    force_use_of_unsafe_stack('y');
+    // Hop back to the main queue to stop the run loop started below.
+    dispatch_sync(dispatch_get_main_queue(), ^{
+      CFRunLoopStop(CFRunLoopGetCurrent());
+    });
+  });
+  CFRunLoopRun();
+  NSLog(@"Done.");
+}
+
+// CHECK: Hello world.
+// CHECK: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+// CHECK: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+// CHECK: Done.
Index: test/safestack/Darwin/lit.local.cfg
===================================================================
--- test/safestack/Darwin/lit.local.cfg
+++ test/safestack/Darwin/lit.local.cfg
@@ -0,0 +1,9 @@
+def getRoot(config):
+  while config.parent:  # Climb parent links until a config has no parent.
+    config = config.parent
+  return config
+
+root = getRoot(config)
+# Mark this config unsupported unless the root config reports a Darwin host.
+if root.host_os != 'Darwin':
+  config.unsupported = True
Index: test/safestack/pthread-cleanup.c
===================================================================
--- test/safestack/pthread-cleanup.c
+++ test/safestack/pthread-cleanup.c
@@ -1,8 +1,9 @@
 // RUN: %clang_safestack %s -pthread -o %t
-// RUN: not --crash %run %t
+// RUN: not --crash %run %t 2>&1 | FileCheck %s
 
 // Test that unsafe stacks are deallocated correctly on thread exit.
 
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
@@ -25,7 +26,14 @@
   if (pthread_join(t1, &buffer))
     abort();
 
+  fprintf(stderr, "Thread created and joined.\n");
+
   // should segfault here
   memset(buffer, 0, kBufferSize);
+
+  fprintf(stderr, "Shouldn't get here.\n");
   return 0;
 }
+
+// CHECK: Thread created and joined.
+// CHECK-NOT: Shouldn't get here.
Index: test/safestack/pthread.c
===================================================================
--- test/safestack/pthread.c
+++ test/safestack/pthread.c
@@ -1,8 +1,6 @@
 // RUN: %clang_safestack %s -pthread -o %t
 // RUN: %run %t
 
-// XFAIL: darwin
-
 // Test that pthreads receive their own unsafe stack.
 
 #include <stdlib.h>