Index: include/sanitizer/tsan_interface.h
===================================================================
--- include/sanitizer/tsan_interface.h
+++ include/sanitizer/tsan_interface.h
@@ -137,6 +137,20 @@
 void __tsan_external_read(void *addr, void *caller_pc, void *tag);
 void __tsan_external_write(void *addr, void *caller_pc, void *tag);
 
+// Fiber switching API.
+//   - TSAN context for fiber can be created by __tsan_create_fiber
+//     and freed by __tsan_destroy_fiber.
+//   - TSAN context of current fiber or thread can be obtained
+//     by calling __tsan_get_current_fiber.
+//   - __tsan_switch_to_fiber should be called immediately before switching
+//     to the fiber, e.g. right before a call to swapcontext.
+//   - Fiber name can be set by __tsan_set_fiber_name.
+void *__tsan_get_current_fiber(void);
+void *__tsan_create_fiber(void);
+void __tsan_destroy_fiber(void *fiber);
+void __tsan_switch_to_fiber(void *fiber);
+void __tsan_set_fiber_name(void *fiber, const char *name);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
Index: lib/tsan/rtl/tsan_interface.cc
===================================================================
--- lib/tsan/rtl/tsan_interface.cc
+++ lib/tsan/rtl/tsan_interface.cc
@@ -124,6 +124,47 @@
   __tsan_unaligned_write8(addr);
   *addr = v;
 }
+
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_get_current_fiber() {  // Opaque handle for the running context.
+  return cur_thread();  // The handle is the current ThreadState pointer.
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_create_fiber() {  // Returns a new fiber handle (a ThreadState).
+  const uptr pc = GET_CALLER_PC();  // Handed to ThreadCreate below.
+  void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadState));
+  ThreadState *thr = static_cast<ThreadState *>(mem);
+  internal_memset(thr, 0, sizeof(*thr));  // Start from an all-zero state.
+  int tid = ThreadCreate(cur_thread(), pc, 0, true);
+  ThreadStart(thr, tid, 0, false);  // NOTE(review): 0 presumably os_id.
+  return thr;  // Released by __tsan_destroy_fiber.
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_destroy_fiber(void *fiber) {  // fiber: a ThreadState handle.
+  ThreadState *thr = static_cast<ThreadState *>(fiber);
+  ThreadFinish(thr);
+  internal_free(thr);  // The handle must not be used after this point.
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_switch_to_fiber(void *fiber) {  // Make fiber the current context.
+  ThreadState *thr = cur_thread();  // Context being switched away from.
+  Processor *proc = thr->proc();
+  ProcUnwire(proc, thr);  // Unwire proc from the old context...
+  thr = static_cast<ThreadState *>(fiber);
+  ProcWire(proc, thr);  // ...and wire the same proc to the new one.
+  cur_thread1 = thr;  // cur_thread() returns this pointer while non-null.
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_set_fiber_name(void *fiber, const char *name) {
+  ThreadState *thr = static_cast<ThreadState *>(fiber);
+  ThreadSetName(thr, name);  // Forwards name for the given fiber's state.
+}
+#endif // !SANITIZER_MAC && !SANITIZER_ANDROID
 }  // extern "C"
 
 void __tsan_acquire(void *addr) {
Index: lib/tsan/rtl/tsan_rtl.h
===================================================================
--- lib/tsan/rtl/tsan_rtl.h
+++ lib/tsan/rtl/tsan_rtl.h
@@ -431,11 +431,7 @@
 
   // Current wired Processor, or nullptr. Required to handle any events.
   Processor *proc1;
-#if !SANITIZER_GO
-  Processor *proc() { return proc1; }
-#else
   Processor *proc();
-#endif
 
   atomic_uintptr_t in_signal_handler;
   ThreadSignalContext *signal_ctx;
@@ -464,11 +460,18 @@
 #else
 __attribute__((tls_model("initial-exec")))
 extern THREADLOCAL char cur_thread_placeholder[];
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ThreadState *cur_thread1;  // Per-thread override.
 INLINE ThreadState *cur_thread() {
+  if (cur_thread1)  // Non-null after __tsan_switch_to_fiber stored a state.
+    return cur_thread1;
   return reinterpret_cast<ThreadState *>(&cur_thread_placeholder);
 }
 INLINE void cur_thread_finalize() { }
 #endif  // SANITIZER_MAC || SANITIZER_ANDROID
+INLINE Processor *ThreadState::proc() {
+  return proc1 ? proc1 : cur_thread()->proc1;  // Else use current context's.
+}
 #endif  // SANITIZER_GO
 
 class ThreadContext : public ThreadContextBase {
@@ -839,7 +842,8 @@
 
 extern "C" void __tsan_trace_switch();
 void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
-                                        EventType typ, u64 addr) {
+                                        EventType typ, u64 addr,
+                                        bool hacky_call = true) {
   if (!kCollectHistory)
     return;
   DCHECK_GE((int)typ, 0);
@@ -849,10 +853,12 @@
   u64 pos = fs.GetTracePos();
   if (UNLIKELY((pos % kTracePartSize) == 0)) {
 #if !SANITIZER_GO
-    HACKY_CALL(__tsan_trace_switch);
-#else
-    TraceSwitch(thr);
+    if (hacky_call) {
+      DCHECK_EQ(thr, cur_thread());
+      HACKY_CALL(__tsan_trace_switch);
+    } else
 #endif
+      TraceSwitch(thr);
   }
   Event *trace = (Event*)GetThreadTrace(fs.tid());
   Event *evp = &trace[pos];
Index: lib/tsan/rtl/tsan_rtl.cc
===================================================================
--- lib/tsan/rtl/tsan_rtl.cc
+++ lib/tsan/rtl/tsan_rtl.cc
@@ -48,6 +48,8 @@
 #if !SANITIZER_GO && !SANITIZER_MAC
 __attribute__((tls_model("initial-exec")))
 THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL ThreadState *cur_thread1;  // Assigned by __tsan_switch_to_fiber.
 #endif
 static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
 Context *ctx;
@@ -619,6 +621,7 @@
   thr->racy_state[1] = old.raw();
   thr->racy_shadow_addr = shadow_mem;
 #if !SANITIZER_GO
+  DCHECK_EQ(thr, cur_thread());
   HACKY_CALL(__tsan_report_race);
 #else
   ReportRace(thr);
Index: lib/tsan/rtl/tsan_rtl_thread.cc
===================================================================
--- lib/tsan/rtl/tsan_rtl_thread.cc
+++ lib/tsan/rtl/tsan_rtl_thread.cc
@@ -59,7 +59,7 @@
     return;
   args->thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
-  TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
+  TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0, false);
   ReleaseImpl(args->thr, 0, &sync);
   creation_stack_id = CurrentStackId(args->thr, args->pc);
   if (reuse_count == 0)
@@ -112,7 +112,7 @@
   thr->fast_state.SetHistorySize(flags()->history_size);
   // Commit switch to the new part of the trace.
   // TraceAddEvent will reset stack0/mset0 in the new part for us.
-  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0, false);
 
   thr->fast_synch_epoch = epoch0;
   AcquireImpl(thr, 0, &sync);
@@ -135,7 +135,7 @@
   if (!detached) {
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
-    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0, false);
     ReleaseImpl(thr, 0, &sync);
   }
   epoch1 = thr->fast_state.epoch();
@@ -246,7 +246,8 @@
   uptr tls_addr = 0;
   uptr tls_size = 0;
 #if !SANITIZER_GO
-  GetThreadStackAndTls(tid == 0, &stk_addr, &stk_size, &tls_addr, &tls_size);
+  if (os_id)
+    GetThreadStackAndTls(tid == 0, &stk_addr, &stk_size, &tls_addr, &tls_size);
 
   if (tid) {
     if (stk_addr && stk_size)
Index: test/tsan/fiber_asm.cc
===================================================================
--- /dev/null
+++ test/tsan/fiber_asm.cc
@@ -0,0 +1,87 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// The context switch below is hand-written x86-64 assembly.
+// REQUIRES: x86_64-target-arch
+// UNSUPPORTED: darwin
+#include "test.h"
+
+struct ucontext {
+  void *sp;     // Saved stack pointer; stored/loaded by ucontext_do_switch.
+  void *fiber;  // TSan fiber handle that belongs to this context.
+};
+
+extern "C" {
+  void ucontext_do_switch(void **save, void **load);
+  void ucontext_trampoline();
+}
+
+__asm__(".global ucontext_do_switch\n"
+        "ucontext_do_switch:\n\t"
+        "pushq %rbp\n\t"  // Push rbp, r15..r12 and rbx on the current stack.
+        "pushq %r15\n\t"
+        "pushq %r14\n\t"
+        "pushq %r13\n\t"
+        "pushq %r12\n\t"
+        "pushq %rbx\n\t"
+        "movq %rsp, (%rdi)\n\t"  // *save = current stack pointer.
+        "movq (%rsi), %rsp\n\t"  // Stack pointer = *load.
+        "popq %rbx\n\t"  // Pop the same registers, reversed, from new stack.
+        "popq %r12\n\t"
+        "popq %r13\n\t"
+        "popq %r14\n\t"
+        "popq %r15\n\t"
+        "popq %rbp\n\t"
+        "retq");  // Continue at the address on top of the new stack.
+
+__asm__(".global ucontext_trampoline\n"
+        "ucontext_trampoline:\n\t"
+        ".cfi_startproc\n\t"
+        ".cfi_undefined rip\n\t"
+        "movq %r12, %rdi\n\t"  // r12 slot holds arg (see ucontext_init).
+        "jmpq *%rbx\n\t"  // rbx slot holds func (see ucontext_init).
+        ".cfi_endproc");
+
+void ucontext_init(ucontext *context, void *stack, unsigned stack_sz,
+                   void (*func)(void*), void *arg) {
+  void **sp = reinterpret_cast<void **>(static_cast<char *>(stack) + stack_sz);
+  *(--sp) = 0;  // NOTE(review): presumably a padding slot - confirm.
+  *(--sp) = reinterpret_cast<void *>(ucontext_trampoline);  // Target of retq.
+  *(--sp) = 0;   // rbp
+  *(--sp) = 0;   // r15
+  *(--sp) = 0;   // r14
+  *(--sp) = 0;   // r13
+  *(--sp) = arg; // r12
+  *(--sp) = reinterpret_cast<void *>(func); // rbx
+  context->sp = sp;  // Frame layout matches the pops in ucontext_do_switch.
+  context->fiber = __tsan_create_fiber();  // Destroyed by ucontext_free.
+}
+
+void ucontext_free(ucontext *context) {  // Destroys only the TSan fiber.
+  __tsan_destroy_fiber(context->fiber);
+}
+
+__attribute__((no_sanitize_thread))
+void ucontext_switch(ucontext *save, ucontext *load) {
+  save->fiber = __tsan_get_current_fiber();  // Remember the context we leave.
+  __tsan_switch_to_fiber(load->fiber);  // Tell TSan before the real switch.
+  ucontext_do_switch(&save->sp, &load->sp);  // Swap the stack pointers.
+}
+
+char stack[64 * 1024] __attribute__((aligned(16)));
+
+ucontext uc, orig_uc;
+
+void func(void *arg) {  // Fiber body; passed to ucontext_init by main.
+  __asm__ __volatile__(".cfi_undefined rip");
+  ucontext_switch(&uc, &orig_uc);  // Switch back to the context main saved.
+}
+
+int main() {
+  ucontext_init(&uc, stack, sizeof(stack), func, 0);
+  ucontext_switch(&orig_uc, &uc);  // Run func; it switches straight back.
+  ucontext_free(&uc);
+  fprintf(stderr, "PASS\n");  // Matched by the CHECK line below.
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: PASS
Index: test/tsan/fiber_race.cc
===================================================================
--- /dev/null
+++ test/tsan/fiber_race.cc
@@ -0,0 +1,36 @@
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: darwin
+#include "test.h"
+#include <ucontext.h>
+
+char stack[64 * 1024] __attribute__((aligned(16)));
+
+ucontext_t uc, orig_uc;
+void *fiber, *orig_fiber;
+
+int var;
+
+void func() {
+  var = 1;  // Conflicts with "var = 2" in main: the race this test expects.
+  __tsan_switch_to_fiber(orig_fiber);  // Notify TSan, then really switch.
+  swapcontext(&uc, &orig_uc);
+}
+
+int main() {
+  orig_fiber = __tsan_get_current_fiber();  // Handle func switches back to.
+  fiber = __tsan_create_fiber();
+  getcontext(&uc);
+  uc.uc_stack.ss_sp = stack;
+  uc.uc_stack.ss_size = sizeof(stack);
+  uc.uc_link = &orig_uc;
+  makecontext(&uc, func, 0);
+  var = 2;  // Conflicts with "var = 1" in func (see CHECK below).
+  __tsan_switch_to_fiber(fiber);  // Notify TSan, then really switch.
+  swapcontext(&orig_uc, &uc);
+  __tsan_destroy_fiber(fiber);
+  fprintf(stderr, "PASS\n");
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: PASS
Index: test/tsan/fiber_simple.cc
===================================================================
--- /dev/null
+++ test/tsan/fiber_simple.cc
@@ -0,0 +1,32 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: darwin
+#include "test.h"
+#include <ucontext.h>
+
+char stack[64 * 1024] __attribute__((aligned(16)));
+
+ucontext_t uc, orig_uc;
+void *fiber, *orig_fiber;
+
+void func() {  // Fiber body: immediately hand control back to main.
+  __tsan_switch_to_fiber(orig_fiber);  // Notify TSan, then really switch.
+  swapcontext(&uc, &orig_uc);
+}
+
+int main() {
+  orig_fiber = __tsan_get_current_fiber();  // Handle func switches back to.
+  fiber = __tsan_create_fiber();
+  getcontext(&uc);
+  uc.uc_stack.ss_sp = stack;
+  uc.uc_stack.ss_size = sizeof(stack);
+  uc.uc_link = &orig_uc;
+  makecontext(&uc, func, 0);
+  __tsan_switch_to_fiber(fiber);  // Notify TSan, then really switch.
+  swapcontext(&orig_uc, &uc);
+  __tsan_destroy_fiber(fiber);
+  fprintf(stderr, "PASS\n");  // Test expects no TSan warning before this.
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: PASS
Index: test/tsan/fiber_two_threads.cc
===================================================================
--- /dev/null
+++ test/tsan/fiber_two_threads.cc
@@ -0,0 +1,66 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: darwin
+#include "test.h"
+#include <ucontext.h>
+
+char stack[64 * 1024] __attribute__((aligned(16)));
+
+ucontext_t uc, orig_uc[2];
+void *fiber, *orig_fiber[2];
+
+const unsigned N = 1000;
+
+__attribute__((noinline))
+void switch0() {  // Fiber -> context saved in orig_uc[0] (set by main).
+  __tsan_acquire(&orig_fiber[0]);  // Pairs with __tsan_release in main.
+  __tsan_switch_to_fiber(orig_fiber[0]);
+  swapcontext(&uc, &orig_uc[0]);
+}
+
+void func() {
+  for (;;) {  // Alternate: back to main (switch0), then back to Thread.
+    switch0();
+    __tsan_acquire(&orig_fiber[1]);  // Pairs with __tsan_release in Thread.
+    __tsan_switch_to_fiber(orig_fiber[1]);
+    swapcontext(&uc, &orig_uc[1]);
+  }
+}
+
+void *Thread(void *x) {
+  orig_fiber[1] = __tsan_get_current_fiber();  // Handle func returns to.
+  for (unsigned i = 0; i < N; i++) {
+    barrier_wait(&barrier);  // Wait until main has finished its fiber turn.
+    __tsan_release(&orig_fiber[1]);
+    __tsan_switch_to_fiber(fiber);
+    swapcontext(&orig_uc[1], &uc);
+    barrier_wait(&barrier);  // Signal main that this turn is over.
+  }
+  return 0;
+}
+
+int main() {
+  fiber = __tsan_create_fiber();  // One fiber, entered from both threads.
+  barrier_init(&barrier, 2);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  orig_fiber[0] = __tsan_get_current_fiber();  // Handle switch0 returns to.
+  getcontext(&uc);
+  uc.uc_stack.ss_sp = stack;
+  uc.uc_stack.ss_size = sizeof(stack);
+  uc.uc_link = &orig_uc[0];
+  makecontext(&uc, func, 0);
+  for (unsigned i = 0; i < N; i++) {
+    __tsan_release(&orig_fiber[0]);
+    __tsan_switch_to_fiber(fiber);
+    swapcontext(&orig_uc[0], &uc);
+    barrier_wait(&barrier);  // Our fiber turn is over; Thread may take its.
+    barrier_wait(&barrier);  // Thread's fiber turn is over.
+  }
+  pthread_join(t, 0);
+  __tsan_destroy_fiber(fiber);
+  fprintf(stderr, "PASS\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: PASS