Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1133,8 +1133,8 @@
    Select which TLS model to use.
 
    Valid values are: ``global-dynamic``, ``local-dynamic``,
-   ``initial-exec`` and ``local-exec``. The default value is
-   ``global-dynamic``. The compiler may use a different model if the
+   ``initial-exec``, ``local-exec`` and ``emulated``. The default value is
+   ``global-dynamic``. The compiler may use a different model if the
    selected model is not supported by the target, or if a more
    efficient model can be used. The TLS model can be overridden per
    variable using the ``tls_model`` attribute.
Index: docs/tools/clang.pod
===================================================================
--- docs/tools/clang.pod
+++ docs/tools/clang.pod
@@ -352,8 +352,9 @@
 =item B<-ftls-model>
 
 Set the default thread-local storage (TLS) model to use for thread-local
-variables. Valid values are: "global-dynamic", "local-dynamic", "initial-exec"
-and "local-exec". The default is "global-dynamic". The default model can be
+variables. Valid values are: "global-dynamic", "local-dynamic", "initial-exec",
+"local-exec", and "emulated". The default is "global-dynamic", or "emulated" if
+the target platform does not support "global-dynamic". The default model can be
 overridden with the tls_model attribute. The compiler will try to choose a more
 efficient model if possible.
 
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -62,6 +62,7 @@
 * local-dynamic
 * initial-exec
 * local-exec
+* emulated
 
 TLS models are mutually exclusive.
   }];
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2138,7 +2138,7 @@
 }
 
 def err_attr_tlsmodel_arg : Error<"tls_model must be \"global-dynamic\", "
-  "\"local-dynamic\", \"initial-exec\" or \"local-exec\"">;
+  "\"local-dynamic\", \"initial-exec\", \"local-exec\" or \"emulated\"">;
 
 def err_only_annotate_after_access_spec : Error<
   "access specifier can only have annotation attributes">;
Index: include/clang/Frontend/CodeGenOptions.h
===================================================================
--- include/clang/Frontend/CodeGenOptions.h
+++ include/clang/Frontend/CodeGenOptions.h
@@ -85,7 +85,8 @@
     GeneralDynamicTLSModel,
     LocalDynamicTLSModel,
     InitialExecTLSModel,
-    LocalExecTLSModel
+    LocalExecTLSModel,
+    EmulatedTLSModel
   };
 
   enum FPContractModeKind {
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -176,7 +176,7 @@
 ENUM_CODEGENOPT(VecLib, VectorLibrary, 1, NoLibrary)
 
 /// The default TLS model to use.
-ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)
+ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 3, GeneralDynamicTLSModel)
 
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -521,7 +521,8 @@
       .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel)
       .Case("local-dynamic", llvm::GlobalVariable::LocalDynamicTLSModel)
       .Case("initial-exec", llvm::GlobalVariable::InitialExecTLSModel)
-      .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel);
+      .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel)
+      .Case("emulated", llvm::GlobalVariable::EmulatedTLSModel);
 }
 
 static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(
@@ -535,6 +536,8 @@
     return llvm::GlobalVariable::InitialExecTLSModel;
   case CodeGenOptions::LocalExecTLSModel:
     return llvm::GlobalVariable::LocalExecTLSModel;
+  case CodeGenOptions::EmulatedTLSModel:
+    return llvm::GlobalVariable::EmulatedTLSModel;
   }
   llvm_unreachable("Invalid TLS model!");
 }
Index: lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- lib/CodeGen/ItaniumCXXABI.cpp
+++ lib/CodeGen/ItaniumCXXABI.cpp
@@ -2038,7 +2038,13 @@
         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
         llvm::GlobalVariable::InternalLinkage,
         llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard");
-    Guard->setThreadLocal(true);
+    // Use EmulatedTLSModel when -ftls-model=emulated
+    if (CGM.getCodeGenOpts().getDefaultTLSModel() ==
+        clang::CodeGenOptions::TLSModel::EmulatedTLSModel)
+      Guard->setThreadLocalMode(
+          llvm::GlobalValue::ThreadLocalMode::EmulatedTLSModel);
+    else
+      Guard->setThreadLocal(true);
     CodeGenFunction(CGM)
         .GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits, Guard);
   }
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -597,6 +597,7 @@
         .Case("local-dynamic", CodeGenOptions::LocalDynamicTLSModel)
         .Case("initial-exec", CodeGenOptions::InitialExecTLSModel)
         .Case("local-exec", CodeGenOptions::LocalExecTLSModel)
+        .Case("emulated", CodeGenOptions::EmulatedTLSModel)
         .Default(~0U);
     if (Model == ~0U) {
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -1543,7 +1543,8 @@
 
   // Check that the value.
   if (Model != "global-dynamic" && Model != "local-dynamic"
-      && Model != "initial-exec" && Model != "local-exec") {
+      && Model != "initial-exec" && Model != "local-exec"
+      && Model != "emulated") {
     S.Diag(LiteralLoc, diag::err_attr_tlsmodel_arg);
     return;
   }
Index: test/CodeGen/thread-specifier.c
===================================================================
--- test/CodeGen/thread-specifier.c
+++ test/CodeGen/thread-specifier.c
@@ -1,18 +1,25 @@
 // RUN: %clang_cc1 -triple i686-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
 
+// CHECK: @a1 = global i32 0, align 4
 // CHECK: @b = external thread_local global
 // CHECK: @d.e = internal thread_local global
 // CHECK: @d.f = internal thread_local global
-// CHECK: @f.a = internal thread_local(initialexec) global
-// CHECK: @a = thread_local global
-// CHECK: @g = thread_local global
-// CHECK: @h = thread_local(localdynamic) global
-// CHECK: @i = thread_local(initialexec) global
-// CHECK: @j = thread_local(localexec) global
+// CHECK: @f.a = internal thread_local(initialexec) global i32 0, align 4
+// CHECK: @f.b = internal thread_local(emulated) global i32 0, align 4
+// CHECK: @a0 = common global i32 0, align 4
+// CHECK: @a = thread_local global i32 0, align 4
+// CHECK: @g = thread_local global i32 0, align 4
+// CHECK: @h = thread_local(localdynamic) global i32 0, align 4
+// CHECK: @i = thread_local(initialexec) global i32 0, align 4
+// CHECK: @j = thread_local(localexec) global i32 0, align 4
+// CHECK: @k = thread_local(emulated) global i32 0, align 4
 
 // CHECK-NOT: @_ZTW
 // CHECK-NOT: @_ZTH
 
+int a0;
+int a1 = 0;
+
 __thread int a;
 extern __thread int b;
 int c() { return *&b; }
@@ -26,8 +33,10 @@
 __thread int h __attribute__((tls_model("local-dynamic")));
 __thread int i __attribute__((tls_model("initial-exec")));
 __thread int j __attribute__((tls_model("local-exec")));
+__thread int k __attribute__((tls_model("emulated")));
 
 int f() {
   __thread static int a __attribute__((tls_model("initial-exec")));
-  return a++;
+  __thread static int b __attribute__((tls_model("emulated")));
+  return a++ + b++;
 }
Index: test/CodeGen/tls-model.c
===================================================================
--- test/CodeGen/tls-model.c
+++ test/CodeGen/tls-model.c
@@ -3,26 +3,52 @@
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
+// RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=emulated -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-EMU
 
+int z1 = 0;
 int __thread x;
 int f() {
   static int __thread y;
   return y++;
 }
 int __thread __attribute__((tls_model("initial-exec"))) z;
+int __thread __attribute__((tls_model("emulated"))) z2;
+int z3;
 
+// Note that unlike normal C uninitialized global variables,
+// uninitialized TLS variables do NOT have COMMON linkage.
+
+// CHECK-GD: @z1 = global i32 0, align 4
 // CHECK-GD: @f.y = internal thread_local global i32 0
-// CHECK-GD: @x = thread_local global i32 0
-// CHECK-GD: @z = thread_local(initialexec) global i32 0
+// CHECK-GD: @x = thread_local global i32 0, align 4
+// CHECK-GD: @z = thread_local(initialexec) global i32 0, align 4
+// CHECK-GD: @z2 = thread_local(emulated) global i32 0, align 4
+// CHECK-GD: @z3 = common global i32 0, align 4
 
+// CHECK-LD: @z1 = global i32 0, align 4
 // CHECK-LD: @f.y = internal thread_local(localdynamic) global i32 0
-// CHECK-LD: @x = thread_local(localdynamic) global i32 0
-// CHECK-LD: @z = thread_local(initialexec) global i32 0
+// CHECK-LD: @x = thread_local(localdynamic) global i32 0, align 4
+// CHECK-LD: @z = thread_local(initialexec) global i32 0, align 4
+// CHECK-LD: @z2 = thread_local(emulated) global i32 0, align 4
+// CHECK-LD: @z3 = common global i32 0, align 4
 
+// CHECK-IE: @z1 = global i32 0, align 4
 // CHECK-IE: @f.y = internal thread_local(initialexec) global i32 0
-// CHECK-IE: @x = thread_local(initialexec) global i32 0
-// CHECK-IE: @z = thread_local(initialexec) global i32 0
+// CHECK-IE: @x = thread_local(initialexec) global i32 0, align 4
+// CHECK-IE: @z = thread_local(initialexec) global i32 0, align 4
+// CHECK-IE: @z2 = thread_local(emulated) global i32 0, align 4
+// CHECK-IE: @z3 = common global i32 0, align 4
 
+// CHECK-LE: @z1 = global i32 0, align 4
 // CHECK-LE: @f.y = internal thread_local(localexec) global i32 0
-// CHECK-LE: @x = thread_local(localexec) global i32 0
-// CHECK-LE: @z = thread_local(initialexec) global i32 0
+// CHECK-LE: @x = thread_local(localexec) global i32 0, align 4
+// CHECK-LE: @z = thread_local(initialexec) global i32 0, align 4
+// CHECK-LE: @z2 = thread_local(emulated) global i32 0, align 4
+// CHECK-LE: @z3 = common global i32 0, align 4
+
+// CHECK-EMU: @z1 = global i32 0, align 4
+// CHECK-EMU: @f.y = internal thread_local(emulated) global i32 0
+// CHECK-EMU: @x = thread_local(emulated) global i32 0, align 4
+// CHECK-EMU: @z = thread_local(initialexec) global i32 0, align 4
+// CHECK-EMU: @z2 = thread_local(emulated) global i32 0, align 4
+// CHECK-EMU: @z3 = common global i32 0, align 4
Index: test/CodeGenCXX/cxx11-thread-local.cpp
===================================================================
--- test/CodeGenCXX/cxx11-thread-local.cpp
+++ test/CodeGenCXX/cxx11-thread-local.cpp
@@ -1,73 +1,115 @@
 // RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -ftls-model=emulated -emit-llvm %s -o - \
+// RUN:   -triple x86_64-linux-gnu | FileCheck --check-prefix=EMU %s
+
+// Make sure that -ftls-model=emulated sets all thread_local variables to
+// the default thread_local(emulated) mode.
 
 int f();
 int g();
 
 // CHECK: @a = thread_local global i32 0
+// EMU: @a = thread_local(emulated) global i32 0
 thread_local int a = f();
+
 extern thread_local int b;
+
 // CHECK: @c = global i32 0
+// EMU: @c = global i32 0
 int c = b;
+
 // CHECK: @_ZL1d = internal thread_local global i32 0
+// EMU: @_ZL1d = internal thread_local(emulated) global i32 0
 static thread_local int d = g();
 
 struct U { static thread_local int m; };
+
 // CHECK: @_ZN1U1mE = thread_local global i32 0
+// EMU: @_ZN1U1mE = thread_local(emulated) global i32 0
 thread_local int U::m = f();
 
 template<typename T> struct V { static thread_local int m; };
 template<typename T> thread_local int V<T>::m = g();
 
 // CHECK: @e = global i32 0
+// EMU: @e = global i32 0
 int e = V<int>::m;
 
 // CHECK: @_ZN1VIiE1mE = linkonce_odr thread_local global i32 0
+// EMU: @_ZN1VIiE1mE = linkonce_odr thread_local(emulated) global i32 0
 
 // CHECK: @_ZZ1fvE1n = internal thread_local global i32 0
+// EMU: @_ZZ1fvE1n = internal thread_local(emulated) global i32 0
 
 // CHECK: @_ZGVZ1fvE1n = internal thread_local global i8 0
+// EMU: @_ZGVZ1fvE1n = internal thread_local(emulated) global i8 0
 
 // CHECK: @_ZZ8tls_dtorvE1s = internal thread_local global
+// EMU: @_ZZ8tls_dtorvE1s = internal thread_local(emulated) global
 // CHECK: @_ZGVZ8tls_dtorvE1s = internal thread_local global i8 0
+// EMU: @_ZGVZ8tls_dtorvE1s = internal thread_local(emulated) global i8 0
 
 // CHECK: @_ZZ8tls_dtorvE1t = internal thread_local global
+// EMU: @_ZZ8tls_dtorvE1t = internal thread_local(emulated) global
 // CHECK: @_ZGVZ8tls_dtorvE1t = internal thread_local global i8 0
+// EMU: @_ZGVZ8tls_dtorvE1t = internal thread_local(emulated) global i8 0
 
 // CHECK: @_ZZ8tls_dtorvE1u = internal thread_local global
+// EMU: @_ZZ8tls_dtorvE1u = internal thread_local(emulated) global
 // CHECK: @_ZGVZ8tls_dtorvE1u = internal thread_local global i8 0
+// EMU: @_ZGVZ8tls_dtorvE1u = internal thread_local(emulated) global i8 0
 // CHECK: @_ZGRZ8tls_dtorvE1u_ = internal thread_local global
+// EMU: @_ZGRZ8tls_dtorvE1u_ = internal thread_local(emulated) global
 
 // CHECK: @_ZGVN1VIiE1mE = linkonce_odr thread_local global i64 0
+// EMU: @_ZGVN1VIiE1mE = linkonce_odr thread_local(emulated) global i64 0
 
 // CHECK: @__tls_guard = internal thread_local global i8 0
+// EMU: @__tls_guard = internal thread_local(emulated) global i8 0
 
 // CHECK: @llvm.global_ctors = appending global {{.*}} @[[GLOBAL_INIT:[^ ]*]]
+// EMU: @llvm.global_ctors = appending global {{.*}} @[[GLOBAL_INIT:[^ ]*]]
 
 // CHECK: @_ZTH1a = alias void ()* @__tls_init
+// EMU: @_ZTH1a = alias void ()* @__tls_init
 // CHECK: @_ZTHL1d = internal alias void ()* @__tls_init
+// EMU: @_ZTHL1d = internal alias void ()* @__tls_init
 // CHECK: @_ZTHN1U1mE = alias void ()* @__tls_init
+// EMU: @_ZTHN1U1mE = alias void ()* @__tls_init
 // CHECK: @_ZTHN1VIiE1mE = linkonce_odr alias void ()* @__tls_init
+// EMU: @_ZTHN1VIiE1mE = linkonce_odr alias void ()* @__tls_init
 
 
 // Individual variable initialization functions:
 
 // CHECK: define {{.*}} @[[A_INIT:.*]]()
+// EMU: define {{.*}} @[[A_INIT:.*]]()
 // CHECK: call i32 @_Z1fv()
+// EMU: call i32 @_Z1fv()
 // CHECK-NEXT: store i32 {{.*}}, i32* @a, align 4
+// EMU-NEXT: store i32 {{.*}}, i32* @a, align 4
 
 // CHECK-LABEL: define i32 @_Z1fv()
 int f() {
   // CHECK: %[[GUARD:.*]] = load i8, i8* @_ZGVZ1fvE1n, align 1
+  // EMU: %[[GUARD:.*]] = load i8, i8* @_ZGVZ1fvE1n, align 1
   // CHECK: %[[NEED_INIT:.*]] = icmp eq i8 %[[GUARD]], 0
+  // EMU: %[[NEED_INIT:.*]] = icmp eq i8 %[[GUARD]], 0
   // CHECK: br i1 %[[NEED_INIT]]
+  // EMU: br i1 %[[NEED_INIT]]
 
   // CHECK: %[[CALL:.*]] = call i32 @_Z1gv()
+  // EMU: %[[CALL:.*]] = call i32 @_Z1gv()
   // CHECK: store i32 %[[CALL]], i32* @_ZZ1fvE1n, align 4
+  // EMU: store i32 %[[CALL]], i32* @_ZZ1fvE1n, align 4
   // CHECK: store i8 1, i8* @_ZGVZ1fvE1n
+  // EMU: store i8 1, i8* @_ZGVZ1fvE1n
   // CHECK: br label
+  // EMU: br label
   static thread_local int n = g();
 
   // CHECK: load i32, i32* @_ZZ1fvE1n, align 4
+  // EMU: load i32, i32* @_ZZ1fvE1n, align 4
   return n;
 }
 
Index: test/Sema/attr-tls_model.c
===================================================================
--- test/Sema/attr-tls_model.c
+++ test/Sema/attr-tls_model.c
@@ -11,4 +11,4 @@
 
 static __thread int y __attribute((tls_model("local", "dynamic"))); // expected-error {{'tls_model' attribute takes one argument}}
 static __thread int y __attribute((tls_model(123))); // expected-error {{'tls_model' attribute requires a string}}
-static __thread int y __attribute((tls_model("foobar"))); // expected-error {{tls_model must be "global-dynamic", "local-dynamic", "initial-exec" or "local-exec"}}
+static __thread int y __attribute((tls_model("foobar"))); // expected-error {{tls_model must be "global-dynamic", "local-dynamic", "initial-exec", "local-exec" or "emulated"}}