Index: ELF/InputSection.cpp
===================================================================
--- ELF/InputSection.cpp
+++ ELF/InputSection.cpp
@@ -147,6 +147,10 @@
     } else if (!Target->relocNeedsCopy(Type, Body) &&
                isa<SharedSymbol<ELFT>>(Body)) {
       continue;
+    } else if (Body.isTLS() &&
+               Target->getTlsOptimization(Type, Body) == TargetInfo::ToLE) {
+      Target->relocateTlsToLe(BufLoc, BufEnd, Type, AddrLoc, SymVA);
+      continue;
     }
     Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
                         SymVA + getAddend<ELFT>(RI));
Index: ELF/Target.h
===================================================================
--- ELF/Target.h
+++ ELF/Target.h
@@ -21,6 +21,7 @@
 
 class TargetInfo {
 public:
+  enum TlsOpt { None, ToLE };
   unsigned getPageSize() const { return PageSize; }
   uint64_t getVAStart() const;
   unsigned getCopyReloc() const { return CopyReloc; }
@@ -57,7 +58,9 @@
   virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
   virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                            uint64_t P, uint64_t SA) const = 0;
-
+  virtual TlsOpt getTlsOptimization(unsigned Type, const SymbolBody &S) const;
+  virtual void relocateTlsToLe(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+                               uint64_t P, uint64_t SA) const;
   virtual ~TargetInfo();
 
 protected:
Index: ELF/Target.cpp
===================================================================
--- ELF/Target.cpp
+++ ELF/Target.cpp
@@ -73,6 +73,9 @@
   void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
                    uint64_t SA) const override;
   bool isRelRelative(uint32_t Type) const override;
+  TlsOpt getTlsOptimization(unsigned Type, const SymbolBody &S) const override;
+  void relocateTlsToLe(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
+                       uint64_t SA) const override; // GOTTPOFF -> LE rewrite.
 };
 
 class PPC64TargetInfo final : public TargetInfo {
@@ -145,6 +148,11 @@
 
 TargetInfo::~TargetInfo() {}
 
+TargetInfo::TlsOpt TargetInfo::getTlsOptimization(unsigned Type,
+                                                  const SymbolBody &S) const {
+  return TlsOpt::None; // Base default: no TLS rewrite for any relocation.
+}
+
 uint64_t TargetInfo::getVAStart() const { return Config->Shared ? 0 : VAStart; }
 
 bool TargetInfo::relocNeedsCopy(uint32_t Type, const SymbolBody &S) const {
@@ -159,6 +167,9 @@
 
 void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
 
+void TargetInfo::relocateTlsToLe(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+                                 uint64_t P, uint64_t SA) const {} // No-op.
+
 X86TargetInfo::X86TargetInfo() {
   PCRelReloc = R_386_PC32;
   GotReloc = R_386_GLOB_DAT;
@@ -268,6 +279,8 @@
 }
 
 bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
-  return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
-         relocNeedsPlt(Type, S);
+  // GOTTPOFF needs a GOT entry only when it is not rewritten to local-exec.
+  if (Type == R_X86_64_GOTTPOFF)
+    return getTlsOptimization(Type, S) == None;
+  return Type == R_X86_64_GOTPCREL || relocNeedsPlt(Type, S);
 }
@@ -333,6 +346,41 @@
   }
 }
 
+TargetInfo::TlsOpt
+X86_64TargetInfo::getTlsOptimization(unsigned Type, const SymbolBody &S) const {
+  if (Config->Shared || Type != R_X86_64_GOTTPOFF)
+    return None; // Candidates: R_X86_64_GOTTPOFF with Config->Shared unset.
+  return canBePreempted(&S, true) ? None : ToLE;
+}
+
+// Rewrites the instruction in front of the 32-bit field at Loc in place:
+//   movq foo@gottpoff(%rip), %reg  ->  movq $foo, %reg
+//   addq foo@gottpoff(%rip), %reg  ->  leaq foo(%reg), %reg
+//   addq foo@gottpoff(%rip), %rsp  ->  addq $foo, %rsp
+// The last form exists because addressing with %rsp is special. The field
+// itself receives SA minus the memory size of the TLS program header.
+// BufEnd, Type and P are not used here.
+void X86_64TargetInfo::relocateTlsToLe(uint8_t *Loc, uint8_t *BufEnd,
+                                       uint32_t Type, uint64_t P,
+                                       uint64_t SA) const {
+  // Loc[-3] is the REX prefix, Loc[-2] the opcode, Loc[-1] the ModRM byte.
+  uint8_t *Prefix = Loc - 3;
+  uint8_t *Opcode = Loc - 2;
+  uint8_t *ModRM = Loc - 1;
+  const uint8_t Reg = *ModRM >> 3;
+  const bool IsMov = *Opcode == 0x8b;
+  // movq and the register-4 (%rsp) addq both end up with an immediate operand.
+  const bool UseImm = IsMov || Reg == 4;
+  if (*Prefix == 0x4c)
+    *Prefix = UseImm ? 0x49 : 0x4d;
+  if (UseImm)
+    *Opcode = IsMov ? 0xc7 : 0x81;
+  else
+    *Opcode = 0x8d;
+  *ModRM = UseImm ? (0xc0 | Reg) : (0x80 | Reg | (Reg << 3));
+  write32le(Loc, SA - Out<ELF64LE>::TlsPhdr->p_memsz);
+}
+
 void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                                    uint64_t P, uint64_t SA) const {
   switch (Type) {
Index: test/elf2/tls-opt.s
===================================================================
--- test/elf2/tls-opt.s
+++ test/elf2/tls-opt.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+// RUN: ld.lld2 -e main %t.o -o %t1
+// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+// NORELOC:      Relocations [
+// NORELOC-NEXT: ]
+
+// DISASM: Disassembly of section .text:
+// DISASM-NEXT: main:
+// DISASM-NEXT: 11000: 48 c7 c0 fc ff ff ff movq $-4, %rax
+// DISASM-NEXT: 11007: 48 c7 c0 f8 ff ff ff movq $-8, %rax
+// DISASM-NEXT: 1100e: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
+// DISASM-NEXT: 11015: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
+// DISASM-NEXT: 1101c: 48 81 c4 fc ff ff ff addq $-4, %rsp
+// DISASM-NEXT: 11023: 48 81 c4 f8 ff ff ff addq $-8, %rsp
+   
+.type tls0,@object
+.section .tbss,"awT",@nobits  // tls0 (and tls1 after it) are defined in .tbss
+.globl tls0
+.align 4
+tls0:  // 4-byte TLS object; main's tls0 references are expected to become -8
+ .long 0
+ .size tls0, 4
+
+.type  tls1,@object
+.globl tls1
+.align 4
+tls1:  // 4-byte TLS object; main's tls1 references are expected to become -4
+ .long 0
+ .size tls1, 4
+
+.text
+ .globl main
+ .align 16, 0x90
+ .type main,@function
+main:
+ movq tls1@GOTTPOFF(%rip), %rax  // -> movq $-4, %rax
+ movq tls0@GOTTPOFF(%rip), %rax  // -> movq $-8, %rax
+ addq tls1@GOTTPOFF(%rip), %rax  // -> leaq -4(%rax), %rax
+ addq tls0@GOTTPOFF(%rip), %rax  // -> leaq -8(%rax), %rax
+ addq tls1@GOTTPOFF(%rip), %rsp  // -> addq $-4, %rsp
+ addq tls0@GOTTPOFF(%rip), %rsp  // -> addq $-8, %rsp