diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -207,7 +207,8 @@
     void Select(SDNode *N) override;
 
     bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
-    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+                            bool AllowSegmentRegForX32 = false);
     bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool matchAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
@@ -1613,20 +1614,26 @@
 
 }
 
-bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+                                         bool AllowSegmentRegForX32) {
   SDValue Address = N->getOperand(1);
 
   // load gs:0 -> GS segment register.
   // load fs:0 -> FS segment register.
   //
-  // This optimization is valid because the GNU TLS model defines that
-  // gs:0 (or fs:0 on X86-64) contains its own address.
+  // This optimization is generally valid because the GNU TLS model defines that
+  // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
+  // with 32-bit registers, as we get in ILP32 mode, those registers are first
+  // zero-extended to 64 bits and then added it to the base address, which gives
+  // unwanted results when the register holds a negative value.
   // For more information see http://people.redhat.com/drepper/tls.pdf
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) {
     if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
         !IndirectTlsSegRefs &&
         (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
-         Subtarget->isTargetFuchsia()))
+         Subtarget->isTargetFuchsia())) {
+      if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
+        return true;
       switch (N->getPointerInfo().getAddrSpace()) {
       case X86AS::GS:
         AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -1637,6 +1644,8 @@
       // Address space X86AS::SS is not handled here, because it is not used to
       // address TLS areas.
       }
+    }
+  }
 
   return true;
 }
@@ -1720,6 +1729,21 @@
   if (matchAddressRecursively(N, AM, 0))
     return true;
 
+  // Post-processing: Make a second attempt to fold a load, if we now know
+  // that there will not be any other register. This is only performed for
+  // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
+  // any foldable load the first time.
+  if (Subtarget->isTarget64BitILP32() &&
+      AM.BaseType == X86ISelAddressMode::RegBase &&
+      AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
+    SDValue Save_Base_Reg = AM.Base_Reg;
+    if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
+      AM.Base_Reg = SDValue();
+      if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
+        AM.Base_Reg = Save_Base_Reg;
+    }
+  }
+
   // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
   // a smaller encoding and avoids a scaled-index.
   if (AM.Scale == 2 &&
diff --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll
--- a/llvm/test/CodeGen/X86/pic.ll
+++ b/llvm/test/CodeGen/X86/pic.ll
@@ -336,17 +336,18 @@
 ; CHECK-I686-DAG:	movl	%gs:0,
 ; CHECK-X32-DAG:	movl	tlsdstie@GOTTPOFF(%rip),
 ; CHECK-X32-DAG:	movl	%fs:0,
-; CHECK:	addl
+; CHECK-I686:	addl
+; CHECK-X32:	leal	({{%.*,%.*}}),
 ; CHECK-I686:	movl	tlsptrie@GOTNTPOFF(
 ; CHECK-X32:	movl	tlsptrie@GOTTPOFF(%rip),
 ; CHECK-I686:	movl	{{%.*}}, %gs:(
-; CHECK-X32:	movl	{{%.*}}, %fs:(
+; CHECK-X32:	movl	{{%.*}}, ({{%.*,%.*}})
 ; CHECK-I686:	movl	tlssrcie@GOTNTPOFF(
 ; CHECK-X32:	movl	tlssrcie@GOTTPOFF(%rip),
 ; CHECK-I686:	movl	%gs:(
-; CHECK-X32:	movl	%fs:(
+; CHECK-X32:	movl	({{%.*,%.*}}),
 ; CHECK-I686:	movl	{{%.*}}, %gs:(
-; CHECK-X32:	movl	{{%.*}}, %fs:(
+; CHECK-X32:	movl	{{%.*}}, ({{%.*,%.*}})
 ; CHECK-I686:	ret
 ; CHECK-X32:	retq
 }
diff --git a/llvm/test/CodeGen/X86/tls-pie.ll b/llvm/test/CodeGen/X86/tls-pie.ll
--- a/llvm/test/CodeGen/X86/tls-pie.ll
+++ b/llvm/test/CodeGen/X86/tls-pie.ll
@@ -65,7 +65,8 @@
 ; X32-LABEL: f3:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl i2@{{.*}}(%rip), %eax
-; X32-NEXT:    movl %fs:(%eax), %eax
+; X32-NEXT:    movl %fs:0, %ecx
+; X32-NEXT:    movl (%ecx,%eax), %eax
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f3: