diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -207,7 +207,8 @@ void Select(SDNode *N) override; bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); - bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); + bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, + bool NoRegisters = false); bool matchWrapper(SDValue N, X86ISelAddressMode &AM); bool matchAddress(SDValue N, X86ISelAddressMode &AM); bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); @@ -1613,20 +1614,26 @@ } -bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ +bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, + bool NoRegisters) { SDValue Address = N->getOperand(1); // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // - // This optimization is valid because the GNU TLS model defines that - // gs:0 (or fs:0 on X86-64) contains its own address. + // This optimization is generally valid because the GNU TLS model defines that + // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode + // with 32-bit registers, as we get in ILP32 mode, those registers are first + // zero-extended to 64 bits and then added to the base address, which gives + // unwanted results when the register holds a negative value. 
// For more information see http://people.redhat.com/drepper/tls.pdf - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) { if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && !IndirectTlsSegRefs && (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || - Subtarget->isTargetFuchsia())) + Subtarget->isTargetFuchsia())) { + if (Subtarget->isTarget64BitILP32() && !NoRegisters) + return true; switch (N->getPointerInfo().getAddrSpace()) { case X86AS::GS: AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); @@ -1637,6 +1644,8 @@ // Address space X86AS::SS is not handled here, because it is not used to // address TLS areas. } + } + } return true; } @@ -1720,6 +1729,22 @@ if (matchAddressRecursively(N, AM, 0)) return true; + // Post-processing: Make a second attempt to fold a load, if we now know + // that there will not be any other register. This is only performed for + // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded + // any foldable load the first time. + if (Subtarget->isTarget64BitILP32() && + AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() != nullptr && + AM.IndexReg.getNode() == nullptr) { + SDValue Save_Base_Reg = AM.Base_Reg; + if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) { + AM.Base_Reg = SDValue(); + if (matchLoadInAddress(LoadN, AM, /*NoRegisters=*/true)) + AM.Base_Reg = Save_Base_Reg; + } + } + // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has // a smaller encoding and avoids a scaled-index. 
if (AM.Scale == 2 && diff --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll --- a/llvm/test/CodeGen/X86/pic.ll +++ b/llvm/test/CodeGen/X86/pic.ll @@ -336,17 +336,17 @@ ; CHECK-I686-DAG: movl %gs:0, ; CHECK-X32-DAG: movl tlsdstie@GOTTPOFF(%rip), ; CHECK-X32-DAG: movl %fs:0, -; CHECK: addl +; CHECK: {{addl|leal \(%.*,%.*\),}} ; CHECK-I686: movl tlsptrie@GOTNTPOFF( ; CHECK-X32: movl tlsptrie@GOTTPOFF(%rip), ; CHECK-I686: movl {{%.*}}, %gs:( -; CHECK-X32: movl {{%.*}}, %fs:( +; CHECK-X32: movl {{%.*}}, ({{%.*,%.*}}) ; CHECK-I686: movl tlssrcie@GOTNTPOFF( ; CHECK-X32: movl tlssrcie@GOTTPOFF(%rip), ; CHECK-I686: movl %gs:( -; CHECK-X32: movl %fs:( +; CHECK-X32: movl ({{%.*,%.*}}), ; CHECK-I686: movl {{%.*}}, %gs:( -; CHECK-X32: movl {{%.*}}, %fs:( +; CHECK-X32: movl {{%.*}}, ({{%.*,%.*}}) ; CHECK-I686: ret ; CHECK-X32: retq } diff --git a/llvm/test/CodeGen/X86/tls-pie.ll b/llvm/test/CodeGen/X86/tls-pie.ll --- a/llvm/test/CodeGen/X86/tls-pie.ll +++ b/llvm/test/CodeGen/X86/tls-pie.ll @@ -65,7 +65,8 @@ ; X32-LABEL: f3: ; X32: # %bb.0: # %entry ; X32-NEXT: movl i2@{{.*}}(%rip), %eax -; X32-NEXT: movl %fs:(%eax), %eax +; X32-NEXT: movl %fs:0, %ecx +; X32-NEXT: movl (%ecx,%eax), %eax ; X32-NEXT: retq ; ; X64-LABEL: f3: