Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1138,6 +1138,9 @@ MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -22660,6 +22660,35 @@ } MachineBasicBlock * +X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const { + // So, here we replace TLSADDR with the sequence: + // adjust_stackdown -> TLSADDR -> adjust_stackup. + // We need this because TLSADDR is lowered into calls + // inside MC, therefore without the two markers shrink-wrapping + // may push the prologue/epilogue pass them. + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction &MF = *BB->getParent(); + + // Emit CALLSEQ_START right before the instruction. + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + MachineInstrBuilder CallseqStart = + BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0); + BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); + + // Emit CALLSEQ_END right after the instruction. + // We don't call erase from parent because we want to keep the + // original instruction around. + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + MachineInstrBuilder CallseqEnd = + BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); + BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); + + return BB; +} + +MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const { // This is pretty easy. We're taking the value that we received from @@ -23039,6 +23068,11 @@ case X86::TCRETURNri64: case X86::TCRETURNmi64: return BB; + case X86::TLS_addr32: + case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + return EmitLoweredTLSAddr(MI, BB); case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); case X86::CATCHRET: Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -436,7 +436,7 @@ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { + usesCustomInserter = 1, Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, @@ -456,7 +456,7 @@ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { + usesCustomInserter = 1, Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Index: test/CodeGen/X86/tls-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/tls-shrink-wrapping.ll +++ test/CodeGen/X86/tls-shrink-wrapping.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -relocation-model=pic | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-freebsd11.0" + +@i = external thread_local global i32, align 4 + +; Function Attrs: nounwind uwtable +define i32 @g() #0 { +entry: + %0 = load i32, i32* @i, align 4, !tbaa !2 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 0, i32* @i, align 4, !tbaa !2 + tail call void (...) @f() #2 + %.pre = load i32, i32* @i, align 4, !tbaa !2 + br label %if.end + +if.end: ; preds = %entry, %if.then + %1 = phi i32 [ 0, %entry ], [ %.pre, %if.then ] + ret i32 %1 +} + +; CHECK: g: # @g +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # BB#0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: data16 +; CHECK-NEXT: leaq i@TLSGD(%rip), %rdi + +declare void @f(...) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size +"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu +"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 3.9.0 (trunk 261236)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"}