diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4082,6 +4082,17 @@
   NoSignFlag = false;
   ClearsOverflowFlag = false;
 
+  // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF and
+  // i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
+  // Initial Exec to Local Exec relaxation. In these cases, we must not depend
+  // on the EFLAGS modification of ADD actually happening in the final binary.
+  if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
+    unsigned Flags = MI.getOperand(5).getTargetFlags();
+    if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
+        Flags == X86II::MO_GOTNTPOFF)
+      return false;
+  }
+
   switch (MI.getOpcode()) {
   default:
     return false;
diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=i386-- %s -o - | FileCheck %s
+
+# Linkers may change `addl xx@GOTNTPOFF, %reg` to `leal OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after the
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+# NOTE2: the foo_nopic MIR was produced from IR with --relocation-model=static,
+# while foo_pic's MIR was produced with --relocation-model=pic.
+
+--- |
+  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+  target triple = "i386-unknown-linux-gnu"
+
+  @x = external thread_local(initialexec) global i32, align 4
+
+  define i32 @foo_nopic() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  define i32 @foo_pic() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+  declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+  attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name: foo_nopic
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo_nopic
+    ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %0:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    %1:gr32 = ADD32rm %0, $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    TEST32rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...
+---
+name: foo_pic
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32_nosp }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr8 }
+  - { id: 4, class: gr32 }
+  - { id: 5, class: gr32 }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo_pic
+    ; CHECK: [[MOVPC32r:%[0-9]+]]:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+    ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32_nosp = ADD32ri [[MOVPC32r]], target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+    ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[ADD32ri]], 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %5:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+    %0:gr32_nosp = ADD32ri %5, target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+    %1:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    %2:gr32 = ADD32rm %1, %0, 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    TEST32rr %2, %2, implicit-def $eflags
+    %3:gr8 = SETCCr 4, implicit $eflags
+    %4:gr32 = MOVZX32rr8 killed %3
+    $eax = COPY %4
+    RET 0, $eax
+...
diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
+
+# Linkers may change `addq xx@GOTTPOFF, %reg` to `leaq OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after the
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+--- |
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  @x = external thread_local(initialexec) global i32, align 4
+
+  define i32 @foo() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+  declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+  attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name: foo
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo
+    ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+    ; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[MOV64rm]], $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+    ; CHECK-NEXT: TEST64rr [[ADD64rm]], [[ADD64rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+    %1:gr64 = ADD64rm %0, $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...
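Note on the relaxation being guarded against (an illustrative sketch, not part of the patch): the sequences below follow "ELF Handling for Thread-Local Storage"; the symbol x is taken from the tests above, but the registers are arbitrary and the exact rewrite is up to the linker.

    # x86-64 Initial Exec access to x (registers illustrative):
    movq %fs:0, %rax
    addq x@gottpoff(%rip), %rax    # ADD writes EFLAGS (R_X86_64_GOTTPOFF)
    # after Initial Exec to Local Exec relaxation the linker may rewrite the ADD as:
    leaq x@tpoff(%rax), %rax       # LEA leaves EFLAGS untouched

    # i386 PIC equivalent using GOTNTPOFF (registers illustrative):
    movl %gs:0, %eax
    addl x@gotntpoff(%ebx), %eax
    # may likewise become:
    leal x@ntpoff(%eax), %eax

Because the ADD recorded in the MIR can end up as an LEA in the linked binary, peephole-opt must not rewrite the later TEST into a reuse of the ADD's EFLAGS definition, which is what the new check prevents.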