Please use GitHub pull requests for new patches. See the Phabricator shutdown timeline.
Changeset View
Changeset View
Standalone View
Standalone View
llvm/trunk/test/CodeGen/X86/fmf-flags.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||||
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 | ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 | ||||
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 | ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 | ||||
declare float @llvm.sqrt.f32(float %x); | declare float @llvm.sqrt.f32(float %x); | ||||
define float @fast_recip_sqrt(float %x) { | define float @fast_recip_sqrt(float %x) { | ||||
; X64-LABEL: fast_recip_sqrt: | ; X64-LABEL: fast_recip_sqrt: | ||||
; X64: # %bb.0: | ; X64: # %bb.0: | ||||
; X64-NEXT: rsqrtss %xmm0, %xmm1 | ; X64-NEXT: rsqrtss %xmm0, %xmm1 | ||||
; X64-NEXT: xorps %xmm2, %xmm2 | |||||
; X64-NEXT: cmpeqss %xmm0, %xmm2 | |||||
; X64-NEXT: mulss %xmm1, %xmm0 | ; X64-NEXT: mulss %xmm1, %xmm0 | ||||
; X64-NEXT: movss {{.*}}(%rip), %xmm3 | |||||
; X64-NEXT: mulss %xmm0, %xmm3 | |||||
; X64-NEXT: mulss %xmm1, %xmm0 | ; X64-NEXT: mulss %xmm1, %xmm0 | ||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0 | ; X64-NEXT: addss {{.*}}(%rip), %xmm0 | ||||
; X64-NEXT: mulss %xmm3, %xmm0 | ; X64-NEXT: mulss {{.*}}(%rip), %xmm1 | ||||
; X64-NEXT: andnps %xmm0, %xmm2 | ; X64-NEXT: mulss %xmm1, %xmm0 | ||||
; X64-NEXT: movss {{.*}}(%rip), %xmm0 | |||||
; X64-NEXT: divss %xmm2, %xmm0 | |||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; | ; | ||||
; X86-LABEL: fast_recip_sqrt: | ; X86-LABEL: fast_recip_sqrt: | ||||
; X86: # %bb.0: | ; X86: # %bb.0: | ||||
; X86-NEXT: flds {{[0-9]+}}(%esp) | ; X86-NEXT: flds {{[0-9]+}}(%esp) | ||||
; X86-NEXT: fsqrt | ; X86-NEXT: fsqrt | ||||
; X86-NEXT: fld1 | ; X86-NEXT: fld1 | ||||
; X86-NEXT: fdivp %st(1) | ; X86-NEXT: fdivp %st(1) | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | |||||
; The sqrt is strict. | ; The sqrt is strict. | ||||
@sqrt1 = common global float 0.000000e+00, align 4 | @sqrt1 = common global float 0.000000e+00, align 4 | ||||
; Reviewer notes for @not_so_fast_recip_sqrt:
; - Computes 1.0 / sqrt(%x). The @llvm.sqrt.f32 call carries no fast-math
;   flags; only the fdiv is marked 'fast'.
; - The sqrt result %y is also stored to the global @sqrt1, so it has a
;   second use besides the division.
; - %ret (the fadd with 14.5) is computed but never used; the function
;   returns %z.
; - Most rows below appear to hold text from both sides of a side-by-side
;   diff, separated by ' | ' -- TODO confirm which column is the current one.
define float @not_so_fast_recip_sqrt(float %x) { | define float @not_so_fast_recip_sqrt(float %x) { | ||||
; X64-LABEL: not_so_fast_recip_sqrt: | ; X64-LABEL: not_so_fast_recip_sqrt: | ||||
; X64: # %bb.0: | ; X64: # %bb.0: | ||||
; X64-NEXT: sqrtss %xmm0, %xmm1 | ; X64-NEXT: rsqrtss %xmm0, %xmm1 | ||||
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ; X64-NEXT: sqrtss %xmm0, %xmm2 | ||||
; X64-NEXT: divss %xmm1, %xmm0 | ; X64-NEXT: mulss %xmm1, %xmm0 | ||||
; X64-NEXT: movss %xmm1, {{.*}}(%rip) | ; X64-NEXT: mulss %xmm1, %xmm0 | ||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0 | |||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm1 | |||||
; X64-NEXT: mulss %xmm1, %xmm0 | |||||
; X64-NEXT: movss %xmm2, sqrt1(%rip) | |||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; | ; | ||||
; X86-LABEL: not_so_fast_recip_sqrt: | ; X86-LABEL: not_so_fast_recip_sqrt: | ||||
; X86: # %bb.0: | ; X86: # %bb.0: | ||||
; X86-NEXT: flds {{[0-9]+}}(%esp) | ; X86-NEXT: flds {{[0-9]+}}(%esp) | ||||
; X86-NEXT: fsqrt | ; X86-NEXT: fsqrt | ||||
; X86-NEXT: fld1 | ; X86-NEXT: fld1 | ||||
; X86-NEXT: fdiv %st(1) | ; X86-NEXT: fdiv %st(1) | ||||
; X86-NEXT: fxch %st(1) | ; X86-NEXT: fxch %st(1) | ||||
; X86-NEXT: fstps sqrt1 | ; X86-NEXT: fstps sqrt1 | ||||
; X86-NEXT: retl | ; X86-NEXT: retl | ||||
%y = call float @llvm.sqrt.f32(float %x) | %y = call float @llvm.sqrt.f32(float %x) | ||||
%z = fdiv fast float 1.0, %y | %z = fdiv fast float 1.0, %y | ||||
store float %y, float* @sqrt1, align 4 | store float %y, float* @sqrt1, align 4 | ||||
%ret = fadd float %z , 14.5 | %ret = fadd float %z , 14.5 | ||||
ret float %z | ret float %z | ||||
} | } | ||||
; Reviewer notes for @div_arcp_by_const:
; - Divides a half value by the constant 10.0 with only the 'arcp' flag set,
;   then extends the quotient to float and returns it.
; - The checks for both targets look for a multiply by the constant-pool
;   entry .LCPI4_0 (mulss / fmuls) rather than a divide instruction.
; - 1036828672 is 0x3DCCC000, which as an IEEE-754 single is 0.0999755859375.
;   That is the value 0.1 rounded to half precision, so the expected
;   reciprocal appears to be formed in half and then widened -- TODO confirm.
; - The instruction checks here are plain (not NEXT-chained), so other
;   instructions may appear between the matched lines.
define float @div_arcp_by_const(half %x) { | |||||
; X64-LABEL: .LCPI4_0: | |||||
; X64-NEXT: .long 1036828672 | |||||
; X64-LABEL: div_arcp_by_const: | |||||
; X64: movzwl %ax, %edi | |||||
; X64: mulss .LCPI4_0(%rip), %xmm0 | |||||
; | |||||
; X86-LABEL: .LCPI4_0: | |||||
; X86-NEXT: .long 1036828672 | |||||
; X86-LABEL: div_arcp_by_const: | |||||
; X86: movzwl %ax, %eax | |||||
; X86: fmuls .LCPI4_0 | |||||
%rcp = fdiv arcp half %x, 10.0 | |||||
%z = fpext half %rcp to float | |||||
ret float %z | |||||
} | |||||