diff --git a/llvm/test/CodeGen/X86/half-constrained.ll b/llvm/test/CodeGen/X86/half-constrained.ll
--- a/llvm/test/CodeGen/X86/half-constrained.ll
+++ b/llvm/test/CodeGen/X86/half-constrained.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
-; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-F16C
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X32-F16C
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C
 
@@ -21,12 +21,15 @@
 ;
 ; X32-F16C-LABEL: half_to_float:
 ; X32-F16C:       ## %bb.0:
-; X32-F16C-NEXT:    subl $12, %esp
-; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
+; X32-F16C-NEXT:    pushl %eax
+; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
 ; X32-F16C-NEXT:    movzwl _a, %eax
-; X32-F16C-NEXT:    movl %eax, (%esp)
-; X32-F16C-NEXT:    calll ___extendhfsf2
-; X32-F16C-NEXT:    addl $12, %esp
+; X32-F16C-NEXT:    vmovd %eax, %xmm0
+; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
+; X32-F16C-NEXT:    flds (%esp)
+; X32-F16C-NEXT:    wait
+; X32-F16C-NEXT:    popl %eax
 ; X32-F16C-NEXT:    retl
 ;
 ; X64-NOF16C-LABEL: half_to_float:
@@ -65,8 +68,12 @@
 ; X32-F16C-NEXT:    subl $12, %esp
 ; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-F16C-NEXT:    movzwl _a, %eax
-; X32-F16C-NEXT:    movl %eax, (%esp)
-; X32-F16C-NEXT:    calll ___extendhfsf2
+; X32-F16C-NEXT:    vmovd %eax, %xmm0
+; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; X32-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; X32-F16C-NEXT:    vmovsd %xmm0, (%esp)
+; X32-F16C-NEXT:    fldl (%esp)
+; X32-F16C-NEXT:    wait
 ; X32-F16C-NEXT:    addl $12, %esp
 ; X32-F16C-NEXT:    retl
 ;
@@ -105,12 +112,15 @@
 ;
 ; X32-F16C-LABEL: half_to_fp80:
 ; X32-F16C:       ## %bb.0:
-; X32-F16C-NEXT:    subl $12, %esp
-; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
+; X32-F16C-NEXT:    pushl %eax
+; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
 ; X32-F16C-NEXT:    movzwl _a, %eax
-; X32-F16C-NEXT:    movl %eax, (%esp)
-; X32-F16C-NEXT:    calll ___extendhfsf2
-; X32-F16C-NEXT:    addl $12, %esp
+; X32-F16C-NEXT:    vmovd %eax, %xmm0
+; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
+; X32-F16C-NEXT:    flds (%esp)
+; X32-F16C-NEXT:    wait
+; X32-F16C-NEXT:    popl %eax
 ; X32-F16C-NEXT:    retl
 ;
 ; X64-NOF16C-LABEL: half_to_fp80:
@@ -154,14 +164,9 @@
 ;
 ; X32-F16C-LABEL: float_to_half:
 ; X32-F16C:       ## %bb.0:
-; X32-F16C-NEXT:    subl $12, %esp
-; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
-; X32-F16C-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-F16C-NEXT:    fstps (%esp)
-; X32-F16C-NEXT:    wait
-; X32-F16C-NEXT:    calll ___truncsfhf2
-; X32-F16C-NEXT:    movw %ax, _a
-; X32-F16C-NEXT:    addl $12, %esp
+; X32-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
 ; X32-F16C-NEXT:    retl
 ;
 ; X64-NOF16C-LABEL: float_to_half:
@@ -202,9 +207,8 @@
 ; X32-F16C:       ## %bb.0:
 ; X32-F16C-NEXT:    subl $12, %esp
 ; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
-; X32-F16C-NEXT:    fldl {{[0-9]+}}(%esp)
-; X32-F16C-NEXT:    fstpl (%esp)
-; X32-F16C-NEXT:    wait
+; X32-F16C-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-F16C-NEXT:    vmovsd %xmm0, (%esp)
 ; X32-F16C-NEXT:    calll ___truncdfhf2
 ; X32-F16C-NEXT:    movw %ax, _a
 ; X32-F16C-NEXT:    addl $12, %esp
@@ -309,23 +313,17 @@
 ;
 ; X32-F16C-LABEL: add:
 ; X32-F16C:       ## %bb.0:
-; X32-F16C-NEXT:    subl $12, %esp
-; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-F16C-NEXT:    movzwl _a, %eax
-; X32-F16C-NEXT:    movl %eax, (%esp)
-; X32-F16C-NEXT:    calll ___extendhfsf2
-; X32-F16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X32-F16C-NEXT:    wait
+; X32-F16C-NEXT:    vmovd %eax, %xmm0
+; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; X32-F16C-NEXT:    movzwl _b, %eax
-; X32-F16C-NEXT:    movl %eax, (%esp)
-; X32-F16C-NEXT:    calll ___extendhfsf2
-; X32-F16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
-; X32-F16C-NEXT:    faddp %st, %st(1)
-; X32-F16C-NEXT:    fstps (%esp)
-; X32-F16C-NEXT:    wait
-; X32-F16C-NEXT:    calll ___truncsfhf2
-; X32-F16C-NEXT:    movw %ax, _c
-; X32-F16C-NEXT:    addl $12, %esp
+; X32-F16C-NEXT:    vmovd %eax, %xmm1
+; X32-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; X32-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; X32-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X32-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; X32-F16C-NEXT:    vpextrw $0, %xmm0, _c
 ; X32-F16C-NEXT:    retl
 ;
 ; X64-NOF16C-LABEL: add: