Index: llvm/trunk/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp @@ -2410,7 +2410,8 @@ if (!Subtarget->hasAVX()) return false; - if (!I->getOperand(0)->getType()->isIntegerTy(32)) + Type *InTy = I->getOperand(0)->getType(); + if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64)) return false; // Select integer to float/double conversion. @@ -2423,11 +2424,11 @@ if (I->getType()->isDoubleTy()) { // sitofp int -> double - Opcode = X86::VCVTSI2SDrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr; RC = &X86::FR64RegClass; } else if (I->getType()->isFloatTy()) { // sitofp int -> float - Opcode = X86::VCVTSI2SSrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr; RC = &X86::FR32RegClass; } else return false; Index: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX + + +define double @long_to_double_rr(i64 %a) { +; SSE2-LABEL: long_to_double_rr: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdq %rdi, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_double_rr: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = sitofp i64 %a to double + ret double %0 +} + +define double @long_to_double_rm(i64* %a) { +; SSE2-LABEL: long_to_double_rm: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_double_rm: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to double + ret double %1 +} + +define float @long_to_float_rr(i64 %a) { +; SSE2-LABEL: long_to_float_rr: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssq %rdi, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_float_rr: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = sitofp i64 %a to float + ret float %0 +} + +define float @long_to_float_rm(i64* %a) { +; SSE2-LABEL: long_to_float_rm: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_float_rm: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to float + ret float %1 +} Index: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2 +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX +; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2_X86 +; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86 define double @int_to_double_rr(i32 %a) { @@ -13,6 +15,39 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_double_rr: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: .cfi_offset %ebp, -8 +; SSE2_X86-NEXT: movl %esp, %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp +; SSE2_X86-NEXT: andl $-8, %esp +; SSE2_X86-NEXT: subl $8, %esp +; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0 +; SSE2_X86-NEXT: movsd %xmm0, (%esp) +; SSE2_X86-NEXT: fldl (%esp) +; SSE2_X86-NEXT: movl %ebp, %esp +; SSE2_X86-NEXT: popl %ebp +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_double_rr: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: retl entry: %0 = sitofp i32 %a to double ret double %0 @@ -28,6 +63,40 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_double_rm: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: .cfi_offset %ebp, -8 +; SSE2_X86-NEXT: movl %esp, %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp +; SSE2_X86-NEXT: andl $-8, %esp +; SSE2_X86-NEXT: subl $8, %esp +; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0 +; SSE2_X86-NEXT: movsd %xmm0, (%esp) +; SSE2_X86-NEXT: fldl (%esp) +; SSE2_X86-NEXT: movl %ebp, %esp +; SSE2_X86-NEXT: popl %ebp +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_double_rm: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a %1 = sitofp i32 %0 to double @@ -44,6 +113,27 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_float_rr: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0 +; SSE2_X86-NEXT: movss %xmm0, (%esp) +; SSE2_X86-NEXT: flds (%esp) +; SSE2_X86-NEXT: popl %eax +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_float_rr: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: retl entry: %0 = sitofp i32 %a to float ret float %0 @@ -59,6 +149,28 @@ ; AVX: # BB#0: # %entry ; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_float_rm: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0 +; SSE2_X86-NEXT: movss %xmm0, (%esp) +; SSE2_X86-NEXT: flds (%esp) +; SSE2_X86-NEXT: popl %eax +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_float_rm: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a %1 = sitofp i32 %0 to float