Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -606,9 +606,9 @@ setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87, except f128 in MMX. + // Long double always uses X87, except f128 in SSE. if (UseX87) { - if (Subtarget.is64Bit() && Subtarget.hasMMX()) { + if (Subtarget.is64Bit() && Subtarget.hasSSE1()) { addRegisterClass(MVT::f128, &X86::FR128RegClass); ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); setOperationAction(ISD::FABS , MVT::f128, Custom); Index: test/CodeGen/X86/extract-store.ll =================================================================== --- test/CodeGen/X86/extract-store.ll +++ test/CodeGen/X86/extract-store.ll @@ -5,8 +5,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE41-X64 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=AVX-X32 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=AVX-X64 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind { ; SSE2-X32-LABEL: extract_i8_0: @@ -527,17 +527,10 @@ ; SSE-X32-NEXT: popl %edi ; SSE-X32-NEXT: retl ; -; SSE2-X64-LABEL: extract_f128_0: -; SSE2-X64: # BB#0: -; SSE2-X64-NEXT: movq %rdx, 8(%rdi) -; SSE2-X64-NEXT: movq %rsi, (%rdi) -; SSE2-X64-NEXT: retq -; -; SSE41-X64-LABEL: extract_f128_0: -; SSE41-X64: # BB#0: -; SSE41-X64-NEXT: movq %rdx, 8(%rdi) -; SSE41-X64-NEXT: movq %rsi, (%rdi) -; SSE41-X64-NEXT: retq +; X64-LABEL: extract_f128_0: +; X64: # BB#0: +; X64-NEXT: movaps %xmm0, (%rdi) +; X64-NEXT: retq ; ; AVX-X32-LABEL: extract_f128_0: ; AVX-X32: # BB#0: @@ -545,17 +538,6 @@ ; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX-X32-NEXT: vmovups %xmm0, (%eax) ; AVX-X32-NEXT: retl -; -; AVX-X64-LABEL: extract_f128_0: -; AVX-X64: # BB#0: -; AVX-X64-NEXT: movq %rdx, 8(%rdi) -; AVX-X64-NEXT: movq %rsi, (%rdi) -; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_f128_0: -; SSE-F128: # BB#0: -; SSE-F128-NEXT: movaps %xmm0, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 0 store fp128 %vecext, fp128* %dst, align 1 ret void @@ -579,17 +561,10 @@ ; SSE-X32-NEXT: popl %edi ; SSE-X32-NEXT: retl ; -; SSE2-X64-LABEL: extract_f128_1: -; SSE2-X64: # BB#0: -; SSE2-X64-NEXT: movq %r8, 8(%rdi) -; SSE2-X64-NEXT: movq %rcx, (%rdi) -; SSE2-X64-NEXT: retq -; -; SSE41-X64-LABEL: extract_f128_1: -; SSE41-X64: # BB#0: -; SSE41-X64-NEXT: movq %r8, 8(%rdi) -; SSE41-X64-NEXT: movq %rcx, (%rdi) -; SSE41-X64-NEXT: retq +; X64-LABEL: extract_f128_1: +; X64: # BB#0: +; X64-NEXT: movaps %xmm1, (%rdi) +; X64-NEXT: retq ; ; AVX-X32-LABEL: extract_f128_1: ; AVX-X32: # BB#0: @@ -597,17 +572,6 @@ ; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX-X32-NEXT: vmovups %xmm0, (%eax) ; AVX-X32-NEXT: retl -; -; AVX-X64-LABEL: extract_f128_1: -; AVX-X64: # BB#0: -; AVX-X64-NEXT: movq %r8, 8(%rdi) -; AVX-X64-NEXT: movq %rcx, (%rdi) -; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_f128_1: -; SSE-F128: # BB#0: -; SSE-F128-NEXT: movaps %xmm1, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 1 store fp128 %vecext, fp128* %dst, align 1 ret void Index: test/CodeGen/X86/fp128-select.ll =================================================================== --- test/CodeGen/X86/fp128-select.ll +++ test/CodeGen/X86/fp128-select.ll @@ -1,12 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \ -; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \ -; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ -; RUN: -enable-legalize-types-checking | FileCheck %s -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ -; RUN: -enable-legalize-types-checking | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=-sse -enable-legalize-types-checking | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=-sse -enable-legalize-types-checking | FileCheck %s define void @test_select(fp128* %p, fp128* %q, i1 zeroext %c) { ; MMX-LABEL: test_select: Index: test/CodeGen/X86/vec_fp_to_int.ll =================================================================== --- test/CodeGen/X86/vec_fp_to_int.ll +++ test/CodeGen/X86/vec_fp_to_int.ll @@ -2399,51 +2399,37 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { ; SSE-LABEL: fptosi_2f128_to_4i32: ; SSE: # BB#0: -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %rbx -; SSE-NEXT: subq $24, %rsp -; SSE-NEXT: movq %rsi, %r14 -; SSE-NEXT: movq %rdi, %rbx -; SSE-NEXT: movq %rdx, %rdi -; SSE-NEXT: movq %rcx, %rsi +; SSE-NEXT: subq $40, %rsp +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: callq __fixtfdi ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; SSE-NEXT: movq %rbx, %rdi -; SSE-NEXT: movq %r14, %rsi +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; SSE-NEXT: callq __fixtfdi ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload ; SSE-NEXT: # xmm0 = xmm0[0],mem[0] ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] -; SSE-NEXT: addq $24, %rsp -; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r14 +; SSE-NEXT: addq $40, %rsp ; SSE-NEXT: retq ; ; AVX-LABEL: fptosi_2f128_to_4i32: ; AVX: # BB#0: -; AVX-NEXT: pushq %r14 -; AVX-NEXT: pushq %rbx -; AVX-NEXT: subq $24, %rsp -; AVX-NEXT: movq %rsi, %r14 -; AVX-NEXT: movq %rdi, %rbx -; AVX-NEXT: movq %rdx, %rdi -; AVX-NEXT: movq %rcx, %rsi +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: callq __fixtfdi ; AVX-NEXT: vmovq %rax, %xmm0 ; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; AVX-NEXT: movq %rbx, %rdi -; AVX-NEXT: movq %r14, %rsi +; AVX-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX-NEXT: callq __fixtfdi ; AVX-NEXT: vmovq %rax, %xmm0 ; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX-NEXT: addq $24, %rsp -; AVX-NEXT: popq %rbx -; AVX-NEXT: popq %r14 +; AVX-NEXT: addq $40, %rsp ; AVX-NEXT: retq %cvt = fptosi <2 x fp128> %a to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32>