Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1936,7 +1936,8 @@
       if (Subtarget.hasSSE2())
         return MVT::v16i8;
       // TODO: Can SSE1 handle a byte vector?
-      if (Subtarget.hasSSE1())
+
+      if (Subtarget.hasSSE1() && Subtarget.hasX87())
         return MVT::v4f32;
     } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
                !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
Index: llvm/test/CodeGen/X86/pr38738.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/pr38738.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefix X64 %s
+; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefix X86 %s
+
+%struct.params = type { double, double }
+
+define dso_local i32 @pr38738() {
+; X64-LABEL: pr38738:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr38738:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, (%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+entry:
+  %retval = alloca i32, align 4
+  %dlg_sys_param = alloca %struct.params, align 8
+  %total_active_bw = alloca float, align 4
+  %0 = bitcast %struct.params* %dlg_sys_param to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 16, i1 false)
+  store float 0.000000e+00, float* %total_active_bw, align 4
+  %1 = load i32, i32* %retval, align 4
+  ret i32 %1
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) argmemonly nounwind
+