Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -7242,12 +7242,8 @@
   case X86::CVTSI2SD64rm:
   case X86::CVTSD2SSrr:
   case X86::CVTSD2SSrm:
-  case X86::Int_CVTSD2SSrr:
-  case X86::Int_CVTSD2SSrm:
   case X86::CVTSS2SDrr:
   case X86::CVTSS2SDrm:
-  case X86::Int_CVTSS2SDrr:
-  case X86::Int_CVTSS2SDrm:
   case X86::MOVHPDrm:
   case X86::MOVHPSrm:
   case X86::MOVLPDrm:
@@ -7258,12 +7254,8 @@
   case X86::RCPSSm_Int:
   case X86::ROUNDSDr:
   case X86::ROUNDSDm:
-  case X86::ROUNDSDr_Int:
-  case X86::ROUNDSDm_Int:
   case X86::ROUNDSSr:
   case X86::ROUNDSSm:
-  case X86::ROUNDSSr_Int:
-  case X86::ROUNDSSm_Int:
   case X86::RSQRTSSr:
   case X86::RSQRTSSm:
   case X86::RSQRTSSr_Int:
Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1227,14 +1227,12 @@
 ; X32-LABEL: test_mm_cvtsd_ss_load:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movaps (%eax), %xmm1
-; X32-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X32-NEXT:    cvtsd2ss (%eax), %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtsd_ss_load:
 ; X64:       # BB#0:
-; X64-NEXT:    movaps (%rdi), %xmm1
-; X64-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X64-NEXT:    cvtsd2ss (%rdi), %xmm0
 ; X64-NEXT:    retq
   %a1 = load <2 x double>, <2 x double>* %p1
   %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -370,8 +370,7 @@
 ; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; VCHECK-LABEL: test_x86_sse2_cvtsd2ss_load:
@@ -444,8 +443,7 @@
 ; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
Index: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -467,6 +467,24 @@
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 
+define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
+; SSE41-LABEL: test_x86_sse41_round_sd_load:
+; SSE41:       ## BB#0:
+; SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE41-NEXT:    roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
+; SSE41-NEXT:    retl ## encoding: [0xc3]
+;
+; VCHECK-LABEL: test_x86_sse41_round_sd_load:
+; VCHECK:       ## BB#0:
+; VCHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; VCHECK-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
+; VCHECK-NEXT:    retl ## encoding: [0xc3]
+  %a1b = load <2 x double>, <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+
 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE41-LABEL: test_x86_sse41_round_ss:
 ; SSE41:       ## BB#0:
Index: llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
+++ llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
@@ -98,9 +98,8 @@
 define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
 ; CHECK-LABEL: load_fold_cvtss2sd_int:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movaps (%rdi), %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT:    cvtss2sd (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %ld = load <4 x float>, <4 x float> *%a
   %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
Index: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -200,8 +200,7 @@
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movaps %xmm0, (%esp) ## 16-byte Spill
 ; X32-NEXT:    calll _f
-; X32-NEXT:    movaps (%esp), %xmm1 ## 16-byte Reload
-; X32-NEXT:    roundss $4, %xmm1, %xmm0
+; X32-NEXT:    roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
 ; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
@@ -211,8 +210,7 @@
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
 ; X64-NEXT:    callq _f
-; X64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
-; X64-NEXT:    roundss $4, %xmm1, %xmm0
+; X64-NEXT:    roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
 ;
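
Note (not part of the patch): for readers without the surrounding source, the X86InstrInfo.cpp hunks above delete cases from the opcode switch in the partial-register-update predicate that the load-folding code consults before folding a load into an instruction. A minimal C++ sketch of that shape follows; the function name and the reduced case list are assumptions inferred from the case labels, not code taken from this patch:

  // Sketch only. Returning true marks an opcode as writing only part of its
  // destination register, so the folding path refuses to turn it into a
  // memory-operand form (the fold would reintroduce a partial-register
  // stall). The Int_/_Int intrinsic forms are dropped from the list because
  // they take their upper elements from an explicit source operand rather
  // than leaving stale destination bits, so folding a load into them is
  // safe; the updated tests check exactly that fold.
  static bool hasPartialRegUpdate(unsigned Opcode) {
    switch (Opcode) {
    case X86::CVTSD2SSrr:   // still partial updates: upper bits untouched
    case X86::CVTSD2SSrm:
    case X86::ROUNDSSr:
    case X86::ROUNDSSm:
      return true;
    }
    return false;
  }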