Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,8 +27,6 @@
 //===----------------------------------------------------------------------===//
 
 def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
-def load_mvmmx : PatFrag<(ops node:$ptr),
-                         (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
 
 //===----------------------------------------------------------------------===//
 // SSE specific DAG Nodes.
Index: lib/Target/X86/X86InstrMMX.td
===================================================================
--- lib/Target/X86/X86InstrMMX.td
+++ lib/Target/X86/X86InstrMMX.td
@@ -479,13 +479,6 @@
                                     int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
                                     MMX_SHIFT_ITINS>;
 
-def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
-
 defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                     int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
                                     MMX_SHIFT_ITINS>;
@@ -496,13 +489,6 @@
                                     int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
                                     MMX_SHIFT_ITINS>;
 
-def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
-
 defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                                     int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
                                     MMX_SHIFT_ITINS>;
@@ -510,11 +496,6 @@
                                     int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
                                     MMX_SHIFT_ITINS>;
 
-def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
-          (MMX_PSRADrm VR64:$src1, addr:$src2)>;
-
 // Comparison Instructions
 defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
                                      MMX_INTALU_ITINS>;
Index: test/CodeGen/X86/bitcast-mmx.ll
===================================================================
--- test/CodeGen/X86/bitcast-mmx.ll
+++ test/CodeGen/X86/bitcast-mmx.ll
@@ -34,9 +34,10 @@
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movq 8(%ebp), %mm0
-; X86-NEXT:    psllq 16(%ebp), %mm0
-; X86-NEXT:    movq %mm0, (%esp)
+; X86-NEXT:    movd 16(%ebp), %mm0
+; X86-NEXT:    movq 8(%ebp), %mm1
+; X86-NEXT:    psllq %mm0, %mm1
+; X86-NEXT:    movq %mm1, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
@@ -64,10 +65,11 @@
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    psllq 16(%ebp), %mm0
-; X86-NEXT:    por 8(%ebp), %mm0
-; X86-NEXT:    movq %mm0, (%esp)
+; X86-NEXT:    movd 16(%ebp), %mm0
+; X86-NEXT:    movd 20(%ebp), %mm1
+; X86-NEXT:    psllq %mm0, %mm1
+; X86-NEXT:    por 8(%ebp), %mm1
+; X86-NEXT:    movq %mm1, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
@@ -104,7 +106,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psllq (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psllq %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -115,7 +118,8 @@
 ; X64-LABEL: t3:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psllq (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psllq %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
Index: test/CodeGen/X86/mmx-fold-load.ll
===================================================================
--- test/CodeGen/X86/mmx-fold-load.ll
+++ test/CodeGen/X86/mmx-fold-load.ll
@@ -12,7 +12,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psllq (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psllq %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -23,7 +24,8 @@
 ; X64-LABEL: t0:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psllq (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psllq %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -46,7 +48,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psrlq (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psrlq %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -57,7 +60,8 @@
 ; X64-LABEL: t1:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psrlq (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psrlq %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -80,7 +84,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psllw (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psllw %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -91,7 +96,8 @@
 ; X64-LABEL: t2:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psllw (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psllw %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -114,7 +120,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psrlw (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psrlw %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -125,7 +132,8 @@
 ; X64-LABEL: t3:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psrlw (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psrlw %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -148,7 +156,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    pslld (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    pslld %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -159,7 +168,8 @@
 ; X64-LABEL: t4:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pslld (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    pslld %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -182,7 +192,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psrld (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psrld %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -193,7 +204,8 @@
 ; X64-LABEL: t5:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psrld (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psrld %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -216,7 +228,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psraw (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psraw %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -227,7 +240,8 @@
 ; X64-LABEL: t6:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psraw (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psraw %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -250,7 +264,8 @@
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl 8(%ebp), %ecx
 ; X86-NEXT:    movq (%ecx), %mm0
-; X86-NEXT:    psrad (%eax), %mm0
+; X86-NEXT:    movd (%eax), %mm1
+; X86-NEXT:    psrad %mm1, %mm0
 ; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -261,7 +276,8 @@
 ; X64-LABEL: t7:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    psrad (%rsi), %mm0
+; X64-NEXT:    movd (%rsi), %mm1
+; X64-NEXT:    psrad %mm1, %mm0
 ; X64-NEXT:    movd %mm0, %rax
 ; X64-NEXT:    retq
 entry:
@@ -563,8 +579,6 @@
 }
 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
 
-; FIXME: Show issue with storing i32 to stack and then reloading as x86_mmx
-; which will lead to garbage in the other 32-bits.
 define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
 ; X86-LABEL: test_psrlq_by_volatile_shift_amount:
 ; X86:       # BB#0: # %entry
@@ -574,11 +588,12 @@
 ; X86-NEXT:    subl $16, %esp
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $255, {{[0-9]+}}(%esp)
-; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT:    psrlq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT:    movq %mm0, (%eax)
+; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm1
+; X86-NEXT:    psrlq %mm0, %mm1
+; X86-NEXT:    movq %mm1, (%eax)
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
@@ -586,10 +601,11 @@
 ; X64-LABEL: test_psrlq_by_volatile_shift_amount:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm0
 ; X64-NEXT:    movl $255, %eax
-; X64-NEXT:    movd %rax, %mm0
-; X64-NEXT:    psrlq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT:    movq %mm0, (%rdi)
+; X64-NEXT:    movd %rax, %mm1
+; X64-NEXT:    psrlq %mm0, %mm1
+; X64-NEXT:    movq %mm1, (%rdi)
 ; X64-NEXT:    retq
 entry:
   %0 = alloca i32, align 4