Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/X86/shrink_vmul.ll
Show First 20 Lines • Show All 734 Lines • ▼ Show 20 Lines | |||||
; X86-SSE-NEXT: movdqu (%edx,%ecx), %xmm0 | ; X86-SSE-NEXT: movdqu (%edx,%ecx), %xmm0 | ||||
; X86-SSE-NEXT: movdqu 16(%edx,%ecx), %xmm1 | ; X86-SSE-NEXT: movdqu 16(%edx,%ecx), %xmm1 | ||||
; X86-SSE-NEXT: movdqu (%eax,%ecx), %xmm2 | ; X86-SSE-NEXT: movdqu (%eax,%ecx), %xmm2 | ||||
; X86-SSE-NEXT: movdqu 16(%eax,%ecx), %xmm3 | ; X86-SSE-NEXT: movdqu 16(%eax,%ecx), %xmm3 | ||||
; X86-SSE-NEXT: movdqa %xmm2, %xmm4 | ; X86-SSE-NEXT: movdqa %xmm2, %xmm4 | ||||
; X86-SSE-NEXT: pmulhuw %xmm0, %xmm4 | ; X86-SSE-NEXT: pmulhuw %xmm0, %xmm4 | ||||
; X86-SSE-NEXT: pmullw %xmm0, %xmm2 | ; X86-SSE-NEXT: pmullw %xmm0, %xmm2 | ||||
; X86-SSE-NEXT: movdqa %xmm2, %xmm0 | ; X86-SSE-NEXT: movdqa %xmm2, %xmm0 | ||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] | ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] | ||||
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] | ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] | ||||
; X86-SSE-NEXT: movdqa %xmm3, %xmm4 | ; X86-SSE-NEXT: movdqa %xmm3, %xmm4 | ||||
; X86-SSE-NEXT: pmulhuw %xmm1, %xmm4 | ; X86-SSE-NEXT: pmulhuw %xmm1, %xmm4 | ||||
; X86-SSE-NEXT: pmullw %xmm1, %xmm3 | ; X86-SSE-NEXT: pmullw %xmm1, %xmm3 | ||||
; X86-SSE-NEXT: movdqa %xmm3, %xmm1 | ; X86-SSE-NEXT: movdqa %xmm3, %xmm1 | ||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] | ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] | ||||
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] | ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] | ||||
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: popl %esi | ; X86-SSE-NEXT: popl %esi | ||||
; X86-SSE-NEXT: retl | ; X86-SSE-NEXT: retl | ||||
; | ; | ||||
; X86-AVX1-LABEL: mul_16xi16: | ; X86-AVX1-LABEL: mul_16xi16: | ||||
; X86-AVX1: # %bb.0: # %entry | ; X86-AVX1: # %bb.0: # %entry | ||||
; X86-AVX1-NEXT: pushl %esi | ; X86-AVX1-NEXT: pushl %esi | ||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax | ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax | ||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx | ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | |||||
; X64-SSE-NEXT: movdqu (%rdi,%rdx), %xmm0 | ; X64-SSE-NEXT: movdqu (%rdi,%rdx), %xmm0 | ||||
; X64-SSE-NEXT: movdqu 16(%rdi,%rdx), %xmm1 | ; X64-SSE-NEXT: movdqu 16(%rdi,%rdx), %xmm1 | ||||
; X64-SSE-NEXT: movdqu (%rsi,%rdx), %xmm2 | ; X64-SSE-NEXT: movdqu (%rsi,%rdx), %xmm2 | ||||
; X64-SSE-NEXT: movdqu 16(%rsi,%rdx), %xmm3 | ; X64-SSE-NEXT: movdqu 16(%rsi,%rdx), %xmm3 | ||||
; X64-SSE-NEXT: movdqa %xmm2, %xmm4 | ; X64-SSE-NEXT: movdqa %xmm2, %xmm4 | ||||
; X64-SSE-NEXT: pmulhuw %xmm0, %xmm4 | ; X64-SSE-NEXT: pmulhuw %xmm0, %xmm4 | ||||
; X64-SSE-NEXT: pmullw %xmm0, %xmm2 | ; X64-SSE-NEXT: pmullw %xmm0, %xmm2 | ||||
; X64-SSE-NEXT: movdqa %xmm2, %xmm0 | ; X64-SSE-NEXT: movdqa %xmm2, %xmm0 | ||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] | ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] | ||||
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] | ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] | ||||
; X64-SSE-NEXT: movdqa %xmm3, %xmm4 | ; X64-SSE-NEXT: movdqa %xmm3, %xmm4 | ||||
; X64-SSE-NEXT: pmulhuw %xmm1, %xmm4 | ; X64-SSE-NEXT: pmulhuw %xmm1, %xmm4 | ||||
; X64-SSE-NEXT: pmullw %xmm1, %xmm3 | ; X64-SSE-NEXT: pmullw %xmm1, %xmm3 | ||||
; X64-SSE-NEXT: movdqa %xmm3, %xmm1 | ; X64-SSE-NEXT: movdqa %xmm3, %xmm1 | ||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] | ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] | ||||
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] | ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] | ||||
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: retq | ; X64-SSE-NEXT: retq | ||||
; | ; | ||||
; X64-AVX1-LABEL: mul_16xi16: | ; X64-AVX1-LABEL: mul_16xi16: | ||||
; X64-AVX1: # %bb.0: # %entry | ; X64-AVX1: # %bb.0: # %entry | ||||
; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax | ; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax | ||||
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ||||
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ||||
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ||||
▲ Show 20 Lines • Show All 400 Lines • ▼ Show 20 Lines | |||||
; X86-SSE-NEXT: movdqu (%edx,%ecx), %xmm0 | ; X86-SSE-NEXT: movdqu (%edx,%ecx), %xmm0 | ||||
; X86-SSE-NEXT: movdqu 16(%edx,%ecx), %xmm1 | ; X86-SSE-NEXT: movdqu 16(%edx,%ecx), %xmm1 | ||||
; X86-SSE-NEXT: movdqu (%eax,%ecx), %xmm2 | ; X86-SSE-NEXT: movdqu (%eax,%ecx), %xmm2 | ||||
; X86-SSE-NEXT: movdqu 16(%eax,%ecx), %xmm3 | ; X86-SSE-NEXT: movdqu 16(%eax,%ecx), %xmm3 | ||||
; X86-SSE-NEXT: movdqa %xmm2, %xmm4 | ; X86-SSE-NEXT: movdqa %xmm2, %xmm4 | ||||
; X86-SSE-NEXT: pmulhw %xmm0, %xmm4 | ; X86-SSE-NEXT: pmulhw %xmm0, %xmm4 | ||||
; X86-SSE-NEXT: pmullw %xmm0, %xmm2 | ; X86-SSE-NEXT: pmullw %xmm0, %xmm2 | ||||
; X86-SSE-NEXT: movdqa %xmm2, %xmm0 | ; X86-SSE-NEXT: movdqa %xmm2, %xmm0 | ||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] | ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] | ||||
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] | ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] | ||||
; X86-SSE-NEXT: movdqa %xmm3, %xmm4 | ; X86-SSE-NEXT: movdqa %xmm3, %xmm4 | ||||
; X86-SSE-NEXT: pmulhw %xmm1, %xmm4 | ; X86-SSE-NEXT: pmulhw %xmm1, %xmm4 | ||||
; X86-SSE-NEXT: pmullw %xmm1, %xmm3 | ; X86-SSE-NEXT: pmullw %xmm1, %xmm3 | ||||
; X86-SSE-NEXT: movdqa %xmm3, %xmm1 | ; X86-SSE-NEXT: movdqa %xmm3, %xmm1 | ||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] | ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] | ||||
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] | ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] | ||||
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4) | ||||
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4) | ; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4) | ||||
; X86-SSE-NEXT: popl %esi | ; X86-SSE-NEXT: popl %esi | ||||
; X86-SSE-NEXT: retl | ; X86-SSE-NEXT: retl | ||||
; | ; | ||||
; X86-AVX1-LABEL: mul_16xi16_sext: | ; X86-AVX1-LABEL: mul_16xi16_sext: | ||||
; X86-AVX1: # %bb.0: # %entry | ; X86-AVX1: # %bb.0: # %entry | ||||
; X86-AVX1-NEXT: pushl %esi | ; X86-AVX1-NEXT: pushl %esi | ||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax | ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax | ||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx | ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | |||||
; X64-SSE-NEXT: movdqu (%rdi,%rdx), %xmm0 | ; X64-SSE-NEXT: movdqu (%rdi,%rdx), %xmm0 | ||||
; X64-SSE-NEXT: movdqu 16(%rdi,%rdx), %xmm1 | ; X64-SSE-NEXT: movdqu 16(%rdi,%rdx), %xmm1 | ||||
; X64-SSE-NEXT: movdqu (%rsi,%rdx), %xmm2 | ; X64-SSE-NEXT: movdqu (%rsi,%rdx), %xmm2 | ||||
; X64-SSE-NEXT: movdqu 16(%rsi,%rdx), %xmm3 | ; X64-SSE-NEXT: movdqu 16(%rsi,%rdx), %xmm3 | ||||
; X64-SSE-NEXT: movdqa %xmm2, %xmm4 | ; X64-SSE-NEXT: movdqa %xmm2, %xmm4 | ||||
; X64-SSE-NEXT: pmulhw %xmm0, %xmm4 | ; X64-SSE-NEXT: pmulhw %xmm0, %xmm4 | ||||
; X64-SSE-NEXT: pmullw %xmm0, %xmm2 | ; X64-SSE-NEXT: pmullw %xmm0, %xmm2 | ||||
; X64-SSE-NEXT: movdqa %xmm2, %xmm0 | ; X64-SSE-NEXT: movdqa %xmm2, %xmm0 | ||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] | ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] | ||||
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] | ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] | ||||
; X64-SSE-NEXT: movdqa %xmm3, %xmm4 | ; X64-SSE-NEXT: movdqa %xmm3, %xmm4 | ||||
; X64-SSE-NEXT: pmulhw %xmm1, %xmm4 | ; X64-SSE-NEXT: pmulhw %xmm1, %xmm4 | ||||
; X64-SSE-NEXT: pmullw %xmm1, %xmm3 | ; X64-SSE-NEXT: pmullw %xmm1, %xmm3 | ||||
; X64-SSE-NEXT: movdqa %xmm3, %xmm1 | ; X64-SSE-NEXT: movdqa %xmm3, %xmm1 | ||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] | ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] | ||||
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] | ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] | ||||
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4) | ||||
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4) | ; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4) | ||||
; X64-SSE-NEXT: retq | ; X64-SSE-NEXT: retq | ||||
; | ; | ||||
; X64-AVX1-LABEL: mul_16xi16_sext: | ; X64-AVX1-LABEL: mul_16xi16_sext: | ||||
; X64-AVX1: # %bb.0: # %entry | ; X64-AVX1: # %bb.0: # %entry | ||||
; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax | ; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax | ||||
; X64-AVX1-NEXT: vpmovsxwd 16(%rdi,%rdx), %xmm0 | ; X64-AVX1-NEXT: vpmovsxwd 16(%rdi,%rdx), %xmm0 | ||||
; X64-AVX1-NEXT: vpmovsxwd 24(%rdi,%rdx), %xmm1 | ; X64-AVX1-NEXT: vpmovsxwd 24(%rdi,%rdx), %xmm1 | ||||
; X64-AVX1-NEXT: vpmovsxwd (%rdi,%rdx), %xmm2 | ; X64-AVX1-NEXT: vpmovsxwd (%rdi,%rdx), %xmm2 | ||||
▲ Show 20 Lines • Show All 1,214 Lines • Show Last 20 Lines |