llvm/trunk/test/CodeGen/X86/shrink_vmul.ll
[... 203 lines not shown ...]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX1-NEXT: movl c, %esi
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT: vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm1, (%esi,%ecx,4)
; X86-AVX1-NEXT: popl %esi
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: mul_8xi8:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
[... 26 lines not shown ...]
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm1, (%rax,%rdx,4)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: mul_8xi8:
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-AVX2-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
[... 69 lines not shown ...]
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm0, %xmm4, %xmm0
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-AVX1-NEXT: vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT: vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT: vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm3, (%esi,%ecx,4)
; X86-AVX1-NEXT: popl %esi
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: mul_16xi8:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
[... 46 lines not shown ...]
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm0, %xmm4, %xmm0
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm1, %xmm4, %xmm1
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-AVX1-NEXT: vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT: vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm3, (%rax,%rdx,4)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: mul_16xi8:
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
[... 208 lines not shown ...]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX1-NEXT: movl c, %esi
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT: vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm1, (%esi,%ecx,4)
; X86-AVX1-NEXT: popl %esi
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: mul_8xi16:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
[... 25 lines not shown ...]
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm1, (%rax,%rdx,4)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: mul_8xi16:
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
[... 68 lines not shown ...]
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT: vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT: vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm3, (%esi,%ecx,4)
; X86-AVX1-NEXT: popl %esi
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: mul_16xi16:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
[... 45 lines not shown ...]
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT: vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm3, (%rax,%rdx,4)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: mul_16xi16:
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
[... 398 lines not shown ...]
;
; X86-AVX1-LABEL: mul_16xi16_sext:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: pushl %esi
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX1-NEXT: movl c, %esi
-; X86-AVX1-NEXT: vpmovsxwd 16(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT: vpmovsxwd 24(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT: vpmovsxwd (%edx,%ecx), %xmm2
-; X86-AVX1-NEXT: vpmovsxwd 8(%edx,%ecx), %xmm3
-; X86-AVX1-NEXT: vpmovsxwd 16(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT: vpmovsxwd 24(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT: vpmovsxwd 16(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT: vpmovsxwd 8(%edx,%ecx), %xmm2
+; X86-AVX1-NEXT: vpmovsxwd (%edx,%ecx), %xmm3
; X86-AVX1-NEXT: vpmovsxwd 24(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT: vpmovsxwd 16(%eax,%ecx), %xmm4
; X86-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT: vpmovsxwd (%eax,%ecx), %xmm4
-; X86-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpmovsxwd 8(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT: vpmovsxwd (%eax,%ecx), %xmm4
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT: vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT: vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT: vmovdqu %xmm3, (%esi,%ecx,4)
; X86-AVX1-NEXT: popl %esi
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: mul_16xi16_sext:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
[... 33 lines not shown ...]
; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: mul_16xi16_sext:
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT: vpmovsxwd 16(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT: vpmovsxwd 24(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT: vpmovsxwd (%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT: vpmovsxwd 8(%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT: vpmovsxwd 16(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT: vpmovsxwd 24(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT: vpmovsxwd 16(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT: vpmovsxwd 8(%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT: vpmovsxwd (%rdi,%rdx), %xmm3
; X64-AVX1-NEXT: vpmovsxwd 24(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT: vpmovsxwd 16(%rsi,%rdx), %xmm4
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT: vpmovsxwd (%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpmovsxwd 8(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT: vpmovsxwd (%rsi,%rdx), %xmm4
; X64-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT: vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT: vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT: vmovdqu %xmm3, (%rax,%rdx,4)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: mul_16xi16_sext:
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: movq {{.*}}(%rip), %rax
; X64-AVX2-NEXT: vpmovsxwd 16(%rdi,%rdx), %ymm0
; X64-AVX2-NEXT: vpmovsxwd (%rdi,%rdx), %ymm1
; X64-AVX2-NEXT: vpmovsxwd 16(%rsi,%rdx), %ymm2
[... 799 lines not shown ...]
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vmovd %xmm1, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl 32(%ecx)
; X86-AVX1-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-AVX1-NEXT: vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT: vmovdqa (%ecx), %xmm3
-; X86-AVX1-NEXT: vmovdqa 16(%ecx), %xmm1
+; X86-AVX1-NEXT: vmovdqa (%ecx), %xmm1
+; X86-AVX1-NEXT: vmovdqa 16(%ecx), %xmm3
; X86-AVX1-NEXT: vpextrd $3, %xmm3, %ecx
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-AVX1-NEXT: vpextrd $2, %xmm2, %eax
; X86-AVX1-NEXT: vpextrd $2, %xmm3, %ecx
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
[... 36 lines not shown ...]
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
+; X86-AVX1-NEXT: vmovdqa %xmm1, (%eax)
+; X86-AVX1-NEXT: vmovdqa %xmm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
; X86-AVX1-NEXT: popl %edi
; X86-AVX1-NEXT: popl %ebx
; X86-AVX1-NEXT: popl %ebp
-; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR34947:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: pushl %edi
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
[... 156 lines not shown ...]
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vmovd %xmm1, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl 32(%rsi)
; X64-AVX1-NEXT: movl %edx, %r8d
; X64-AVX1-NEXT: vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT: vmovdqa (%rsi), %xmm3
-; X64-AVX1-NEXT: vmovdqa 16(%rsi), %xmm1
+; X64-AVX1-NEXT: vmovdqa (%rsi), %xmm1
+; X64-AVX1-NEXT: vmovdqa 16(%rsi), %xmm3
; X64-AVX1-NEXT: vpextrd $3, %xmm3, %ecx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: movl %edx, %r9d
; X64-AVX1-NEXT: vpextrd $2, %xmm2, %eax
; X64-AVX1-NEXT: vpextrd $2, %xmm3, %ecx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
[... 33 lines not shown ...]
; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %esi, %xmm2
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
; X64-AVX1-NEXT: movl %eax, (%rax)
-; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
+; X64-AVX1-NEXT: vmovdqa %xmm1, (%rax)
+; X64-AVX1-NEXT: vmovdqa %xmm0, (%rax)
; X64-AVX1-NEXT: popq %rbx
; X64-AVX1-NEXT: popq %rbp
-; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: PR34947:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vmovdqa (%rsi), %xmm2
; X64-AVX2-NEXT: vmovdqa 16(%rsi), %xmm3
[... 62 lines not shown ...]