llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
[536 lines not shown]
 ; AVX-NEXT: retq
 %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
 %r = bitcast <4 x i32> %r1 to <2 x i64>
 ret <2 x i64> %r
 }
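
; Editorial sketch, not part of the original test file: the insertelement-of-zero
; pattern above clears the upper 32 bits of each i64 lane through a <4 x i32>
; bitcast, i.e. it is an AND with 0x00000000FFFFFFFF per lane. The SSE2 lowering
; below materializes exactly that mask, printed as the float constant
; [NaN,0.0E+0,NaN,0.0E+0]. A minimal standalone equivalent, with a hypothetical
; function name:
define <2 x i64> @clearupper2xi64_mask_sketch(<2 x i64> %x) nounwind {
  %r = and <2 x i64> %x, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %r
}
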
 define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
 ; SSE2-LABEL: _clearupper4xi64b:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps {{.*#+}} xmm2
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [NaN,0.0E+0,NaN,0.0E+0]
 ; SSE2-NEXT: andps %xmm2, %xmm0
 ; SSE2-NEXT: andps %xmm2, %xmm1
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: _clearupper4xi64b:
 ; SSE42: # %bb.0:
 ; SSE42-NEXT: xorps %xmm2, %xmm2
 ; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
[246 lines not shown]
 ; SSE42-NEXT: popq %rbx
 ; SSE42-NEXT: retq
 ;
 ; AVX-LABEL: _clearupper16xi8b:
 ; AVX: # %bb.0:
 ; AVX-NEXT: pushq %rbx
 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9
-; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT: movq %r9, %r8
 ; AVX-NEXT: shrq $56, %r8
 ; AVX-NEXT: andl $15, %r8d
 ; AVX-NEXT: movq %r9, %r10
 ; AVX-NEXT: shrq $48, %r10
 ; AVX-NEXT: andl $15, %r10d
-; AVX-NEXT: movq %r9, %rsi
-; AVX-NEXT: shrq $40, %rsi
-; AVX-NEXT: andl $15, %esi
+; AVX-NEXT: movq %rcx, %rdx
+; AVX-NEXT: shldq $24, %r9, %rdx
+; AVX-NEXT: andl $15, %edx
 ; AVX-NEXT: movq %r9, %r11
 ; AVX-NEXT: shrq $32, %r11
 ; AVX-NEXT: andl $15, %r11d
-; AVX-NEXT: movq %rdx, %rdi
+; AVX-NEXT: movq %rcx, %rdi
 ; AVX-NEXT: shrq $56, %rdi
 ; AVX-NEXT: andl $15, %edi
-; AVX-NEXT: movq %rdx, %rax
-; AVX-NEXT: shrq $48, %rax
+; AVX-NEXT: movq %rcx, %rsi
+; AVX-NEXT: shrq $48, %rsi
+; AVX-NEXT: andl $15, %esi
+; AVX-NEXT: movq %rcx, %rax
+; AVX-NEXT: shrq $40, %rax
 ; AVX-NEXT: andl $15, %eax
-; AVX-NEXT: movq %rdx, %rcx
-; AVX-NEXT: shrq $40, %rcx
-; AVX-NEXT: andl $15, %ecx
-; AVX-NEXT: movq %rdx, %rbx
+; AVX-NEXT: movq %rcx, %rbx
 ; AVX-NEXT: shrq $32, %rbx
 ; AVX-NEXT: andl $15, %ebx
 ; AVX-NEXT: shlq $32, %rbx
-; AVX-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; AVX-NEXT: orq %rbx, %rdx
-; AVX-NEXT: shlq $40, %rcx
-; AVX-NEXT: orq %rdx, %rcx
-; AVX-NEXT: shlq $48, %rax
+; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
+; AVX-NEXT: orq %rbx, %rcx
+; AVX-NEXT: shlq $40, %rax
 ; AVX-NEXT: orq %rcx, %rax
+; AVX-NEXT: shlq $48, %rsi
+; AVX-NEXT: orq %rax, %rsi
 ; AVX-NEXT: shlq $56, %rdi
-; AVX-NEXT: orq %rax, %rdi
+; AVX-NEXT: orq %rsi, %rdi
 ; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT: shlq $32, %r11
 ; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F
 ; AVX-NEXT: orq %r11, %r9
-; AVX-NEXT: shlq $40, %rsi
-; AVX-NEXT: orq %r9, %rsi
+; AVX-NEXT: shlq $40, %rdx
+; AVX-NEXT: orq %r9, %rdx
 ; AVX-NEXT: shlq $48, %r10
-; AVX-NEXT: orq %rsi, %r10
+; AVX-NEXT: orq %rdx, %r10
 ; AVX-NEXT: shlq $56, %r8
 ; AVX-NEXT: orq %r10, %r8
 ; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
 ; AVX-NEXT: popq %rbx
 ; AVX-NEXT: retq
 %x4 = bitcast <16 x i8> %0 to <32 x i4>
 %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1
[123 lines not shown]
 ; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE42-NEXT: popq %rbx
 ; SSE42-NEXT: retq
 ;
 ; AVX1-LABEL: _clearupper32xi8b:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movq %rax, %r8
 ; AVX1-NEXT: movq %rax, %rdx
 ; AVX1-NEXT: movq %rax, %rsi
 ; AVX1-NEXT: movq %rax, %rdi
-; AVX1-NEXT: shrq $32, %rdi
-; AVX1-NEXT: andl $15, %edi
-; AVX1-NEXT: shlq $32, %rdi
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $32, %rcx
+; AVX1-NEXT: andl $15, %ecx
+; AVX1-NEXT: shlq $32, %rcx
 ; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rdi, %rax
-; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; AVX1-NEXT: shrq $40, %rsi
+; AVX1-NEXT: orq %rcx, %rax
+; AVX1-NEXT: shrq $40, %rdi
+; AVX1-NEXT: andl $15, %edi
+; AVX1-NEXT: shlq $40, %rdi
+; AVX1-NEXT: orq %rax, %rdi
+; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX1-NEXT: shrq $48, %rsi
 ; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $40, %rsi
-; AVX1-NEXT: orq %rax, %rsi
-; AVX1-NEXT: movq %rdi, %rax
-; AVX1-NEXT: shrq $48, %rdx
+; AVX1-NEXT: shlq $48, %rsi
+; AVX1-NEXT: orq %rdi, %rsi
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $56, %rdx
 ; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $48, %rdx
+; AVX1-NEXT: shlq $56, %rdx
 ; AVX1-NEXT: orq %rsi, %rdx
-; AVX1-NEXT: movq %rdi, %rsi
+; AVX1-NEXT: movq %rax, %rsi
+; AVX1-NEXT: shldq $24, %rax, %r8
+; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: shrq $32, %rdx
+; AVX1-NEXT: andl $15, %edx
+; AVX1-NEXT: shlq $32, %rdx
+; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX1-NEXT: orq %rdx, %rax
+; AVX1-NEXT: andl $15, %r8d
+; AVX1-NEXT: shlq $40, %r8
+; AVX1-NEXT: orq %rax, %r8
+; AVX1-NEXT: shrq $48, %rsi
+; AVX1-NEXT: andl $15, %esi
+; AVX1-NEXT: shlq $48, %rsi
+; AVX1-NEXT: orq %r8, %rsi
 ; AVX1-NEXT: shrq $56, %rcx
 ; AVX1-NEXT: andl $15, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: shlq $56, %rcx
-; AVX1-NEXT: orq %rdx, %rcx
-; AVX1-NEXT: movq %rdi, %rdx
+; AVX1-NEXT: orq %rsi, %rcx
+; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %rdi, %rcx
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $8, %ecx
+; AVX1-NEXT: vmovd %eax, %xmm1
+; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $16, %ecx
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $24, %ecx
+; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movq %rax, %rcx
 ; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: shlq $32, %rcx
-; AVX1-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rcx, %rdi
-; AVX1-NEXT: shrq $40, %rdx
-; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $40, %rdx
-; AVX1-NEXT: orq %rdi, %rdx
-; AVX1-NEXT: shrq $48, %rsi
-; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $48, %rsi
-; AVX1-NEXT: orq %rdx, %rsi
+; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $40, %rcx
+; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $48, %rcx
+; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: shlq $56, %rax
-; AVX1-NEXT: orq %rsi, %rax
-; AVX1-NEXT: vmovq %xmm0, %rcx
-; AVX1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
 ; AVX1-NEXT: movl %ecx, %eax
 ; AVX1-NEXT: shrl $8, %eax
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: movl %ecx, %eax
 ; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: movl %ecx, %eax
 ; AVX1-NEXT: shrl $24, %eax
-; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: movq %rcx, %rax
 ; AVX1-NEXT: shrq $32, %rax
-; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: movq %rcx, %rax
 ; AVX1-NEXT: shrq $40, %rax
-; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: movq %rcx, %rax
 ; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: shrq $56, %rcx
-; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $24, %ecx
-; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $40, %rcx
-; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $48, %rcx
-; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
 ; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: _clearupper32xi8b:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movq %rax, %r8
 ; AVX2-NEXT: movq %rax, %rdx
 ; AVX2-NEXT: movq %rax, %rsi
 ; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: shrq $32, %rdi
-; AVX2-NEXT: andl $15, %edi
-; AVX2-NEXT: shlq $32, %rdi
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $32, %rcx
+; AVX2-NEXT: andl $15, %ecx
+; AVX2-NEXT: shlq $32, %rcx
 ; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rdi, %rax
-; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; AVX2-NEXT: shrq $40, %rsi
+; AVX2-NEXT: orq %rcx, %rax
+; AVX2-NEXT: shrq $40, %rdi
+; AVX2-NEXT: andl $15, %edi
+; AVX2-NEXT: shlq $40, %rdi
+; AVX2-NEXT: orq %rax, %rdi
+; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: shrq $48, %rsi
 ; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $40, %rsi
-; AVX2-NEXT: orq %rax, %rsi
-; AVX2-NEXT: movq %rdi, %rax
-; AVX2-NEXT: shrq $48, %rdx
+; AVX2-NEXT: shlq $48, %rsi
+; AVX2-NEXT: orq %rdi, %rsi
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $56, %rdx
 ; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $48, %rdx
+; AVX2-NEXT: shlq $56, %rdx
 ; AVX2-NEXT: orq %rsi, %rdx
-; AVX2-NEXT: movq %rdi, %rsi
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: shldq $24, %rax, %r8
+; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: shrq $32, %rdx
+; AVX2-NEXT: andl $15, %edx
+; AVX2-NEXT: shlq $32, %rdx
+; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX2-NEXT: orq %rdx, %rax
+; AVX2-NEXT: andl $15, %r8d
+; AVX2-NEXT: shlq $40, %r8
+; AVX2-NEXT: orq %rax, %r8
+; AVX2-NEXT: shrq $48, %rsi
+; AVX2-NEXT: andl $15, %esi
+; AVX2-NEXT: shlq $48, %rsi
+; AVX2-NEXT: orq %r8, %rsi
 ; AVX2-NEXT: shrq $56, %rcx
 ; AVX2-NEXT: andl $15, %ecx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: shlq $56, %rcx
-; AVX2-NEXT: orq %rdx, %rcx
-; AVX2-NEXT: movq %rdi, %rdx
+; AVX2-NEXT: orq %rsi, %rcx
+; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %rdi, %rcx
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $8, %ecx
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $16, %ecx
+; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $24, %ecx
+; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movq %rax, %rcx
 ; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: shlq $32, %rcx
-; AVX2-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rcx, %rdi
-; AVX2-NEXT: shrq $40, %rdx
-; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $40, %rdx
-; AVX2-NEXT: orq %rdi, %rdx
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $48, %rsi
-; AVX2-NEXT: orq %rdx, %rsi
+; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $40, %rcx
+; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $48, %rcx
+; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
 ; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: shlq $56, %rax
-; AVX2-NEXT: orq %rsi, %rax
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
 ; AVX2-NEXT: movl %ecx, %eax
 ; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: movl %ecx, %eax
 ; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: movl %ecx, %eax
 ; AVX2-NEXT: shrl $24, %eax
-; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: movq %rcx, %rax
 ; AVX2-NEXT: shrq $32, %rax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: movq %rcx, %rax
 ; AVX2-NEXT: shrq $40, %rax
-; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: movq %rcx, %rax
 ; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: shrq $56, %rcx
-; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $16, %ecx
-; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $24, %ecx
-; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $40, %rcx
-; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $48, %rcx
-; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
 ; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 %x4 = bitcast <32 x i8> %0 to <64 x i4>
 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
 %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3
 %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5
 %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7
[186 lines not shown]
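
; Editorial sketch, not part of the original test file: zeroing the odd i4
; indices of the <64 x i4> bitcast above clears the high nibble of each
; affected byte, i.e. an AND with 0x0F per byte. The scalar lowerings above
; apply that mask 32 bits at a time (andl $252645135, imm = 0xF0F0F0F) and
; handle the remaining bytes with shrq/andl $15/shlq/orq chains. A minimal
; standalone equivalent for the 16-byte case, with a hypothetical name:
define <16 x i8> @clearupper16xi8_mask_sketch(<16 x i8> %x) nounwind {
  %r = and <16 x i8> %x, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  ret <16 x i8> %r
}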