Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
Show First 20 Lines • Show All 283 Lines • ▼ Show 20 Lines | |||||
; AVX512F-32-NEXT: subl $12, %esp | ; AVX512F-32-NEXT: subl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi0: | ; AVX512F-32-NEXT: .Lcfi0: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: movl (%esp), %eax | ; AVX512F-32-NEXT: movl (%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $12, %esp | ; AVX512F-32-NEXT: addl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi1: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) | %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) | ||||
ret i64 %res | ret i64 %res | ||||
} | } | ||||
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { | define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { | ||||
; AVX512BW-LABEL: test_mask_pcmpeq_b: | ; AVX512BW-LABEL: test_mask_pcmpeq_b: | ||||
; AVX512BW: ## BB#0: | ; AVX512BW: ## BB#0: | ||||
; AVX512BW-NEXT: kmovq %rdi, %k1 | ; AVX512BW-NEXT: kmovq %rdi, %k1 | ||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} | ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} | ||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_mask_pcmpeq_b: | ; AVX512F-32-LABEL: test_mask_pcmpeq_b: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: subl $12, %esp | ; AVX512F-32-NEXT: subl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi1: | ; AVX512F-32-NEXT: .Lcfi2: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ||||
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 | ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 | ||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} | ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} | ||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: movl (%esp), %eax | ; AVX512F-32-NEXT: movl (%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $12, %esp | ; AVX512F-32-NEXT: addl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi3: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) | %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) | ||||
ret i64 %res | ret i64 %res | ||||
} | } | ||||
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) | declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) | ||||
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { | define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { | ||||
Show All 37 Lines | |||||
; AVX512BW: ## BB#0: | ; AVX512BW: ## BB#0: | ||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 | ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 | ||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_pcmpgt_b: | ; AVX512F-32-LABEL: test_pcmpgt_b: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: subl $12, %esp | ; AVX512F-32-NEXT: subl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi2: | ; AVX512F-32-NEXT: .Lcfi4: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 | ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: movl (%esp), %eax | ; AVX512F-32-NEXT: movl (%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $12, %esp | ; AVX512F-32-NEXT: addl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi5: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) | %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) | ||||
ret i64 %res | ret i64 %res | ||||
} | } | ||||
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { | define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { | ||||
; AVX512BW-LABEL: test_mask_pcmpgt_b: | ; AVX512BW-LABEL: test_mask_pcmpgt_b: | ||||
; AVX512BW: ## BB#0: | ; AVX512BW: ## BB#0: | ||||
; AVX512BW-NEXT: kmovq %rdi, %k1 | ; AVX512BW-NEXT: kmovq %rdi, %k1 | ||||
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} | ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} | ||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_mask_pcmpgt_b: | ; AVX512F-32-LABEL: test_mask_pcmpgt_b: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: subl $12, %esp | ; AVX512F-32-NEXT: subl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi3: | ; AVX512F-32-NEXT: .Lcfi6: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 | ||||
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 | ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 | ||||
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} | ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} | ||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: movl (%esp), %eax | ; AVX512F-32-NEXT: movl (%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $12, %esp | ; AVX512F-32-NEXT: addl $12, %esp | ||||
; AVX512F-32-NEXT: .Lcfi7: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) | %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) | ||||
ret i64 %res | ret i64 %res | ||||
} | } | ||||
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) | declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) | ||||
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { | define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { | ||||
▲ Show 20 Lines • Show All 1,180 Lines • ▼ Show 20 Lines | |||||
; AVX512BW-NEXT: kxnorq %k0, %k0, %k0 | ; AVX512BW-NEXT: kxnorq %k0, %k0, %k0 | ||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: addq %rcx, %rax | ; AVX512BW-NEXT: addq %rcx, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_cmp_b_512: | ; AVX512F-32-LABEL: test_cmp_b_512: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: subl $60, %esp | ; AVX512F-32-NEXT: subl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi4: | ; AVX512F-32-NEXT: .Lcfi8: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 | ||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 | ; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
Show All 14 Lines | |||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: addl (%esp), %eax | ; AVX512F-32-NEXT: addl (%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: kxnorq %k0, %k0, %k0 | ; AVX512F-32-NEXT: kxnorq %k0, %k0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $60, %esp | ; AVX512F-32-NEXT: addl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi9: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) | %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) | ||||
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) | %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) | ||||
%ret1 = add i64 %res0, %res1 | %ret1 = add i64 %res0, %res1 | ||||
%res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) | %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) | ||||
%ret2 = add i64 %ret1, %res2 | %ret2 = add i64 %ret1, %res2 | ||||
%res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) | %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) | ||||
%ret3 = add i64 %ret2, %res3 | %ret3 = add i64 %ret2, %res3 | ||||
Show All 33 Lines | |||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: addq %rcx, %rax | ; AVX512BW-NEXT: addq %rcx, %rax | ||||
; AVX512BW-NEXT: addq %rdi, %rax | ; AVX512BW-NEXT: addq %rdi, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_mask_cmp_b_512: | ; AVX512F-32-LABEL: test_mask_cmp_b_512: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: pushl %ebx | ; AVX512F-32-NEXT: pushl %ebx | ||||
; AVX512F-32-NEXT: .Lcfi5: | ; AVX512F-32-NEXT: .Lcfi10: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | ||||
; AVX512F-32-NEXT: pushl %esi | ; AVX512F-32-NEXT: pushl %esi | ||||
; AVX512F-32-NEXT: .Lcfi6: | ; AVX512F-32-NEXT: .Lcfi11: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | ||||
; AVX512F-32-NEXT: subl $60, %esp | ; AVX512F-32-NEXT: subl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi7: | ; AVX512F-32-NEXT: .Lcfi12: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 | ||||
; AVX512F-32-NEXT: .Lcfi8: | ; AVX512F-32-NEXT: .Lcfi13: | ||||
; AVX512F-32-NEXT: .cfi_offset %esi, -12 | ; AVX512F-32-NEXT: .cfi_offset %esi, -12 | ||||
; AVX512F-32-NEXT: .Lcfi9: | ; AVX512F-32-NEXT: .Lcfi14: | ||||
; AVX512F-32-NEXT: .cfi_offset %ebx, -8 | ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 | ||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 | ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 | ||||
; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 | ; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||||
; AVX512F-32-NEXT: movb %cl, %al | ; AVX512F-32-NEXT: movb %cl, %al | ||||
; AVX512F-32-NEXT: shrb $5, %al | ; AVX512F-32-NEXT: shrb $5, %al | ||||
; AVX512F-32-NEXT: andb $1, %al | ; AVX512F-32-NEXT: andb $1, %al | ||||
; AVX512F-32-NEXT: movb %cl, %bl | ; AVX512F-32-NEXT: movb %cl, %bl | ||||
▲ Show 20 Lines • Show All 728 Lines • ▼ Show 20 Lines | |||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl %esi, %eax | ; AVX512F-32-NEXT: addl %esi, %eax | ||||
; AVX512F-32-NEXT: adcxl %ecx, %edx | ; AVX512F-32-NEXT: adcxl %ecx, %edx | ||||
; AVX512F-32-NEXT: addl $60, %esp | ; AVX512F-32-NEXT: addl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi15: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | |||||
; AVX512F-32-NEXT: popl %esi | ; AVX512F-32-NEXT: popl %esi | ||||
; AVX512F-32-NEXT: .Lcfi16: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | |||||
; AVX512F-32-NEXT: popl %ebx | ; AVX512F-32-NEXT: popl %ebx | ||||
; AVX512F-32-NEXT: .Lcfi17: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) | %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) | ||||
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) | %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) | ||||
%ret1 = add i64 %res0, %res1 | %ret1 = add i64 %res0, %res1 | ||||
%res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) | %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) | ||||
%ret2 = add i64 %ret1, %res2 | %ret2 = add i64 %ret1, %res2 | ||||
%res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) | %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) | ||||
%ret3 = add i64 %ret2, %res3 | %ret3 = add i64 %ret2, %res3 | ||||
Show All 33 Lines | |||||
; AVX512BW-NEXT: kxnorq %k0, %k0, %k0 | ; AVX512BW-NEXT: kxnorq %k0, %k0, %k0 | ||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: addq %rcx, %rax | ; AVX512BW-NEXT: addq %rcx, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_ucmp_b_512: | ; AVX512F-32-LABEL: test_ucmp_b_512: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: subl $60, %esp | ; AVX512F-32-NEXT: subl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi10: | ; AVX512F-32-NEXT: .Lcfi18: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 | ||||
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 | ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
Show All 14 Lines | |||||
; AVX512F-32-NEXT: kmovq %k0, (%esp) | ; AVX512F-32-NEXT: kmovq %k0, (%esp) | ||||
; AVX512F-32-NEXT: addl (%esp), %eax | ; AVX512F-32-NEXT: addl (%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: kxnorq %k0, %k0, %k0 | ; AVX512F-32-NEXT: kxnorq %k0, %k0, %k0 | ||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl $60, %esp | ; AVX512F-32-NEXT: addl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi19: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) | %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) | ||||
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) | %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) | ||||
%ret1 = add i64 %res0, %res1 | %ret1 = add i64 %res0, %res1 | ||||
%res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) | %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) | ||||
%ret2 = add i64 %ret1, %res2 | %ret2 = add i64 %ret1, %res2 | ||||
%res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) | %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) | ||||
%ret3 = add i64 %ret2, %res3 | %ret3 = add i64 %ret2, %res3 | ||||
Show All 33 Lines | |||||
; AVX512BW-NEXT: kmovq %k0, %rax | ; AVX512BW-NEXT: kmovq %k0, %rax | ||||
; AVX512BW-NEXT: addq %rcx, %rax | ; AVX512BW-NEXT: addq %rcx, %rax | ||||
; AVX512BW-NEXT: addq %rdi, %rax | ; AVX512BW-NEXT: addq %rdi, %rax | ||||
; AVX512BW-NEXT: retq | ; AVX512BW-NEXT: retq | ||||
; | ; | ||||
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: | ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: | ||||
; AVX512F-32: # BB#0: | ; AVX512F-32: # BB#0: | ||||
; AVX512F-32-NEXT: pushl %ebx | ; AVX512F-32-NEXT: pushl %ebx | ||||
; AVX512F-32-NEXT: .Lcfi11: | ; AVX512F-32-NEXT: .Lcfi20: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | ||||
; AVX512F-32-NEXT: pushl %esi | ; AVX512F-32-NEXT: pushl %esi | ||||
; AVX512F-32-NEXT: .Lcfi12: | ; AVX512F-32-NEXT: .Lcfi21: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | ||||
; AVX512F-32-NEXT: subl $60, %esp | ; AVX512F-32-NEXT: subl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi13: | ; AVX512F-32-NEXT: .Lcfi22: | ||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 | ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 | ||||
; AVX512F-32-NEXT: .Lcfi14: | ; AVX512F-32-NEXT: .Lcfi23: | ||||
; AVX512F-32-NEXT: .cfi_offset %esi, -12 | ; AVX512F-32-NEXT: .cfi_offset %esi, -12 | ||||
; AVX512F-32-NEXT: .Lcfi15: | ; AVX512F-32-NEXT: .Lcfi24: | ||||
; AVX512F-32-NEXT: .cfi_offset %ebx, -8 | ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 | ||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 | ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 | ||||
; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 | ; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 | ||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx | ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||||
; AVX512F-32-NEXT: movb %cl, %al | ; AVX512F-32-NEXT: movb %cl, %al | ||||
; AVX512F-32-NEXT: shrb $5, %al | ; AVX512F-32-NEXT: shrb $5, %al | ||||
; AVX512F-32-NEXT: andb $1, %al | ; AVX512F-32-NEXT: andb $1, %al | ||||
; AVX512F-32-NEXT: movb %cl, %bl | ; AVX512F-32-NEXT: movb %cl, %bl | ||||
▲ Show 20 Lines • Show All 728 Lines • ▼ Show 20 Lines | |||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) | ; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) | ||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax | ||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx | ||||
; AVX512F-32-NEXT: addl %esi, %eax | ; AVX512F-32-NEXT: addl %esi, %eax | ||||
; AVX512F-32-NEXT: adcxl %ecx, %edx | ; AVX512F-32-NEXT: adcxl %ecx, %edx | ||||
; AVX512F-32-NEXT: addl $60, %esp | ; AVX512F-32-NEXT: addl $60, %esp | ||||
; AVX512F-32-NEXT: .Lcfi25: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 | |||||
; AVX512F-32-NEXT: popl %esi | ; AVX512F-32-NEXT: popl %esi | ||||
; AVX512F-32-NEXT: .Lcfi26: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 | |||||
; AVX512F-32-NEXT: popl %ebx | ; AVX512F-32-NEXT: popl %ebx | ||||
; AVX512F-32-NEXT: .Lcfi27: | |||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4 | |||||
; AVX512F-32-NEXT: retl | ; AVX512F-32-NEXT: retl | ||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) | %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) | ||||
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) | %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) | ||||
%ret1 = add i64 %res0, %res1 | %ret1 = add i64 %res0, %res1 | ||||
%res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) | %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) | ||||
%ret2 = add i64 %ret1, %res2 | %ret2 = add i64 %ret1, %res2 | ||||
%res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) | %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) | ||||
%ret3 = add i64 %ret2, %res3 | %ret3 = add i64 %ret2, %res3 | ||||
▲ Show 20 Lines • Show All 290 Lines • Show Last 20 Lines |