diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1,17 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,SSE,X86-SSE1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512BW +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 declare i32 @memcmp(i8*, i8*, i64) @@ -189,7 +193,7 @@ ; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 ; X64-NEXT: setne %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x 
i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } @@ -431,7 +435,7 @@ ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 ; X64-NEXT: sete %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 4) nounwind %c = icmp eq i32 %m, 0 ret i1 %c } @@ -679,7 +683,7 @@ ; X64-NEXT: cmpq %rax, (%rdi) ; X64-NEXT: setne %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 8) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } @@ -990,6 +994,17 @@ ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length16_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu (%eax), %xmm1 +; X86-SSE41-NEXT: pxor %xmm0, %xmm1 +; X86-SSE41-NEXT: ptest %xmm1, %xmm1 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length16_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1000,6 +1015,15 @@ ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length16_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE41-NEXT: pxor %xmm0, %xmm1 +; X64-SSE41-NEXT: ptest %xmm1, %xmm1 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length16_eq: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1007,19 +1031,97 @@ ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length16_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp ne i32 %call, 0 ret i1 %cmp } +define i1 @length16_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length16_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $16 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length16_lt: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB33_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB33_3 +; X64-NEXT: .LBB33_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB33_3: # %endblock +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) 
nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length16_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $16 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length16_gt: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB34_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB34_3 +; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: .LBB34_3: # %endblock +; X64-NEXT: testl %edx, %edx +; X64-NEXT: setg %al +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length16_eq_const(i8* %X) nounwind { ; X86-NOSSE-LABEL: length16_eq_const: ; X86-NOSSE: # %bb.0: @@ -1055,6 +1157,15 @@ ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length16_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length16_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1064,6 +1175,14 @@ ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length16_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length16_eq_const: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1071,15 +1190,7 @@ ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length16_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 16) nounwind %c = icmp eq i32 %m, 0 ret i1 %c } @@ -1146,6 +1257,21 @@ ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length24_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length24_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1160,6 +1286,19 @@ ; X64-SSE2-NEXT: 
sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length24_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE41-NEXT: pxor %xmm0, %xmm1 +; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: por %xmm1, %xmm2 +; X64-SSE41-NEXT: ptest %xmm2, %xmm2 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length24_eq: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1171,23 +1310,65 @@ ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length24_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp } +define i1 @length24_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length24_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length24_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length24_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length24_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length24_eq_const(i8* %X) nounwind { ; X86-NOSSE-LABEL: length24_eq_const: ; X86-NOSSE: # %bb.0: @@ -1226,6 +1407,18 @@ ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length24_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length24_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1238,6 +1431,17 @@ ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length24_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X64-SSE41-NEXT: pxor 
{{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length24_eq_const: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1248,48 +1452,35 @@ ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length24_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 24) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } -define i32 @length32(i8* %X, i8* %Y) nounwind { -; X86-LABEL: length32: +define i32 @length31(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length31: ; X86: # %bb.0: ; X86-NEXT: pushl $0 -; X86-NEXT: pushl $32 +; X86-NEXT: pushl $31 ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: calll memcmp ; X86-NEXT: addl $16, %esp ; X86-NEXT: retl ; -; X64-LABEL: length32: +; X64-LABEL: length31: ; X64: # %bb.0: -; X64-NEXT: movl $32, %edx +; X64-NEXT: movl $31, %edx ; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind ret i32 %m } -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(i8* %x, i8* %y) nounwind { -; X86-NOSSE-LABEL: length32_eq: +define i1 @length31_eq(i8* %x, i8* %y) nounwind { +; X86-NOSSE-LABEL: length31_eq: ; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl $0 -; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $31 ; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: calll memcmp @@ -1298,10 +1489,10 @@ ; X86-NOSSE-NEXT: sete %al ; X86-NOSSE-NEXT: retl ; -; X86-SSE1-LABEL: length32_eq: +; X86-SSE1-LABEL: length31_eq: ; X86-SSE1: # %bb.0: ; X86-SSE1-NEXT: pushl $0 -; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl $31 ; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: calll memcmp @@ -1310,15 +1501,15 @@ ; X86-SSE1-NEXT: sete %al ; X86-SSE1-NEXT: retl ; -; X86-SSE2-LABEL: length32_eq: +; X86-SSE2-LABEL: length31_eq: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 ; X86-SSE2-NEXT: movdqu (%eax), %xmm2 ; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 ; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax @@ -1326,13 +1517,28 @@ ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; -; X64-SSE2-LABEL: length32_eq: +; X86-SSE41-LABEL: length31_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE41-NEXT: 
movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 ; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 ; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 ; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax @@ -1340,44 +1546,93 @@ ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX1-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX1-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vptest %xmm0, %xmm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq +; X64-SSE41-LABEL: length31_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq ; -; X64-AVX512-LABEL: length32_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq - %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind +; X64-AVX-LABEL: length31_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp } -define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length32_eq_prefer128: +define i1 @length31_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length31_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $31 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length31_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $31, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length31_gt: +; 
X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $31 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length31_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $31, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length31_eq_prefer128: ; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl $0 -; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $31 ; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: calll memcmp @@ -1386,10 +1641,10 @@ ; X86-NOSSE-NEXT: sete %al ; X86-NOSSE-NEXT: retl ; -; X86-SSE1-LABEL: length32_eq_prefer128: +; X86-SSE1-LABEL: length31_eq_prefer128: ; X86-SSE1: # %bb.0: ; X86-SSE1-NEXT: pushl $0 -; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl $31 ; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: calll memcmp @@ -1398,15 +1653,15 @@ ; X86-SSE1-NEXT: sete %al ; X86-SSE1-NEXT: retl ; -; X86-SSE2-LABEL: length32_eq_prefer128: +; X86-SSE2-LABEL: length31_eq_prefer128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 ; X86-SSE2-NEXT: movdqu (%eax), %xmm2 ; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 ; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax @@ -1414,13 +1669,28 @@ ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; -; X64-SSE2-LABEL: length32_eq_prefer128: +; X86-SSE41-LABEL: length31_eq_prefer128: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq_prefer128: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 ; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 ; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 ; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax @@ -1428,37 +1698,39 @@ ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: length32_eq_prefer128: +; X64-SSE41-LABEL: length31_eq_prefer128: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, 
%xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length31_eq_prefer128: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 ; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq_prefer128: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX512-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq - %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp } -define i1 @length32_eq_const(i8* %X) nounwind { -; X86-NOSSE-LABEL: length32_eq_const: +define i1 @length31_eq_const(i8* %X) nounwind { +; X86-NOSSE-LABEL: length31_eq_const: ; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl $0 -; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $31 ; X86-NOSSE-NEXT: pushl $.L.str ; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: calll memcmp @@ -1467,10 +1739,10 @@ ; X86-NOSSE-NEXT: setne %al ; X86-NOSSE-NEXT: retl ; -; X86-SSE1-LABEL: length32_eq_const: +; X86-SSE1-LABEL: length31_eq_const: ; X86-SSE1: # %bb.0: ; X86-SSE1-NEXT: pushl $0 -; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl $31 ; X86-SSE1-NEXT: pushl $.L.str ; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE1-NEXT: calll memcmp @@ -1479,11 +1751,11 @@ ; X86-SSE1-NEXT: setne %al ; X86-SSE1-NEXT: retl ; -; X86-SSE2-LABEL: length32_eq_const: +; X86-SSE2-LABEL: length31_eq_const: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1 ; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1 ; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 ; X86-SSE2-NEXT: pand %xmm1, %xmm0 @@ -1492,10 +1764,22 @@ ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; -; X64-SSE2-LABEL: length32_eq_const: +; X86-SSE41-LABEL: length31_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 ; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1 ; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 ; X64-SSE2-NEXT: pand %xmm1, %xmm0 @@ -1504,63 +1788,2829 @@ ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; -; X64-AVX1-LABEL: length32_eq_const: +; X64-SSE41-LABEL: length31_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; 
X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length31_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length32: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $32 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length32: +; X64: # %bb.0: +; X64-NEXT: movl $32, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(i8* %x, i8* %y) nounwind { +; X86-NOSSE-LABEL: length32_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length32_eq: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length32_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length32_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length32_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length32_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu 
(%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; +; X64-AVX1-LABEL: length32_eq: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0 ; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 -; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX1-NEXT: vpxor (%rsi), %xmm0, %xmm0 ; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vptest %xmm0, %xmm0 -; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: sete %al ; X64-AVX1-NEXT: retq ; -; X64-AVX2-LABEL: length32_eq_const: +; X64-AVX2-LABEL: length32_eq: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 ; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: sete %al ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq ; -; X64-AVX512-LABEL: length32_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind - %c = icmp ne i32 %m, 0 +; X64-AVX512-LABEL: length32_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length32_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $32 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length32_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $32, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length32_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $32 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length32_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $32, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length32_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $32 +; 
X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length32_eq_prefer128: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length32_eq_prefer128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length32_eq_prefer128: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length32_eq_prefer128: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length32_eq_prefer128: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length32_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(i8* %X) nounwind { +; X86-NOSSE-LABEL: length32_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length32_eq_const: +; X86-SSE1: # %bb.0: +; 
X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $32 +; X86-SSE1-NEXT: pushl $.L.str +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: setne %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length32_eq_const: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqu (%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pand %xmm1, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: setne %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length32_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length32_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 +; X64-SSE2-NEXT: pand %xmm1, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length32_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; +; X64-AVX1-LABEL: length32_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vptest %xmm0, %xmm0 +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length32_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length32_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length48: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length48: +; X64: # %bb.0: +; X64-NEXT: movl $48, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(i8* %x, i8* %y) nounwind { +; 
X86-LABEL: length48_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length48_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length48_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $48, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length48_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: movq 32(%rdi), %rcx +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $8, %edx +; X64-AVX2-NEXT: vmovd %ecx, %xmm0 +; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $16, %edx +; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $24, %edx +; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1 +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: movq 40(%rdi), %rcx +; X64-AVX2-NEXT: shrq $48, %rdx +; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $56, %rdx +; X64-AVX2-NEXT: shrq $56, %rax +; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $16, %eax +; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $24, %eax +; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $32, %rax +; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $48, %rax +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq 32(%rsi), %rcx +; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $8, %edx +; X64-AVX2-NEXT: vmovd %ecx, %xmm2 +; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $16, %edx +; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $24, %edx +; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq 40(%rsi), %rcx +; X64-AVX2-NEXT: shrq $48, %rdx +; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $56, %rax +; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; 
X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $16, %eax +; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $24, %eax +; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $32, %rax +; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: shrq $48, %rax +; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: shrq $56, %rdx +; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length48_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movq 32(%rdi), %rcx +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $8, %edx +; X64-AVX512-NEXT: vmovd %ecx, %xmm0 +; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $16, %edx +; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $24, %edx +; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1 +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: movq 40(%rdi), %rcx +; X64-AVX512-NEXT: shrq $48, %rdx +; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $56, %rdx +; X64-AVX512-NEXT: shrq $56, %rax +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $16, %eax +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $24, %eax +; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $32, %rax +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $48, %rax +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq 32(%rsi), %rcx +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $8, %edx +; X64-AVX512-NEXT: vmovd %ecx, %xmm2 +; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $16, %edx +; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $24, %edx +; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm2, 
%xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq 40(%rsi), %rcx +; X64-AVX512-NEXT: shrq $48, %rdx +; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $56, %rax +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $16, %eax +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $24, %eax +; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $32, %rax +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: shrq $48, %rax +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: shrq $56, %rdx +; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length48_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length48_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length48_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length48_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { +; X86-LABEL: length48_eq_prefer128: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length48_eq_prefer128: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete 
%al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(i8* %X) nounwind { +; X86-LABEL: length48_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length48_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length48_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $48, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length48_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: pushq %r15 +; X64-AVX2-NEXT: pushq %r14 +; X64-AVX2-NEXT: pushq %r12 +; X64-AVX2-NEXT: pushq %rbx +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: movq 40(%rdi), %rcx +; X64-AVX2-NEXT: movq %rcx, %r8 +; X64-AVX2-NEXT: shrq $56, %r8 +; X64-AVX2-NEXT: movq %rcx, %r9 +; X64-AVX2-NEXT: shrq $48, %r9 +; X64-AVX2-NEXT: movq %rcx, %r10 +; X64-AVX2-NEXT: shrq $32, %r10 +; X64-AVX2-NEXT: movl %ecx, %r11d +; X64-AVX2-NEXT: shrl $24, %r11d +; X64-AVX2-NEXT: movl %ecx, %r14d +; X64-AVX2-NEXT: shrl $16, %r14d +; X64-AVX2-NEXT: movl %ecx, %r15d +; X64-AVX2-NEXT: shrl $8, %r15d +; X64-AVX2-NEXT: movq 32(%rdi), %rdi +; X64-AVX2-NEXT: movq %rdi, %r12 +; X64-AVX2-NEXT: shrq $56, %r12 +; X64-AVX2-NEXT: movq %rdi, %rbx +; X64-AVX2-NEXT: shrq $48, %rbx +; X64-AVX2-NEXT: movq %rdi, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: movl %edi, %ebp +; X64-AVX2-NEXT: shrl $24, %ebp +; X64-AVX2-NEXT: movl %edi, %esi +; X64-AVX2-NEXT: shrl $16, %esi +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: shrq $40, %rdi +; X64-AVX2-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1 +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rbx +; X64-AVX2-NEXT: popq %r12 +; X64-AVX2-NEXT: popq %r14 +; X64-AVX2-NEXT: popq %r15 +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length48_eq_const: +; X64-AVX512: # %bb.0: +; 
X64-AVX512-NEXT: pushq %rbp +; X64-AVX512-NEXT: pushq %r15 +; X64-AVX512-NEXT: pushq %r14 +; X64-AVX512-NEXT: pushq %r12 +; X64-AVX512-NEXT: pushq %rbx +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: movq 40(%rdi), %rcx +; X64-AVX512-NEXT: movq %rcx, %r8 +; X64-AVX512-NEXT: shrq $56, %r8 +; X64-AVX512-NEXT: movq %rcx, %r9 +; X64-AVX512-NEXT: shrq $48, %r9 +; X64-AVX512-NEXT: movq %rcx, %r10 +; X64-AVX512-NEXT: shrq $32, %r10 +; X64-AVX512-NEXT: movl %ecx, %r11d +; X64-AVX512-NEXT: shrl $24, %r11d +; X64-AVX512-NEXT: movl %ecx, %r14d +; X64-AVX512-NEXT: shrl $16, %r14d +; X64-AVX512-NEXT: movl %ecx, %r15d +; X64-AVX512-NEXT: shrl $8, %r15d +; X64-AVX512-NEXT: movq 32(%rdi), %rdi +; X64-AVX512-NEXT: movq %rdi, %r12 +; X64-AVX512-NEXT: shrq $56, %r12 +; X64-AVX512-NEXT: movq %rdi, %rbx +; X64-AVX512-NEXT: shrq $48, %rbx +; X64-AVX512-NEXT: movq %rdi, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: movl %edi, %ebp +; X64-AVX512-NEXT: shrl $24, %ebp +; X64-AVX512-NEXT: movl %edi, %esi +; X64-AVX512-NEXT: shrl $16, %esi +; X64-AVX512-NEXT: vmovd %edi, %xmm1 +; X64-AVX512-NEXT: movl %edi, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: shrq $40, %rdi +; X64-AVX512-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1 +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: popq %rbx +; X64-AVX512-NEXT: popq %r12 +; X64-AVX512-NEXT: popq %r14 +; X64-AVX512-NEXT: popq %r15 +; X64-AVX512-NEXT: popq %rbp +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length63: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length63: +; X64: # %bb.0: +; X64-NEXT: movl $63, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length63_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp 
+; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length63_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $63, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length63_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length63_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length63_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $63, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length63_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $63, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(i8* %X) nounwind { +; X86-LABEL: length63_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length63_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length63_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $63, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; 
X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length63_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length63_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length64: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length64: +; X64: # %bb.0: +; X64-NEXT: movl $64, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length64_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length64_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length64_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $64, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length64_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length64_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length64_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0 +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: 
length64_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length64_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $64, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length64_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length64_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $64, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(i8* %X) nounwind { +; X86-LABEL: length64_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length64_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length64_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $64, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length64_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length64_eq_const: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k0 +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length64_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0 +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length96: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; 
X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length96: +; X64: # %bb.0: +; X64-NEXT: movl $96, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length96_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length96_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $96, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length96_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $96, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length96_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: movq 80(%rdi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm0 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: movq 88(%rdi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: movq 64(%rdi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm2 +; X64-AVX512F-NEXT: movq 72(%rdi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: movq 80(%rsi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm3 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: movq 88(%rsi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: movq 64(%rsi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm4 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: movq 72(%rsi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm1 +; X64-AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm2, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length96_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: movq 80(%rdi), %rcx +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; 
X64-AVX512BW-NEXT: vmovd %ecx, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 88(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 64(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm2 +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1 +; X64-AVX512BW-NEXT: movq 72(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb 
$13, %ecx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq 80(%rsi), %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm3 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq 88(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq 64(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm4 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq 72(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq 
$32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm2 +; X64-AVX512BW-NEXT: vpcmpeqb %zmm2, %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length96_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $96, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length96_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $96, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(i8* %X) nounwind { +; X86-LABEL: length96_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length96_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length96_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $96, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length96_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $96, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length96_eq_const: +; X64-AVX512F: # %bb.0: +; 
X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: movq 72(%rdi), %rax +; X64-AVX512F-NEXT: movq 64(%rdi), %rcx +; X64-AVX512F-NEXT: vmovd %ecx, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rcx +; X64-AVX512F-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: movq 88(%rdi), %rcx +; X64-AVX512F-NEXT: movq 80(%rdi), %rdx +; X64-AVX512F-NEXT: vmovd %edx, %xmm2 +; X64-AVX512F-NEXT: shrq $32, %rdx +; X64-AVX512F-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: shrq $32, %rcx +; X64-AVX512F-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length96_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: movq 80(%rdi), %rax +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: vmovd %eax, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 88(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rcx +; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $8, %ecx +; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $16, %ecx +; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $24, %ecx +; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $32, %rcx +; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 64(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %eax, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; 
X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm2 +; X64-AVX512BW-NEXT: movq 72(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rcx +; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $8, %ecx +; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $16, %ecx +; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $24, %ecx +; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $32, %rcx +; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: shrq $48, %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm2, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length127: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length127: +; X64: # %bb.0: +; X64-NEXT: movl $127, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length127_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length127_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $127, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length127_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $127, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length127_eq: +; X64-AVX512F: # %bb.0: +; 
X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd 63(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length127_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb 63(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length127_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length127_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(i8* %X) nounwind { +; X86-LABEL: length127_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length127_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length127_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $127, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length127_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $127, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length127_eq_const: +; X64-AVX512F: # 
%bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length127_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length128: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length128: +; X64: # %bb.0: +; X64-NEXT: movl $128, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length128_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length128_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $128, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length128_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $128, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length128_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length128_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_lt: +; X86: # %bb.0: +; 
X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length128_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $128, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length128_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $128, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(i8* %X) nounwind { +; X86-LABEL: length128_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length128_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length128_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $128, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length128_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $128, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length128_eq_const: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length128_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c 
+} + +define i32 @length192(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length192: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length192: +; X64: # %bb.0: +; X64-NEXT: movl $192, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length192_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length192_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length192_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(i8* %X) nounwind { +; X86-LABEL: length192_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length192_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length255: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll 
memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length255: +; X64: # %bb.0: +; X64-NEXT: movl $255, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length255_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length255_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length255_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(i8* %X) nounwind { +; X86-LABEL: length255_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length255_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length256: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length256: +; X64: # %bb.0: +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, 
i64 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length256_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length256_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length256_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(i8* %X) nounwind { +; X86-LABEL: length256_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length256_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length384: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length384: +; X64: # %bb.0: +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(i8* %x, i8* %y) nounwind { +; 
X86-LABEL: length384_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length384_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length384_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length384_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length384_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length384_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(i8* %X) nounwind { +; X86-LABEL: length384_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length384_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 384) nounwind + %c = icmp eq i32 %m, 0 ret i1 %c } -define i32 @length64(i8* %X, i8* %Y) nounwind { -; X86-LABEL: length64: +define i32 @length511(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length511: ; X86: # %bb.0: ; X86-NEXT: pushl $0 -; X86-NEXT: pushl $64 +; X86-NEXT: pushl $511 # imm = 0x1FF ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: calll memcmp ; X86-NEXT: addl $16, %esp ; X86-NEXT: retl ; -; X64-LABEL: length64: +; X64-LABEL: length511: ; X64: # %bb.0: -; X64-NEXT: movl $64, %edx +; X64-NEXT: movl $511, %edx # imm = 0x1FF ; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind + %m = tail call i32 @memcmp(i8* %X, 
i8* %Y, i64 511) nounwind ret i32 %m } -define i1 @length64_eq(i8* %x, i8* %y) nounwind { -; X86-LABEL: length64_eq: +define i1 @length511_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_eq: ; X86: # %bb.0: ; X86-NEXT: pushl $0 -; X86-NEXT: pushl $64 +; X86-NEXT: pushl $511 # imm = 0x1FF ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: calll memcmp @@ -1569,65 +4619,79 @@ ; X86-NEXT: setne %al ; X86-NEXT: retl ; -; X64-SSE2-LABEL: length64_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $64, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq +; X64-LABEL: length511_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl ; -; X64-AVX512F-LABEL: length64_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 -; X64-AVX512F-NEXT: kortestw %k0, %k0 -; X64-AVX512F-NEXT: setae %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq +; X64-LABEL: length511_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl ; -; X64-AVX512BW-LABEL: length64_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0 -; X64-AVX512BW-NEXT: kortestq %k0, %k0 -; X64-AVX512BW-NEXT: setae %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq - %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 +; X64-LABEL: length511_gt: +; X64: # %bb.0: +; 
+define i32 @length512(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length512:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
 ;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $64, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
+; X64-LABEL: length512:
+; X64: # %bb.0:
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 512) nounwind
+  ret i32 %m
+}
+
+define i1 @length512_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
 ;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
+; X64-LABEL: length512_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
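+; The lt/gt variants inspect the sign of the libcall's i32 result: lt
+; extracts the sign bit with shrl $31, while gt materializes the flag with
+; testl followed by setg.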
+define i1 @length512_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
 ;
-; X64-AVX512F-LABEL: length64_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512F-NEXT: kortestw %k0, %k0
-; X64-AVX512F-NEXT: setb %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
+; X64-LABEL: length512_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length512_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
 ;
-; X64-AVX512BW-LABEL: length64_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT: kortestq %k0, %k0
-; X64-AVX512BW-NEXT: setb %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
+; X64-LABEL: length512_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length512_eq_const(i8* %X) nounwind {
+; X86-LABEL: length512_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length512_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 512) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c
 }