diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3639,6 +3639,8 @@ TTI::MemCmpExpansionOptions Options; Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); Options.NumLoadsPerBlock = 2; + // All GPR and vector loads can be unaligned. + Options.AllowOverlappingLoads = true; if (IsZeroCmp) { // Only enable vector loads for equality comparison. Right now the vector // version is not as fast for three way compare (see #33329). @@ -3646,8 +3648,6 @@ if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32); if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); - // All GPR and vector loads can be unaligned. - Options.AllowOverlappingLoads = true; } if (ST->is64Bit()) { Options.LoadSizes.push_back(8); diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -577,64 +577,53 @@ ; X86-LABEL: length7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl (%esi), %ecx ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi +; X86-NEXT: bswapl %ecx ; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB19_4 +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jne .LBB19_2 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzwl 4(%eax), %edx -; X86-NEXT: movzwl 4(%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: movzwl %dx, %edx -; X86-NEXT: movzwl %si, %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB19_4 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movzbl 6(%eax), %eax -; X86-NEXT: movzbl 6(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB19_4: # %res_block +; X86-NEXT: movl 3(%esi), %ecx +; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: je .LBB19_3 +; X86-NEXT: .LBB19_2: # %res_block +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: .LBB19_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB19_4 +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: jne .LBB19_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzwl 4(%rdi), %eax -; X64-NEXT: movzwl 4(%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB19_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movzbl 6(%rdi), %eax -; X64-NEXT: movzbl 6(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB19_4: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %eax +; 
X64-NEXT: movl 3(%rdi), %ecx +; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: je .LBB19_3 +; X64-NEXT: .LBB19_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB19_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m @@ -671,31 +660,25 @@ ; X86-LABEL: length7_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl (%esi), %ecx ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi +; X86-NEXT: bswapl %ecx ; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB21_4 +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jne .LBB21_2 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzwl 4(%eax), %edx -; X86-NEXT: movzwl 4(%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: movzwl %dx, %edx -; X86-NEXT: movzwl %si, %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB21_4 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movzbl 6(%eax), %eax -; X86-NEXT: movzbl 6(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB21_3 -; X86-NEXT: .LBB21_4: # %res_block +; X86-NEXT: movl 3(%esi), %ecx +; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: je .LBB21_3 +; X86-NEXT: .LBB21_2: # %res_block +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB21_3: # %endblock @@ -706,33 +689,26 @@ ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB21_3 +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: jne .LBB21_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzwl 4(%rdi), %eax -; X64-NEXT: movzwl 4(%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB21_3 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movzbl 6(%rdi), %eax -; X64-NEXT: movzbl 6(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB21_3: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: movl 3(%rdi), %ecx +; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: je .LBB21_3 +; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1931,8 +1907,41 @@ ; ; X64-LABEL: length31: ; X64: # %bb.0: -; X64-NEXT: movl $31, %edx -; X64-NEXT: jmp memcmp # TAILCALL +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; 
X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB43_4 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB43_4 +; X64-NEXT: # %bb.2: # %loadbb2 +; X64-NEXT: movq 16(%rdi), %rcx +; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB43_4 +; X64-NEXT: # %bb.3: # %loadbb3 +; X64-NEXT: movq 23(%rdi), %rcx +; X64-NEXT: movq 23(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB43_5 +; X64-NEXT: .LBB43_4: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB43_5: # %endblock +; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind ret i32 %m } @@ -2063,12 +2072,42 @@ ; ; X64-LABEL: length31_lt: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $31, %edx -; X64-NEXT: callq memcmp +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB45_4 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB45_4 +; X64-NEXT: # %bb.2: # %loadbb2 +; X64-NEXT: movq 16(%rdi), %rcx +; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB45_4 +; X64-NEXT: # %bb.3: # %loadbb3 +; X64-NEXT: movq 23(%rdi), %rcx +; X64-NEXT: movq 23(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB45_5 +; X64-NEXT: .LBB45_4: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB45_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx ; X64-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp slt i32 %call, 0 @@ -2090,12 +2129,42 @@ ; ; X64-LABEL: length31_gt: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $31, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB46_4 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB46_4 +; X64-NEXT: # %bb.2: # %loadbb2 +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB46_4 +; X64-NEXT: # %bb.3: # %loadbb3 +; X64-NEXT: movq 23(%rdi), %rax +; X64-NEXT: movq 23(%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB46_5 +; X64-NEXT: .LBB46_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: .LBB46_5: # %endblock +; X64-NEXT: testl %edx, %edx ; 
X64-NEXT: setg %al -; X64-NEXT: popq %rcx ; X64-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp sgt i32 %call, 0 diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -622,18 +622,55 @@ define i32 @length7(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7: ; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $7 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl (%esi), %ecx +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jne .LBB21_2 +; X86-NEXT: # %bb.1: # %loadbb1 +; X86-NEXT: movl 3(%esi), %ecx +; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: je .LBB21_3 +; X86-NEXT: .LBB21_2: # %res_block +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: setae %al +; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: .LBB21_3: # %endblock +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl $7, %edx -; X64-NEXT: jmp memcmp # TAILCALL +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: jne .LBB21_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movl 3(%rdi), %ecx +; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: je .LBB21_3 +; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB21_3: # %endblock +; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m } @@ -641,24 +678,58 @@ define i1 @length7_lt(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7_lt: ; X86: # %bb.0: -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $7 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $16, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl (%esi), %ecx +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jne .LBB22_2 +; X86-NEXT: # %bb.1: # %loadbb1 +; X86-NEXT: movl 3(%esi), %ecx +; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: je .LBB22_3 +; X86-NEXT: .LBB22_2: # %res_block +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: setae %al +; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $7, %edx -; X64-NEXT: callq memcmp +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: jne .LBB22_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movl 3(%rdi), %ecx +; X64-NEXT: 
movl 3(%rsi), %edx +; X64-NEXT: bswapl %ecx +; X64-NEXT: bswapl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: je .LBB22_3 +; X64-NEXT: .LBB22_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB22_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind %c = icmp slt i32 %m, 0 @@ -998,8 +1069,27 @@ ; ; X64-LABEL: length15: ; X64: # %bb.0: -; X64-NEXT: movl $15, %edx -; X64-NEXT: jmp memcmp # TAILCALL +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB34_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 7(%rdi), %rcx +; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB34_3 +; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB34_3: # %endblock +; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind ret i32 %m } @@ -1019,12 +1109,28 @@ ; ; X64-LABEL: length15_lt: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $15, %edx -; X64-NEXT: callq memcmp +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB35_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 7(%rdi), %rcx +; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB35_3 +; X64-NEXT: .LBB35_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind %c = icmp slt i32 %m, 0 @@ -1044,9 +1150,25 @@ ; ; X64-LABEL: length15_const: ; X64: # %bb.0: -; X64-NEXT: movl $.L.str+1, %esi -; X64-NEXT: movl $15, %edx -; X64-NEXT: jmp memcmp # TAILCALL +; X64-NEXT: movabsq $3544952156018063160, %rcx # imm = 0x3132333435363738 +; X64-NEXT: movq (%rdi), %rdx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rcx, %rdx +; X64-NEXT: jne .LBB36_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movabsq $4051322327650219061, %rcx # imm = 0x3839303132333435 +; X64-NEXT: movq 7(%rdi), %rdx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rcx, %rdx +; X64-NEXT: je .LBB36_3 +; X64-NEXT: .LBB36_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rcx, %rdx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB36_3: # %endblock +; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 15) nounwind ret i32 %m } @@ -1093,13 +1215,26 @@ ; ; X64-LABEL: length15_gt_const: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str+1, %esi -; X64-NEXT: movl $15, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax +; X64-NEXT: movabsq $3544952156018063160, %rax # imm = 0x3132333435363738 +; X64-NEXT: movq (%rdi), %rcx +; 
X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rax, %rcx +; X64-NEXT: jne .LBB38_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movabsq $4051322327650219061, %rax # imm = 0x3839303132333435 +; X64-NEXT: movq 7(%rdi), %rcx +; X64-NEXT: bswapq %rcx +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rax, %rcx +; X64-NEXT: je .LBB38_3 +; X64-NEXT: .LBB38_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rax, %rcx +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: .LBB38_3: # %endblock +; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al -; X64-NEXT: popq %rcx ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 15) nounwind %c = icmp sgt i32 %m, 0 diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -165,8 +165,36 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp7( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) -; ALL-NEXT: ret i32 [[CALL]] +; ALL-NEXT: br label [[LOADBB:%.*]] +; ALL: res_block: +; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]] +; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]] +; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) +; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) +; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] +; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; ALL: loadbb1: +; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 3 +; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 3 +; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32* +; ALL-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] +; ALL-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]] +; ALL-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) +; ALL-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) +; ALL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] +; ALL-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; ALL: endblock: +; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; ALL-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) ret i32 %call @@ -304,9 +332,41 @@ } define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp11( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) -; ALL-NEXT: ret i32 [[CALL]] +; X32-LABEL: @cmp11( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) +; X32-NEXT: ret i32 [[CALL]] +; +; X64-LABEL: @cmp11( +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: 
[[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 3 +; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 3 +; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64* +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64* +; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] +; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] +; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) +; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) +; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] +; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) ret i32 %call @@ -356,27 +416,123 @@ } define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp13( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) -; ALL-NEXT: ret i32 [[CALL]] +; X32-LABEL: @cmp13( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) +; X32-NEXT: ret i32 [[CALL]] +; +; X64-LABEL: @cmp13( +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 5 +; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 5 +; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64* +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64* +; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] +; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] +; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) +; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) +; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] +; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; 
X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) ret i32 %call } define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp14( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) -; ALL-NEXT: ret i32 [[CALL]] +; X32-LABEL: @cmp14( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) +; X32-NEXT: ret i32 [[CALL]] +; +; X64-LABEL: @cmp14( +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 6 +; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 6 +; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64* +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64* +; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] +; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] +; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) +; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) +; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] +; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) ret i32 %call } define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; ALL-LABEL: @cmp15( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) -; ALL-NEXT: ret i32 [[CALL]] +; X32-LABEL: @cmp15( +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) +; X32-NEXT: ret i32 [[CALL]] +; +; X64-LABEL: @cmp15( +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = 
icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 7 +; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 7 +; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64* +; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64* +; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] +; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] +; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) +; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) +; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] +; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) ret i32 %call
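
The functional change above is the two-line move in X86TargetTransformInfo.cpp: Options.AllowOverlappingLoads is now set unconditionally instead of only under IsZeroCmp, so three-way memcmp expansion (not just equality comparison) may emit a final load that overlaps the previous one. That is why memcmp(x, y, 7) now lowers to two 4-byte loads at offsets 0 and 3 (byte 3 is read twice) rather than a 4+2+1 chain, and memcmp(x, y, 31) to four 8-byte loads at offsets 0, 8, 16, and 23 instead of a library call. Below is a minimal C++ sketch of the idea, written by hand rather than taken from the pass (the helper name load_be32 is hypothetical), assuming unaligned 32-bit loads are cheap, as they are on x86:

#include <cstdint>
#include <cstring>

// Big-endian load, so that unsigned comparison of the loaded words matches
// the bytewise ordering memcmp is defined by. memcpy makes the unaligned
// access well-defined.
static uint32_t load_be32(const void *p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return __builtin_bswap32(v);
}

// Equivalent of memcmp(x, y, 7) using two overlapping 4-byte loads.
int memcmp7(const char *x, const char *y) {
  uint32_t a = load_be32(x), b = load_be32(y); // bytes 0..3
  if (a == b) {
    a = load_be32(x + 3);                      // bytes 3..6; byte 3 overlaps,
    b = load_be32(y + 3);                      // but it is already known equal
    if (a == b)
      return 0;
  }
  return a < b ? -1 : 1; // same -1/0/1 lowering as the res_block in the tests
}

The overlap is sound for the three-way result because the second comparison is only reached when the first four bytes compare equal, so the duplicated byte cannot change which word compares lower; this is the same reasoning the expanded IR in the ExpandMemCmp tests encodes with its loadbb/res_block/endblock structure.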