Index: llvm/test/CodeGen/X86/combine-bitreverse.ll
===================================================================
--- llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -8,6 +8,7 @@
 declare i32 @llvm.bitreverse.i32(i32) readnone
 declare i64 @llvm.bitreverse.i64(i64) readnone
 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
+declare i32 @llvm.bswap.i32(i32) readnone
 
 ; fold (bitreverse undef) -> undef
 define i32 @test_undef() nounwind {
@@ -444,3 +445,658 @@
 %d = and <4 x i32> %c,
 ret <4 x i32> %d
 }
+
+define i32 @brev_and_lhs_brev32(i32 %a, i32 %b) #0 {
+; X86-LABEL: brev_and_lhs_brev32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_and_lhs_brev32:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    bswapl %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %edi
+; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %edi
+; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %ecx
+; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X64-NEXT:    shrl $2, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    leal (%rax,%rcx,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @brev_or_lhs_brev64(i64 %a, i64 %b) #0 {
+; X86-LABEL: brev_or_lhs_brev64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    leal (%ecx,%edx,2), %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_or_lhs_brev64:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapq %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $4, %rax
+; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NEXT:    andq %rcx, %rax
+; X64-NEXT:    andq %rcx, %rdi
+; X64-NEXT:    shlq $4, %rdi
+; X64-NEXT:    orq %rax, %rdi
+; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    andq %rax, %rdx
+; X64-NEXT:    shrq $2, %rdi
+; X64-NEXT:    andq %rax, %rdi
+; X64-NEXT:    leaq (%rdi,%rdx,4), %rdi
+; X64-NEXT:    movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    andq %rdx, %r8
+; X64-NEXT:    shrq %rdi
+; X64-NEXT:    andq %rdx, %rdi
+; X64-NEXT:    leaq (%rdi,%r8,2), %rdi
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    bswapq %rdi
+; X64-NEXT:    movq %rdi, %rsi
+; X64-NEXT:    shrq $4, %rsi
+; X64-NEXT:    andq %rcx, %rsi
+; X64-NEXT:    andq %rcx, %rdi
+; X64-NEXT:    shlq $4, %rdi
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    andq %rax, %rcx
+; X64-NEXT:    shrq $2, %rdi
+; X64-NEXT:    andq %rax, %rdi
+; X64-NEXT:    leaq (%rdi,%rcx,4), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    andq %rdx, %rcx
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    andq %rdx, %rax
+; X64-NEXT:    leaq (%rax,%rcx,2), %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bitreverse.i64(i64 %a)
+  %2 = or i64 %1, %b
+  %3 = tail call i64 @llvm.bitreverse.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @brev_xor_rhs_brev64(i64 %a, i64 %b) #0 {
+; X86-LABEL: brev_xor_rhs_brev64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    leal (%ecx,%edx,2), %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_xor_rhs_brev64:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapq %rsi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    shrq $4, %rax
+; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NEXT:    andq %rcx, %rax
+; X64-NEXT:    andq %rcx, %rsi
+; X64-NEXT:    shlq $4, %rsi
+; X64-NEXT:    orq %rax, %rsi
+; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    andq %rax, %rdx
+; X64-NEXT:    shrq $2, %rsi
+; X64-NEXT:    andq %rax, %rsi
+; X64-NEXT:    leaq (%rsi,%rdx,4), %rsi
+; X64-NEXT:    movabsq $6148914691236517205, %rdx # imm = 0x5555555555555555
+; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    andq %rdx, %r8
+; X64-NEXT:    shrq %rsi
+; X64-NEXT:    andq %rdx, %rsi
+; X64-NEXT:    leaq (%rsi,%r8,2), %rsi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    bswapq %rsi
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    shrq $4, %rdi
+; X64-NEXT:    andq %rcx, %rdi
+; X64-NEXT:    andq %rcx, %rsi
+; X64-NEXT:    shlq $4, %rsi
+; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    andq %rax, %rcx
+; X64-NEXT:    shrq $2, %rsi
+; X64-NEXT:    andq %rax, %rsi
+; X64-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    andq %rdx, %rcx
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    andq %rdx, %rax
+; X64-NEXT:    leaq (%rax,%rcx,2), %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bitreverse.i64(i64 %b)
+  %2 = xor i64 %a, %1
+  %3 = tail call i64 @llvm.bitreverse.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @brev_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
+; X86-LABEL: brev_and_all_operand_multiuse:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %esi
+; X86-NEXT:    shrl $4, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    leal (%edx,%esi,4), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $1431655765, %esi # imm = 0x55555555
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    leal (%edx,%esi,2), %edx
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_and_all_operand_multiuse:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    bswapl %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %edi
+; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %edi
+; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %ecx
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %esi
+; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %esi
+; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rdx,2), %eax
+; X64-NEXT:    movl %ecx, %edx
+; X64-NEXT:    andl %eax, %edx
+; X64-NEXT:    bswapl %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %esi
+; X64-NEXT:    shrl $4, %edx
+; X64-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X64-NEXT:    orl %esi, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NEXT:    shrl $2, %edx
+; X64-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X64-NEXT:    leal (%rdx,%rsi,4), %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    andl $1431655765, %esi # imm = 0x55555555
+; X64-NEXT:    shrl %edx
+; X64-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X64-NEXT:    leal (%rdx,%rsi,2), %edx
+; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    imull %edx, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %3 = and i32 %1, %2
+  %4 = tail call i32 @llvm.bitreverse.i32(i32 %3)
+  %5 = mul i32 %1, %4 ;increase use of left bitreverse
+  %6 = mul i32 %2, %5 ;increase use of right bitreverse
+
+  ret i32 %6
+}
+
+; negative test
+define i32 @brev_and_rhs_brev32_multiuse1(i32 %a, i32 %b) #0 {
+; X86-LABEL: brev_and_rhs_brev32_multiuse1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%ecx,2), %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_and_rhs_brev32_multiuse1:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %esi
+; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %esi
+; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %ecx
+; X64-NEXT:    andl %edi, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %edx
+; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X64-NEXT:    shrl $2, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    leal (%rax,%rdx,4), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rdx,2), %eax
+; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @brev_and_rhs_brev32_multiuse2(i32 %a, i32 %b) #0 {
+; X86-LABEL: brev_and_rhs_brev32_multiuse2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%ecx,2), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: brev_and_rhs_brev32_multiuse2:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %esi
+; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %esi
+; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %ecx
+; X64-NEXT:    andl %ecx, %edi
+; X64-NEXT:    bswapl %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %edi
+; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %edi
+; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rdx,2), %eax
+; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  %4 = mul i32 %1, %3 ;increase use of inner bitreverse
+  ret i32 %4
+}
Index: llvm/test/CodeGen/X86/combine-bswap.ll
===================================================================
--- llvm/test/CodeGen/X86/combine-bswap.ll
+++ llvm/test/CodeGen/X86/combine-bswap.ll
@@ -9,6 +9,7 @@
 declare i32 @llvm.bswap.i32(i32) readnone
 declare i64 @llvm.bswap.i64(i64) readnone
 declare i32 @llvm.bswap.v4i32(i32) readnone
+declare i32 @llvm.bitreverse.i32(i32) readnone
 
 ; fold (bswap undef) -> undef
 define i32 @test_undef() nounwind {
@@ -255,6 +256,223 @@
 ret i32 %b
 }
 
+define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_lhs_bs32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_lhs_bs32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_or_lhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_or_lhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = or i64 %1, %b
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_xor_rhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_xor_rhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = xor i64 %a, %1
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_all_operand_multiuse:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_all_operand_multiuse:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapl %edi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %edi, %esi
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = and i32 %1, %2
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  %5 = mul i32 %1, %4 ;increase use of left bswap
+  %6 = mul i32 %2, %5 ;increase use of right bswap
+
+  ret i32 %6
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse1:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %1, %3 ;increase use of inner bswap
+  ret i32 %4
+}
+
+; negative test
+define i32 @bs_xor_rhs_brev32(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_xor_rhs_brev32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_xor_rhs_brev32:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    shrl $4, %esi
+; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NEXT:    orl %eax, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT:    shrl $2, %esi
+; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    xorl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = xor i32 %a, %1
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
 ; negative test
 define i64 @test_bswap64_shift17(i64 %a0) {
 ; X86-LABEL: test_bswap64_shift17: