Index: llvm/test/CodeGen/X86/combine-bswap.ll
===================================================================
--- llvm/test/CodeGen/X86/combine-bswap.ll
+++ llvm/test/CodeGen/X86/combine-bswap.ll
@@ -255,6 +255,134 @@
   ret i32 %b
 }
 
+define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_lhs_bs32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_lhs_bs32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @bs_and_lhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_and_lhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_lhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    andq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = and i64 %1, %b
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    andq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = and i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse1:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %1, %3 ;increase use of inner bswap
+  ret i32 %4
+}
+
 ; negative test
 define i64 @test_bswap64_shift17(i64 %a0) {
 ; X86-LABEL: test_bswap64_shift17:
Index: llvm/test/Transforms/InstCombine/bswap-fold.ll
===================================================================
--- llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -539,6 +539,246 @@
 }
 
 
+; Issue#62236
+; Fold: BSWAP( OP( BSWAP(x), y ) ) -> OP( x, BSWAP(y) )
+define i16 @bs_and_lhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and_lhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = and i16 %1, %b
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i16 @bs_or_lhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or_lhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = or i16 %1, %b
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i16 @bs_xor_lhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor_lhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = xor i16 %1, %b
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i16 @bs_and_rhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %2 = and i16 %1, %a
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i16 @bs_or_rhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or_rhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %2 = or i16 %1, %a
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i16 @bs_xor_rhs_bs16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor_rhs_bs16(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %2 = xor i16 %1, %a
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define i32 @bs_and_rhs_bs32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_bs32(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i32 @bs_or_rhs_bs32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_or_rhs_bs32(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = or i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i32 @bs_xor_rhs_bs32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_xor_rhs_bs32(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = xor i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_bs64(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = and i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_or_rhs_bs64(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = or i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_xor_rhs_bs64(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = xor i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define <2 x i32> @bs_and_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_i32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %2 = and <2 x i32> %1, %a
+  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define <2 x i32> @bs_or_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or_rhs_i32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %2 = or <2 x i32> %1, %a
+  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define <2 x i32> @bs_xor_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor_rhs_i32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %2 = xor <2 x i32> %1, %a
+  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define i64 @bs_and_rhs_bs64_multiuse1(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_bs64_multiuse1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = and i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3 ;increase use of logical op
+  ret i64 %4
+}
+
+define i64 @bs_and_rhs_bs64_multiuse2(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and_rhs_bs64_multiuse2(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = and i64 %1, %a
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %1, %3 ;increase use of inner bswap
+  ret i64 %4
+}
+
+define i64 @bs_all_operand64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_all_operand64(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %3 = and i64 %1, %2
+  %4 = tail call i64 @llvm.bswap.i64(i64 %3)
+  ret i64 %4
+}
+
+
 define i64 @bs_active_high8(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255