Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1804,6 +1804,68 @@
 }
 }
 
+// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
+def : Pat<(and GR16:$src1, (rotl -2, GR8:$src2)),
+          (BTR16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(and GR32:$src1, (rotl -2, GR8:$src2)),
+          (BTR32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(and GR64:$src1, (rotl -2, GR8:$src2)),
+          (BTR64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+def : Pat<(or GR16:$src1, (shl 1, GR8:$src2)),
+          (BTS16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(or GR32:$src1, (shl 1, GR8:$src2)),
+          (BTS32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(or GR64:$src1, (shl 1, GR8:$src2)),
+          (BTS64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+def : Pat<(xor GR16:$src1, (shl 1, GR8:$src2)),
+          (BTC16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(xor GR32:$src1, (shl 1, GR8:$src2)),
+          (BTC32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(xor GR64:$src1, (shl 1, GR8:$src2)),
+          (BTC64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+// Similar to above, but removing unneeded masking of the shift amount.
+def : Pat<(and GR16:$src1, (rotl -2, (and GR8:$src2, immShift16))),
+          (BTR16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(and GR32:$src1, (rotl -2, (and GR8:$src2, immShift32))),
+          (BTR32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(and GR64:$src1, (rotl -2, (and GR8:$src2, immShift64))),
+          (BTR64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+def : Pat<(or GR16:$src1, (shl 1, (and GR8:$src2, immShift16))),
+          (BTS16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(or GR32:$src1, (shl 1, (and GR8:$src2, immShift32))),
+          (BTS32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(or GR64:$src1, (shl 1, (and GR8:$src2, immShift64))),
+          (BTS64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+def : Pat<(xor GR16:$src1, (shl 1, (and GR8:$src2, immShift16))),
+          (BTC16rr GR16:$src1,
+                   (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(xor GR32:$src1, (shl 1, (and GR8:$src2, immShift32))),
+          (BTC32rr GR32:$src1,
+                   (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(xor GR64:$src1, (shl 1, (and GR8:$src2, immShift64))),
+          (BTC64rr GR64:$src1,
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
 // (anyext (setcc_carry)) -> (setcc_carry)
 def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>;
 
Index: test/CodeGen/X86/btc_bts_btr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/btc_bts_btr.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s --check-prefix=X86
+
+define i32 @btr_32(i32 %x, i32 %n) {
+; X64-LABEL: btr_32:
+; X64:       # %bb.0:
+; X64-NEXT:    btrl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btr_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $-2, %eax
+; X86-NEXT:    roll %cl, %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i32 1, %n
+  %2 = xor i32 %1, -1
+  %3 = and i32 %x, %2
+  ret i32 %3
+}
+
+define i32 @bts_32(i32 %x, i32 %n) {
+; X64-LABEL: bts_32:
+; X64:       # %bb.0:
+; X64-NEXT:    btsl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: bts_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i32 1, %n
+  %2 = or i32 %x, %1
+  ret i32 %2
+}
+
+define i32 @btc_32(i32 %x, i32 %n) {
+; X64-LABEL: btc_32:
+; X64:       # %bb.0:
+; X64-NEXT:    btcl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btc_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i32 1, %n
+  %2 = xor i32 %x, %1
+  ret i32 %2
+}
+
+define i64 @btr_64(i64 %x, i64 %n) {
+; X64-LABEL: btr_64:
+; X64:       # %bb.0:
+; X64-NEXT:    btrq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btr_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB3_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB3_2:
+; X86-NEXT:    notl %edx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i64 1, %n
+  %2 = xor i64 %1, -1
+  %3 = and i64 %x, %2
+  ret i64 %3
+}
+
+define i64 @bts_64(i64 %x, i64 %n) {
+; X64-LABEL: bts_64:
+; X64:       # %bb.0:
+; X64-NEXT:    btsq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: bts_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB4_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB4_2:
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i64 1, %n
+  %2 = or i64 %x, %1
+  ret i64 %2
+}
+
+define i64 @btc_64(i64 %x, i64 %n) {
+; X64-LABEL: btc_64:
+; X64:       # %bb.0:
+; X64-NEXT:    btcq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btc_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB5_2:
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = shl i64 1, %n
+  %2 = xor i64 %x, %1
+  ret i64 %2
+}
+
+define i32 @btr_32_mask(i32 %x, i32 %n) {
+; X64-LABEL: btr_32_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btrl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btr_32_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $-2, %eax
+; X86-NEXT:    roll %cl, %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i32 %n, 31
+  %2 = shl i32 1, %1
+  %3 = xor i32 %2, -1
+  %4 = and i32 %x, %3
+  ret i32 %4
+}
+
+define i32 @bts_32_mask(i32 %x, i32 %n) {
+; X64-LABEL: bts_32_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btsl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: bts_32_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i32 %n, 31
+  %2 = shl i32 1, %1
+  %3 = or i32 %x, %2
+  ret i32 %3
+}
+
+define i32 @btc_32_mask(i32 %x, i32 %n) {
+; X64-LABEL: btc_32_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btcl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btc_32_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i32 %n, 31
+  %2 = shl i32 1, %1
+  %3 = xor i32 %x, %2
+  ret i32 %3
+}
+
+define i64 @btr_64_mask(i64 %x, i64 %n) {
+; X64-LABEL: btr_64_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btrq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btr_64_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB9_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB9_2:
+; X86-NEXT:    notl %edx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i64 %n, 63
+  %2 = shl i64 1, %1
+  %3 = xor i64 %2, -1
+  %4 = and i64 %x, %3
+  ret i64 %4
+}
+
+define i64 @bts_64_mask(i64 %x, i64 %n) {
+; X64-LABEL: bts_64_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btsq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: bts_64_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB10_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB10_2:
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i64 %n, 63
+  %2 = shl i64 1, %1
+  %3 = or i64 %x, %2
+  ret i64 %3
+}
+
+define i64 @btc_64_mask(i64 %x, i64 %n) {
+; X64-LABEL: btc_64_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    btcq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: btc_64_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB11_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB11_2:
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %1 = and i64 %n, 63
+  %2 = shl i64 1, %1
+  %3 = xor i64 %x, %2
+  ret i64 %3
+}
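
Note (not part of the patch): the new patterns also cover GR16, which the test above does not exercise. Below is a minimal IR sketch of the 16-bit clear-bit case; the function name is made up for illustration. The expectation, under the same DAG canonicalization the comment in the .td relies on, is that the (xor (shl 1, %n), -1) mask is combined to (rotl -2, %n), the shape the new BTR16rr pattern matches on x86-64.

; Illustrative reproducer (assumption, not in the patch): clear bit %n of an
; i16 value. With -mtriple=x86_64-pc-linux this is expected to select the new
; BTR16rr pattern once the mask has been combined to a rotate of -2.
define i16 @btr_16(i16 %x, i16 %n) {
  %1 = shl i16 1, %n
  %2 = xor i16 %1, -1
  %3 = and i16 %x, %2
  ret i16 %3
}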