diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -2247,3 +2247,12 @@
 let Predicates = [HasMOVBE] in {
   def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
 }
+
+// Match idiomatic (sizeof(x) * 8 - 1 - clz(x)), written as xor(ctlz(x), N-1),
+// with bsr instead of emitting lzcnt + xor.
+// Only ctlz_zero_undef is matched: bsr leaves its destination undefined for a
+// zero source, whereas plain ctlz must yield the bit width N for zero, so
+// folding plain ctlz would miscompile the x == 0 case.
+def : Pat<(xor (ctlz_zero_undef GR16:$src), 15), (BSR16rr GR16:$src)>;
+def : Pat<(xor (ctlz_zero_undef GR32:$src), 31), (BSR32rr GR32:$src)>;
+def : Pat<(xor (ctlz_zero_undef GR64:$src), 63), (BSR64rr GR64:$src)>;
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -741,14 +741,12 @@
 ;
 ; X86-CLZ-LABEL: ctlz_bsr:
 ; X86-CLZ:       # %bb.0:
-; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
-; X86-CLZ-NEXT:    xorl $31, %eax
+; X86-CLZ-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
 ; X86-CLZ-NEXT:    retl
 ;
 ; X64-CLZ-LABEL: ctlz_bsr:
 ; X64-CLZ:       # %bb.0:
-; X64-CLZ-NEXT:    lzcntl %edi, %eax
-; X64-CLZ-NEXT:    xorl $31, %eax
+; X64-CLZ-NEXT:    bsrl %edi, %eax
 ; X64-CLZ-NEXT:    retq
   %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
   %bsr = xor i32 %ctlz, 31
@@ -1079,8 +1077,7 @@
 ; X86-CLZ-LABEL: PR47603_zext:
 ; X86-CLZ:       # %bb.0:
 ; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-CLZ-NEXT:    xorl $31, %ecx
+; X86-CLZ-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
 ; X86-CLZ-NEXT:    movsbl (%eax,%ecx), %eax
 ; X86-CLZ-NEXT:    retl
 ;