Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -1798,6 +1798,37 @@ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; } +// If we have BMI2, we transform the +// x & (-1 'logical shift' y) +// into +// (x 'opposite logical shift' y) 'logical shift' y +let Predicates = [HasBMI2] in { + multiclass bit_clean { + def : Pat<(and RC:$src1, (OrigShift -1, GR8:$src2)), + (OuterShift (InnerShiftReg $src1, + (INSERT_SUBREG (VT (IMPLICIT_DEF)), + GR8:$src2, sub_8bit)), + (INSERT_SUBREG (VT (IMPLICIT_DEF)), + GR8:$src2, sub_8bit))>; + def : Pat<(and (Load addr:$src1), (OrigShift -1, GR8:$src2)), + (OuterShift (InnerShiftMem addr:$src1, + (INSERT_SUBREG (VT (IMPLICIT_DEF)), + GR8:$src2, sub_8bit)), + (INSERT_SUBREG (VT (IMPLICIT_DEF)), + GR8:$src2, sub_8bit))>; + } + + // x & (-1 >> y) --> x << y >> y (clear y high bits) + defm : bit_clean; + defm : bit_clean; + + // x & (-1 << y) --> x >> y << y (clear y low bits) + defm : bit_clean; + defm : bit_clean; +} + // Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location. multiclass one_bit_patternshasOneUse(); }]>; +// An 'shl' node with a single use. +def shl_su : PatFrag<(ops node:$lhs, node:$rhs), (shl node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; // An 'trunc' node with a single use. def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); Index: test/CodeGen/X86/clear-highbits.ll =================================================================== --- test/CodeGen/X86/clear-highbits.ll +++ test/CodeGen/X86/clear-highbits.ll @@ -31,9 +31,8 @@ ; X86-TRANSFORM-LABEL: clear_highbits32_c0: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shrxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_highbits32_c0: @@ -46,9 +45,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits32_c0: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shlxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shrxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %mask = lshr i32 -1, %numhighbits %masked = and i32 %mask, %val @@ -67,9 +65,8 @@ ; X86-TRANSFORM-LABEL: clear_highbits32_c1_indexzext: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shrxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_highbits32_c1_indexzext: @@ -82,9 +79,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits32_c1_indexzext: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shlxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shrxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %sh_prom = zext i8 %numhighbits to i32 %mask = lshr i32 -1, %sh_prom @@ -104,11 +100,10 @@ ; ; X86-TRANSFORM-LABEL: clear_highbits32_c2_load: ; X86-TRANSFORM: # %bb.0: -; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %edx -; X86-TRANSFORM-NEXT: shrxl %eax, %edx, %eax -; X86-TRANSFORM-NEXT: andl (%ecx), %eax +; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-TRANSFORM-NEXT: shlxl %ecx, (%eax), %eax +; X86-TRANSFORM-NEXT: shrxl %ecx, %eax, %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_highbits32_c2_load: @@ -121,9 +116,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits32_c2_load: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shlxl %esi, (%rdi), %eax ; X64-TRANSFORM-NEXT: shrxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl (%rdi), %eax ; X64-TRANSFORM-NEXT: retq %val = load i32, i32* %w %mask = lshr i32 -1, %numhighbits @@ -143,11 +137,10 @@ ; ; X86-TRANSFORM-LABEL: clear_highbits32_c3_load_indexzext: ; X86-TRANSFORM: # %bb.0: -; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %edx -; X86-TRANSFORM-NEXT: shrxl %eax, %edx, %eax -; X86-TRANSFORM-NEXT: andl (%ecx), %eax +; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-TRANSFORM-NEXT: shlxl %ecx, (%eax), %eax +; X86-TRANSFORM-NEXT: shrxl %ecx, %eax, %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_highbits32_c3_load_indexzext: @@ -160,9 +153,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits32_c3_load_indexzext: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shlxl %esi, (%rdi), %eax ; X64-TRANSFORM-NEXT: shrxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl (%rdi), %eax ; X64-TRANSFORM-NEXT: retq %val = load i32, i32* %w %sh_prom = zext i8 %numhighbits to i32 @@ -183,9 +175,8 @@ ; X86-TRANSFORM-LABEL: clear_highbits32_c4_commutative: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shrxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_highbits32_c4_commutative: @@ -198,9 +189,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits32_c4_commutative: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shlxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shrxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %mask = lshr i32 -1, %numhighbits %masked = and i32 %val, %mask ; swapped order @@ -253,9 +243,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits64_c0: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shlxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shrxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %mask = lshr i64 -1, %numhighbits %masked = and i64 %mask, %val @@ -307,9 +296,8 @@ ; X64-TRANSFORM-LABEL: clear_highbits64_c1_indexzext: ; X64-TRANSFORM: # %bb.0: ; X64-TRANSFORM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shlxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shrxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %sh_prom = zext i8 %numhighbits to i64 %mask = lshr i64 -1, %sh_prom @@ -367,9 +355,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits64_c2_load: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shlxq %rsi, (%rdi), %rax ; X64-TRANSFORM-NEXT: shrxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq (%rdi), %rax ; X64-TRANSFORM-NEXT: retq %val = load i64, i64* %w %mask = lshr i64 -1, %numhighbits @@ -428,9 +415,8 @@ ; X64-TRANSFORM-LABEL: clear_highbits64_c3_load_indexzext: ; X64-TRANSFORM: # %bb.0: ; X64-TRANSFORM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shlxq %rsi, (%rdi), %rax ; X64-TRANSFORM-NEXT: shrxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq (%rdi), %rax ; X64-TRANSFORM-NEXT: retq %val = load i64, i64* %w %sh_prom = zext i8 %numhighbits to i64 @@ -483,9 +469,8 @@ ; ; X64-TRANSFORM-LABEL: clear_highbits64_c4_commutative: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shlxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shrxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %mask = lshr i64 -1, %numhighbits %masked = and i64 %val, %mask ; swapped order Index: test/CodeGen/X86/clear-lowbits.ll =================================================================== --- test/CodeGen/X86/clear-lowbits.ll +++ test/CodeGen/X86/clear-lowbits.ll @@ -31,9 +31,8 @@ ; X86-TRANSFORM-LABEL: clear_lowbits32_c0: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shlxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_lowbits32_c0: @@ -46,9 +45,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits32_c0: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shrxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shlxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %mask = shl i32 -1, %numlowbits %masked = and i32 %mask, %val @@ -67,9 +65,8 @@ ; X86-TRANSFORM-LABEL: clear_lowbits32_c1_indexzext: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shlxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_lowbits32_c1_indexzext: @@ -82,9 +79,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits32_c1_indexzext: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shrxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shlxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %sh_prom = zext i8 %numlowbits to i32 %mask = shl i32 -1, %sh_prom @@ -104,11 +100,10 @@ ; ; X86-TRANSFORM-LABEL: clear_lowbits32_c2_load: ; X86-TRANSFORM: # %bb.0: -; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %edx -; X86-TRANSFORM-NEXT: shlxl %eax, %edx, %eax -; X86-TRANSFORM-NEXT: andl (%ecx), %eax +; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-TRANSFORM-NEXT: shrxl %ecx, (%eax), %eax +; X86-TRANSFORM-NEXT: shlxl %ecx, %eax, %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_lowbits32_c2_load: @@ -121,9 +116,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits32_c2_load: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shrxl %esi, (%rdi), %eax ; X64-TRANSFORM-NEXT: shlxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl (%rdi), %eax ; X64-TRANSFORM-NEXT: retq %val = load i32, i32* %w %mask = shl i32 -1, %numlowbits @@ -143,11 +137,10 @@ ; ; X86-TRANSFORM-LABEL: clear_lowbits32_c3_load_indexzext: ; X86-TRANSFORM: # %bb.0: -; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %edx -; X86-TRANSFORM-NEXT: shlxl %eax, %edx, %eax -; X86-TRANSFORM-NEXT: andl (%ecx), %eax +; X86-TRANSFORM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-TRANSFORM-NEXT: shrxl %ecx, (%eax), %eax +; X86-TRANSFORM-NEXT: shlxl %ecx, %eax, %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_lowbits32_c3_load_indexzext: @@ -160,9 +153,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits32_c3_load_indexzext: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shrxl %esi, (%rdi), %eax ; X64-TRANSFORM-NEXT: shlxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl (%rdi), %eax ; X64-TRANSFORM-NEXT: retq %val = load i32, i32* %w %sh_prom = zext i8 %numlowbits to i32 @@ -183,9 +175,8 @@ ; X86-TRANSFORM-LABEL: clear_lowbits32_c4_commutative: ; X86-TRANSFORM: # %bb.0: ; X86-TRANSFORM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-TRANSFORM-NEXT: movl $-1, %ecx +; X86-TRANSFORM-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-TRANSFORM-NEXT: shlxl %eax, %ecx, %eax -; X86-TRANSFORM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-TRANSFORM-NEXT: retl ; ; X64-NOTRANSFORM-LABEL: clear_lowbits32_c4_commutative: @@ -198,9 +189,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits32_c4_commutative: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movl $-1, %eax +; X64-TRANSFORM-NEXT: shrxl %esi, %edi, %eax ; X64-TRANSFORM-NEXT: shlxl %esi, %eax, %eax -; X64-TRANSFORM-NEXT: andl %edi, %eax ; X64-TRANSFORM-NEXT: retq %mask = shl i32 -1, %numlowbits %masked = and i32 %val, %mask ; swapped order @@ -253,9 +243,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits64_c0: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shrxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shlxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %mask = shl i64 -1, %numlowbits %masked = and i64 %mask, %val @@ -307,9 +296,8 @@ ; X64-TRANSFORM-LABEL: clear_lowbits64_c1_indexzext: ; X64-TRANSFORM: # %bb.0: ; X64-TRANSFORM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shrxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shlxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %sh_prom = zext i8 %numlowbits to i64 %mask = shl i64 -1, %sh_prom @@ -367,9 +355,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits64_c2_load: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shrxq %rsi, (%rdi), %rax ; X64-TRANSFORM-NEXT: shlxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq (%rdi), %rax ; X64-TRANSFORM-NEXT: retq %val = load i64, i64* %w %mask = shl i64 -1, %numlowbits @@ -428,9 +415,8 @@ ; X64-TRANSFORM-LABEL: clear_lowbits64_c3_load_indexzext: ; X64-TRANSFORM: # %bb.0: ; X64-TRANSFORM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shrxq %rsi, (%rdi), %rax ; X64-TRANSFORM-NEXT: shlxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq (%rdi), %rax ; X64-TRANSFORM-NEXT: retq %val = load i64, i64* %w %sh_prom = zext i8 %numlowbits to i64 @@ -483,9 +469,8 @@ ; ; X64-TRANSFORM-LABEL: clear_lowbits64_c4_commutative: ; X64-TRANSFORM: # %bb.0: -; X64-TRANSFORM-NEXT: movq $-1, %rax +; X64-TRANSFORM-NEXT: shrxq %rsi, %rdi, %rax ; X64-TRANSFORM-NEXT: shlxq %rsi, %rax, %rax -; X64-TRANSFORM-NEXT: andq %rdi, %rax ; X64-TRANSFORM-NEXT: retq %mask = shl i64 -1, %numlowbits %masked = and i64 %val, %mask ; swapped order