Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1360,6 +1360,46 @@
   }
 } // AddedComplexity, SchedRW
 
+//===----------------------------------------------------------------------===//
+// Pattern match SUB as XOR
+//===----------------------------------------------------------------------===//
+
+// An immediate in the LHS of a subtract can't be encoded in the instruction.
+// If there is no possibility of a borrow we can use an XOR instead of a SUB
+// to enable the immediate to be folded.
+// TODO: Move this to a DAG combine?
+
+// Matches (sub C, X) where C is a constant and every bit of X that may be set
+// is also set in C, so the subtraction cannot borrow and equals (xor C, X).
+def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+    KnownBits Known;
+    CurDAG->computeKnownBits(N->getOperand(1), Known);
+
+    // If all possible ones in the RHS are set in the LHS then there can't be
+    // a borrow and we can use xor.
+    return (~Known.Zero).isSubsetOf(CN->getAPIntValue());
+  }
+
+  return false;
+}]>;
+
+let AddedComplexity = 5 in {
+def : Pat<(sub_is_xor imm:$src2, GR8:$src1),
+          (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1),
+          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub_is_xor imm:$src2, GR16:$src1),
+          (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1),
+          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(sub_is_xor imm:$src2, GR32:$src1),
+          (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1),
+          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1),
+          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+}
 
 //===----------------------------------------------------------------------===//
 // Some peepholes
Index: test/CodeGen/X86/bool-math.ll
===================================================================
--- test/CodeGen/X86/bool-math.ll
+++ test/CodeGen/X86/bool-math.ll
@@ -47,8 +47,8 @@
 ; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: movb $27, %al
-; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: xorb $27, %dil
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -61,8 +61,8 @@
 ; CHECK-LABEL: add_zext_cmp_mask_wider_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: movl $27, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: xorl $27, %edi
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -75,8 +75,8 @@
 ; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: movb $43, %al
-; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: xorb $43, %dil
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -129,8 +129,8 @@
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: movb $-29, %al
-; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: xorb $-29, %dil
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -142,8 +142,8 @@
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: movl $227, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: xorl $227, %edi
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -155,8 +155,8 @@
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: movb $41, %al
-; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: xorb $41, %dil
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %a = and i16 %x, 1
   %c = icmp eq i16 %a, 0
Index: test/CodeGen/X86/dagcombine-select.ll
===================================================================
--- test/CodeGen/X86/dagcombine-select.ll
+++ test/CodeGen/X86/dagcombine-select.ll
@@ -194,9 +194,9 @@
 ; CHECK-LABEL: shl_constant_sel_constants:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: movb $3, %cl
-; CHECK-NEXT: subb %dil, %cl
+; CHECK-NEXT: xorb $3, %dil
 ; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: movl %edi, %ecx
 ; CHECK-NEXT: shll %cl, %eax
 ; CHECK-NEXT: retq
   %sel = select i1 %cond, i32 2, i32 3
@@ -208,9 +208,9 @@
 ; CHECK-LABEL: lshr_constant_sel_constants:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: movb $3, %cl
-; CHECK-NEXT: subb %dil, %cl
+; CHECK-NEXT: xorb $3, %dil
 ; CHECK-NEXT: movl $64, %eax
+; CHECK-NEXT: movl %edi, %ecx
 ; CHECK-NEXT: shrl %cl, %eax
 ; CHECK-NEXT: retq
   %sel = select i1 %cond, i32 2, i32 3
@@ -222,9 +222,9 @@
 ; CHECK-LABEL: ashr_constant_sel_constants:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: movb $3, %cl
-; CHECK-NEXT: subb %dil, %cl
+; CHECK-NEXT: xorb $3, %dil
 ; CHECK-NEXT: movl $128, %eax
+; CHECK-NEXT: movl %edi, %ecx
 ; CHECK-NEXT: shrl %cl, %eax
 ; CHECK-NEXT: retq
   %sel = select i1 %cond, i32 2, i32 3
Index: test/CodeGen/X86/extract-lowbits.ll
===================================================================
--- test/CodeGen/X86/extract-lowbits.ll
+++ test/CodeGen/X86/extract-lowbits.ll
@@ -28,34 +28,6 @@
 ; ---------------------------------------------------------------------------- ;
 
 define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
-; X86-NOBMI-LABEL: bzhi32_a0:
-; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: decl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_a0:
-; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: retl
-;
-; X64-NOBMI-LABEL: bzhi32_a0:
-; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $1, %eax
-; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: shll %cl, %eax
-; X64-NOBMI-NEXT: decl %eax
-; X64-NOBMI-NEXT: andl %edi, %eax
-; X64-NOBMI-NEXT: retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_a0:
-; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT: retq
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %mask, %val