Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -835,6 +835,9 @@
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
 
+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
     /// Determine which of the bits specified in Mask are known to be either
     /// zero or one and return them in the KnownZero/KnownOne bitsets.
     void computeKnownBitsForTargetNode(const SDValue Op,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27810,6 +27810,65 @@
 //                           X86 Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+bool
+X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &Demanded,
+                                                TargetLoweringOpt &TLO) const {
+  // Only optimize Ands to prevent shrinking a constant that could be
+  // matched by movzx.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  unsigned Size = VT.getSizeInBits();
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  const APInt &Mask = C->getAPIntValue();
+
+  // Clear all non-demanded bits initially.
+  APInt ShrunkMask = Mask & Demanded;
+
+  // Find the width of the shrunk mask.
+  unsigned Width = ShrunkMask.getActiveBits();
+
+  // If the mask is all 0s there's nothing to do here.
+  if (Width == 0)
+    return false;
+
+  // Find the next power of 2 width, rounding up to a byte.
+  Width = PowerOf2Ceil(std::max(Width, 8U));
+  // Truncate the width to size to handle illegal types.
+  Width = std::min(Width, Size);
+
+  // Calculate a possible zero extend mask for this constant.
+  APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);
+
+  // If we aren't changing the mask, just return true to keep it and prevent
+  // the caller from optimizing.
+  if (ZeroExtendMask == Mask)
+    return true;
+
+  // Make sure the bits in the ZeroExtendMask are also set in the original mask.
+  // TODO: We should be able to set bits that aren't demanded too.
+  if (!ZeroExtendMask.isSubsetOf(Mask))
+    return false;
+
+  // Replace the constant with the zero extend mask.
+  SDLoc DL(Op);
+  SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
+  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+  return TLO.CombineTo(Op, NewOp);
+}
+
 void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1514,6 +1514,10 @@
           (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
       Requires<[In64BitMode]>;
 
+def immff00_ffff : ImmLeaf<i32, [{
+  return Imm >= 0xff00 && Imm <= 0xffff;
+}]>;
+
 // h-register tricks
 def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
           (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
@@ -1534,7 +1538,7 @@
           (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
           (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
           (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
 
 // h-register tricks.
Index: test/CodeGen/X86/3addr-or.ll
===================================================================
--- test/CodeGen/X86/3addr-or.ll
+++ test/CodeGen/X86/3addr-or.ll
@@ -10,12 +10,14 @@
   ret i32 %1
 }
 
+; This test no longer requires the or to be converted to 3 addr form because
+; we are able to use a zero extend instead of an 'and', which gives the
+; register allocator more freedom.
 define i64 @test2(i8 %A, i8 %B) nounwind {
 ; CHECK-LABEL: test2:
+; CHECK: movzbl
 ; CHECK: shrq $4
-; CHECK-NOT: movq
-; CHECK-NOT: orq
-; CHECK: leaq
+; CHECK: orq
 ; CHECK: ret
   %C = zext i8 %A to i64          ; <i64> [#uses=1]
   %D = shl i64 %C, 4              ; <i64> [#uses=1]
Index: test/CodeGen/X86/popcnt.ll
===================================================================
--- test/CodeGen/X86/popcnt.ll
+++ test/CodeGen/X86/popcnt.ll
@@ -70,8 +70,8 @@
 ; X32-NEXT:    shrl $2, %eax
 ; X32-NEXT:    andl $13107, %eax # imm = 0x3333
 ; X32-NEXT:    addl %ecx, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $32752, %ecx # imm = 0x7FF0
 ; X32-NEXT:    shrl $4, %ecx
 ; X32-NEXT:    addl %eax, %ecx
 ; X32-NEXT:    andl $3855, %ecx # imm = 0xF0F
@@ -93,15 +93,15 @@
 ; X64-NEXT:    shrl $2, %edi
 ; X64-NEXT:    andl $13107, %edi # imm = 0x3333
 ; X64-NEXT:    addl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $32752, %eax # imm = 0x7FF0
-; X64-NEXT:    shrl $4, %eax
-; X64-NEXT:    addl %edi, %eax
-; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    shll $8, %ecx
+; X64-NEXT:    shrl $4, %ecx
 ; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    movzbl %ch, %eax # NOREX
+; X64-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    shll $8, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    movzbl %ah, %eax # NOREX
 ; X64-NEXT:    # kill: def %ax killed %ax killed %eax
 ; X64-NEXT:    retq
 ;
Index: test/CodeGen/X86/pr21792.ll
===================================================================
--- test/CodeGen/X86/pr21792.ll
+++ test/CodeGen/X86/pr21792.ll
@@ -12,19 +12,18 @@
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    pextrq $1, %xmm0, %rdx
-; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    shrq $32, %rcx
-; CHECK-NEXT:    movq %xmm0, %rax
-; CHECK-NEXT:    movq %rax, %r9
+; CHECK-NEXT:    pextrq $1, %xmm0, %rax
+; CHECK-NEXT:    movzwl %ax, %ecx
+; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movq %xmm0, %rdx
+; CHECK-NEXT:    movzwl %dx, %r8d
+; CHECK-NEXT:    movq %rdx, %r9
 ; CHECK-NEXT:    shrq $32, %r9
-; CHECK-NEXT:    andl $2032, %eax # imm = 0x7F0
-; CHECK-NEXT:    leaq stuff(%rax), %rdi
+; CHECK-NEXT:    leaq stuff(%r8), %rdi
 ; CHECK-NEXT:    leaq stuff(%r9), %rsi
-; CHECK-NEXT:    andl $2032, %edx # imm = 0x7F0
-; CHECK-NEXT:    leaq stuff(%rdx), %rdx
-; CHECK-NEXT:    leaq stuff(%rcx), %rcx
-; CHECK-NEXT:    leaq stuff+8(%rax), %r8
+; CHECK-NEXT:    leaq stuff(%rcx), %rdx
+; CHECK-NEXT:    leaq stuff(%rax), %rcx
+; CHECK-NEXT:    leaq stuff+8(%r8), %r8
 ; CHECK-NEXT:    leaq stuff+8(%r9), %r9
 ; CHECK-NEXT:    callq toto
 ; CHECK-NEXT:    popq %rax