Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2222,7 +2222,56 @@
        return RetVal;
      break;
    }
-  case ISD::AND:
+  case ISD::AND: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    // Try to shrink the encoding of an AND by setting additional bits in the
+    // mask. It is only correct to do so if we know a priori that the other
+    // operand of the AND already has those bits set to zero.
+    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1)) {
+      int64_t Val = Cst->getSExtValue();
+      if (Val > 0) {
+        unsigned Opc8 = 0, Opc32 = 0;
+        // Limit ourselves to constants which already have sign bits to save on
+        // compile time.
+        if ((int8_t)Val < 0) {
+          if (NVT.SimpleTy == MVT::i64)
+            Opc8 = X86::AND64ri8;
+          else if (NVT.SimpleTy == MVT::i32)
+            Opc8 = X86::AND32ri8;
+          else if (NVT.SimpleTy == MVT::i16)
+            Opc8 = X86::AND16ri8;
+        }
+        if ((int32_t)Val < 0)
+          if (NVT.SimpleTy == MVT::i64)
+            Opc32 = X86::AND64ri32;
+        // Only bother computing the zero bits in the non-constant operand if we
+        // have a suitable instruction.
+        if (Opc8 || Opc32) {
+          APInt Op0Zero, Op0One;
+          CurDAG->computeKnownBits(N0, Op0Zero, Op0One);
+          // Grow the mask using the known zero bits.
+          Op0Zero |= Val;
+          // Check to see if the mask can be efficiently encoded.
+          SDNode *NewNode = nullptr;
+          if (Opc8 && Op0Zero.isSignedIntN(8)) {
+            SDValue NewCst =
+                CurDAG->getTargetConstant(Op0Zero.getSExtValue(), dl, MVT::i8);
+            NewNode = CurDAG->getMachineNode(Opc8, dl, NVT, N0, NewCst);
+          } else if (Opc32 && Op0Zero.isSignedIntN(32)) {
+            SDValue NewCst =
+                CurDAG->getTargetConstant(Op0Zero.getSExtValue(), dl, MVT::i32);
+            NewNode = CurDAG->getMachineNode(Opc32, dl, NVT, N0, NewCst);
+          }
+          if (NewNode) {
+            ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+            return nullptr;
+          }
+        }
+      }
+    }
+    // FALLTHROUGH
+  }
   case ISD::OR:
   case ISD::XOR: {
     // For operations of the form (x << C1) op C2, check if we can use a smaller
Index: test/CodeGen/X86/shift-pair.ll
===================================================================
--- test/CodeGen/X86/shift-pair.ll
+++ test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
 define i64 @test(i64 %A) {
 ; CHECK: @test
 ; CHECK: shrq $54
-; CHECK: andl $1020
+; CHECK: andq $-4
 ; CHECK: ret
   %B = lshr i64 %A, 56
   %C = shl i64 %B, 2
Index: test/CodeGen/X86/win64_frame.ll
===================================================================
--- test/CodeGen/X86/win64_frame.ll
+++ test/CodeGen/X86/win64_frame.ll
@@ -100,9 +100,8 @@
   alloca i32, i32 %a

   ; CHECK: movl %ecx, %eax
-  ; CHECK: leaq 15(,%rax,4), %rcx
-  ; CHECK: movabsq $34359738352, %rax
-  ; CHECK: andq %rcx, %rax
+  ; CHECK: leaq 15(,%rax,4), %rax
+  ; CHECK: andq $-16, %rax
   ; CHECK: callq __chkstk
   ; CHECK: subq %rax, %rsp
Index: test/CodeGen/X86/zext-fold.ll
===================================================================
--- test/CodeGen/X86/zext-fold.ll
+++ test/CodeGen/X86/zext-fold.ll
@@ -8,7 +8,7 @@
 }
 ; CHECK: test1
 ; CHECK: movzbl
-; CHECK-NEXT: andl {{.*}}224
+; CHECK-NEXT: andl {{.*}}-32

 ;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
@@ -21,7 +21,7 @@
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: andl $224
+; CHECK: andl $-32
 ; CHECK: orl $63

 declare void @use(i32, i8)
@@ -36,6 +36,6 @@
 ; CHECK: test3
 ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
-; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: andl $-32, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
 ; CHECK-NEXT: call{{.*}}use
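
For reference, a worked example of what the new code does, reconstructed from the
shift-pair.ll change above. This sketch is not part of the patch: it uses plain
integers and a hypothetical helper name in place of APInt and
SelectionDAG::computeKnownBits. After shrq $54 the non-constant operand has bits
10..63 known zero, so setting those bits in the mask 1020 (0x3FC) cannot change
the AND's result; the grown mask -4 fits the sign-extended 8-bit immediate form
(AND64ri8), saving three bytes over the imm32 encoding.

// Standalone sketch with hypothetical names; the patch itself operates on
// SDNodes rather than raw integers.
#include <cstdint>
#include <cstdio>

// Grow `mask` with bits already known zero in the other AND operand; for those
// bits the AND result is 0 regardless of the mask bit, so the transformation
// is sound. Return the grown mask if it now fits a sign-extended 8-bit
// immediate, else the original mask.
static int64_t growMaskForImm8(uint64_t knownZero, int64_t mask) {
  int64_t grown = (int64_t)(knownZero | (uint64_t)mask);
  return (grown >= INT8_MIN && grown <= INT8_MAX) ? grown : mask;
}

int main() {
  // shift-pair.ll: the operand is (%A lshr 54), so bits 10..63 are known zero.
  uint64_t knownZero = ~((1ULL << 10) - 1);
  printf("%lld\n", (long long)growMaskForImm8(knownZero, 1020)); // prints -4
}

The win64_frame.ll change is the same idea paying off twice: the alignment mask
34359738352 (0x7FFFFFFF0) grows to -16 once the known-zero high bits of the leaq
result are folded in, which both selects the imm8 form and deletes the movabsq
that previously materialized the constant.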