Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -198,6 +198,7 @@
     SDNode *Select(SDNode *N) override;
     SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *SelectAndWithSExtImmediate(SDNode *Node, MVT NVT);
 
     bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -2132,6 +2133,62 @@
   return ResNode;
 }
 
+// Try to shrink the encoding of an AND by setting additional bits in the mask.
+// It is only correct to do so if we know a priori that the other operand of the
+// AND already has those bits set to zero.
+// For example, 'andl $224' applied to a zero-extended byte can be emitted as
+// 'andl $-32', whose mask fits in a sign-extended 8-bit immediate.
+// Returns the new machine node, or nullptr if no shrunken form was selected.
+SDNode *X86DAGToDAGISel::SelectAndWithSExtImmediate(SDNode *Node, MVT NVT) {
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+  auto *Cst = dyn_cast<ConstantSDNode>(N1);
+  if (!Cst)
+    return nullptr;
+  int64_t Val = Cst->getSExtValue();
+  if (Val <= 0)
+    return nullptr;
+  unsigned Opc8 = 0, Opc32 = 0;
+  // Limit ourselves to constants which already have sign bits to save on
+  // compile time.
+  if ((int8_t)Val < 0) {
+    if (NVT.SimpleTy == MVT::i64)
+      Opc8 = X86::AND64ri8;
+    else if (NVT.SimpleTy == MVT::i32)
+      Opc8 = X86::AND32ri8;
+    else if (NVT.SimpleTy == MVT::i16)
+      Opc8 = X86::AND16ri8;
+  }
+  if ((int32_t)Val < 0)
+    if (NVT.SimpleTy == MVT::i64)
+      Opc32 = X86::AND64ri32;
+  // Only bother computing the zero bits in the non-constant operand if we
+  // have a suitable instruction.
+  if (!Opc8 && !Opc32)
+    return nullptr;
+  APInt Op0Zero, Op0One;
+  CurDAG->computeKnownBits(N0, Op0Zero, Op0One);
+  // Grow the mask using the known zero bits.
+  Op0Zero |= Val;
+  auto SelectAndInstruction = [&](unsigned Opc, unsigned NumBits) -> SDNode * {
+    // Do we have an instruction for this number of bits?
+    if (!Opc)
+      return nullptr;
+    // See if the mask can be efficiently encoded using at most NumBits.
+    if (!Op0Zero.isSignedIntN(NumBits))
+      return nullptr;
+    SDLoc DL(Node);
+    SDValue NewCst = CurDAG->getTargetConstant(Op0Zero.getSExtValue(), DL,
+                                               MVT::getIntegerVT(NumBits));
+    return CurDAG->getMachineNode(Opc, DL, NVT, N0, NewCst);
+  };
+  if (SDNode *NewInst = SelectAndInstruction(Opc8, 8))
+    return NewInst;
+  if (SDNode *NewInst = SelectAndInstruction(Opc32, 32))
+    return NewInst;
+  return nullptr;
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opc, MOpc;
@@ -2222,7 +2279,13 @@
       return RetVal;
     break;
   }
-  case ISD::AND:
+  case ISD::AND: {
+    if (SDNode *NewNode = SelectAndWithSExtImmediate(Node, NVT)) {
+      ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+      return nullptr;
+    }
+    // FALLTHROUGH
+  }
   case ISD::OR:
   case ISD::XOR: {
     // For operations of the form (x << C1) op C2, check if we can use a smaller
Index: test/CodeGen/X86/shift-pair.ll
===================================================================
--- test/CodeGen/X86/shift-pair.ll
+++ test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
 define i64 @test(i64 %A) {
 ; CHECK: @test
 ; CHECK: shrq $54
-; CHECK: andl $1020
+; CHECK: andq $-4
 ; CHECK: ret
     %B = lshr i64 %A, 56
     %C = shl i64 %B, 2
Index: test/CodeGen/X86/win64_frame.ll
===================================================================
--- test/CodeGen/X86/win64_frame.ll
+++ test/CodeGen/X86/win64_frame.ll
@@ -100,9 +100,8 @@
 
   alloca i32, i32 %a
   ; CHECK: movl %ecx, %eax
-  ; CHECK: leaq 15(,%rax,4), %rcx
-  ; CHECK: movabsq $34359738352, %rax
-  ; CHECK: andq %rcx, %rax
+  ; CHECK: leaq 15(,%rax,4), %rax
+  ; CHECK: andq $-16, %rax
   ; CHECK: callq __chkstk
   ; CHECK: subq %rax, %rsp
 
Index: test/CodeGen/X86/zext-fold.ll
===================================================================
--- test/CodeGen/X86/zext-fold.ll
+++ test/CodeGen/X86/zext-fold.ll
@@ -8,7 +8,7 @@
 }
 ; CHECK: test1
 ; CHECK: movzbl
-; CHECK-NEXT: andl {{.*}}224
+; CHECK-NEXT: andl {{.*}}-32
 
 ;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
@@ -21,7 +21,7 @@
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: andl $224
+; CHECK: andl $-32
 ; CHECK: orl $63
 
 declare void @use(i32, i8)
@@ -36,6 +36,6 @@
 ; CHECK: test3
 ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
-; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: andl $-32, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
 ; CHECK-NEXT: call{{.*}}use