diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49683,6 +49683,46 @@
     }
   }
 
+  // Transform (and (srl X 30) 2) into (add Y Y), where Y = (srl X 31). This
+  // encodes one byte shorter for i32 and i64. For i64, we can use a 32-bit
+  // shift instruction to discard the upper half.
+  //
+  // Likewise, we transform (and (srl X 62) 2) into (add Y Y), where
+  // Y = (srl X 63).
+  //
+  // The final result is ISD::ADD, so we can choose LEA if profitable.
+  auto *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N1C && N1C->getZExtValue() == 2 && N0.getOpcode() == ISD::SRL &&
+      N0->hasOneUse()) {
+    if (auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      uint64_t ShiftCount = N01C->getZExtValue();
+      if (ShiftCount == 30 && VT == MVT::i32) {
+        SDValue Shift =
+            DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
+                        DAG.getConstant(ShiftCount + 1, dl, MVT::i8));
+        return DAG.getNode(ISD::ADD, dl, VT, Shift, Shift);
+      } else if (ShiftCount == 62 && VT == MVT::i64 && Subtarget.is64Bit()) {
+        SDValue Shift =
+            DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
+                        DAG.getConstant(ShiftCount + 1, dl, MVT::i8));
+        SDValue Trunc =
+            DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Shift);
+        // Use 32-bit ISD::ADD for shorter encoding of ADD/LEA.
+        SDValue Add = DAG.getNode(ISD::ADD, dl, MVT::i32, Trunc, Trunc);
+        return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Add);
+      } else if (ShiftCount == 30 && VT == MVT::i64) {
+        SDValue Trunc =
+            DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N0.getOperand(0));
+        SDValue Shift =
+            DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
+                        DAG.getConstant(ShiftCount + 1, dl, MVT::i8));
+        // Use 32-bit ISD::ADD for shorter encoding of ADD/LEA.
+        SDValue Add = DAG.getNode(ISD::ADD, dl, MVT::i32, Shift, Shift);
+        return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Add);
+      }
+    }
+  }
+
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
     // Attempt to recursively combine a bitmask AND with shuffles.
     SDValue Op(N, 0);
diff --git a/llvm/test/CodeGen/X86/and-shift.ll b/llvm/test/CodeGen/X86/and-shift.ll
--- a/llvm/test/CodeGen/X86/and-shift.ll
+++ b/llvm/test/CodeGen/X86/and-shift.ll
@@ -6,15 +6,15 @@
 ; X32-LABEL: shift30_and2_i32:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: shrl $30, %eax
-; X32-NEXT: andl $-2, %eax
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: addl %eax, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: shift30_and2_i32:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $30, %eax
-; X64-NEXT: andl $-2, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shrl $31, %edi
+; X64-NEXT: leal (%rdi,%rdi), %eax
 ; X64-NEXT: retq
   %shr = lshr i32 %x, 30
   %and = and i32 %shr, 2
@@ -25,16 +25,15 @@
 ; X32-LABEL: shift62_and2_i64:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: shrl $30, %eax
-; X32-NEXT: andl $-2, %eax
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: addl %eax, %eax
 ; X32-NEXT: xorl %edx, %edx
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: shift62_and2_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $62, %rax
-; X64-NEXT: andl $-2, %eax
+; X64-NEXT: shrq $63, %rdi
+; X64-NEXT: leal (%rdi,%rdi), %eax
 ; X64-NEXT: retq
   %shr = lshr i64 %x, 62
   %and = and i64 %shr, 2
@@ -45,16 +44,15 @@
 ; X32-LABEL: shift30_and2_i64:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: shrl $30, %eax
-; X32-NEXT: andl $-2, %eax
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: addl %eax, %eax
 ; X32-NEXT: xorl %edx, %edx
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: shift30_and2_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $30, %rax
-; X64-NEXT: andl $2, %eax
+; X64-NEXT: shrl $31, %edi
+; X64-NEXT: leal (%rdi,%rdi), %eax
 ; X64-NEXT: retq
   %shr = lshr i64 %x, 30
   %and = and i64 %shr, 2
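
Below is a small standalone C++ sanity check, not part of the patch, that exercises the scalar identities the combine relies on: `(x >> 30) & 2` keeps only bit 31 of `x`, shifted down into bit 1, so it equals `Y + Y` with `Y = x >> 31`; the 62/63 case is analogous for bit 63, and in the i64 shift-by-30 case only the low 32 bits of `x` matter, which is why a 32-bit shift plus zero-extend is enough. The function names and sample values are illustrative only.

```cpp
// Sketch only: verifies the bit identities behind the DAG combine on a few
// hand-picked values. Names and samples are hypothetical, not from the patch.
#include <cassert>
#include <cstdint>

// (x >> 30) & 2 keeps only bit 31 of x, placed at bit 1.
static uint32_t BeforeI32(uint32_t X) { return (X >> 30) & 2; }
static uint32_t AfterI32(uint32_t X) { uint32_t Y = X >> 31; return Y + Y; }

// (x >> 62) & 2 keeps only bit 63 of x, placed at bit 1.
static uint64_t BeforeI64Shift62(uint64_t X) { return (X >> 62) & 2; }
static uint64_t AfterI64Shift62(uint64_t X) { uint64_t Y = X >> 63; return Y + Y; }

// (x >> 30) & 2 on i64 keeps only bit 31 of x, so the upper half of x is dead
// and the whole computation can be done in 32 bits and zero-extended.
static uint64_t BeforeI64Shift30(uint64_t X) { return (X >> 30) & 2; }
static uint64_t AfterI64Shift30(uint64_t X) {
  uint32_t Y = static_cast<uint32_t>(X) >> 31;
  return static_cast<uint64_t>(Y + Y);
}

int main() {
  const uint64_t Samples[] = {0,
                              1,
                              0x40000000ULL,
                              0x80000000ULL,
                              0xC0000000ULL,
                              0xFFFFFFFFULL,
                              0x4000000000000000ULL,
                              0x8000000000000000ULL,
                              0xFFFFFFFFFFFFFFFFULL};
  for (uint64_t X : Samples) {
    assert(BeforeI32(static_cast<uint32_t>(X)) ==
           AfterI32(static_cast<uint32_t>(X)));
    assert(BeforeI64Shift62(X) == AfterI64Shift62(X));
    assert(BeforeI64Shift30(X) == AfterI64Shift30(X));
  }
  return 0;
}
```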