diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54928,6 +54928,81 @@
         }
       }
     }
+
+    // Reorder: icmp eq/ne (and X, C0), (shift X, C1)
+    // If X is a mask or shifted mask and the shift isolates the remaining
+    // bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`), re-order
+    // to get better mask constants. Better mask constants basically means:
+    //    1) Avoid imm64
+    //    2) Try to get shl for low C1 (so we can lower with lea/add)
+    //    3) Try to zero-extend mov masks.
+    if (OpVT == MVT::i64 || OpVT == MVT::i32 || OpVT == MVT::i16 ||
+        OpVT == MVT::i8) {
+      auto IsAndWithShift = [](SDValue A, SDValue B) {
+        return A.getOpcode() == ISD::AND &&
+               (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
+               A.getOperand(0) == B.getOperand(0);
+      };
+      SDValue And = SDValue(), Shift = SDValue();
+      if (IsAndWithShift(LHS, RHS)) {
+        And = LHS;
+        Shift = RHS;
+      } else if (IsAndWithShift(RHS, LHS)) {
+        And = RHS;
+        Shift = LHS;
+      }
+      if (And && Shift && And.hasOneUse() && Shift.hasOneUse()) {
+        auto *AndCMask = dyn_cast<ConstantSDNode>(And.getOperand(1));
+        auto *ShiftCAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+        if (AndCMask && ShiftCAmt) {
+          const APInt &Mask = AndCMask->getAPIntValue();
+          const APInt &Amt = ShiftCAmt->getAPIntValue();
+          bool DoTransform = Amt == (~Mask).popcount();
+          unsigned NewShiftOpc;
+          APInt NewMask;
+          if (Shift.getOpcode() == ISD::SHL) {
+            DoTransform &= (~Mask).isMask();
+
+            if (OpVT == MVT::i64)
+              // If the current setup has an imm64 mask, then the inverse will
+              // have at least an imm32 mask (or be zext i32 -> i64).
+              DoTransform &= Mask.getSignificantBits() > 32;
+            else
+              // We can only benefit if the mask requires at least 7 bits. We
+              // don't want to replace shl of 1,2,3 as they can be implemented
+              // with lea/add.
+              DoTransform &= Amt.uge(7);
+            if (DoTransform) {
+              NewShiftOpc = ISD::SRL;
+              NewMask = Mask.lshr(Amt);
+            }
+          } else {
+            DoTransform &= Mask.isMask();
+            if (OpVT == MVT::i64)
+              // Keep an exactly 32-bit imm64 mask; this is zext i32 -> i64,
+              // which is extremely efficient.
+              DoTransform &= Mask.getSignificantBits() > 33;
+            else
+              // Keep small shifts as shl so we can generate add/lea.
+              DoTransform &= Amt.ult(7);
+
+            if (DoTransform) {
+              NewShiftOpc = ISD::SHL;
+              NewMask = Mask.shl(Amt);
+            }
+          }
+          if (DoTransform) {
+            SDValue NewShift =
+                DAG.getNode(NewShiftOpc, DL, OpVT, Shift.getOperand(0),
+                            Shift.getOperand(1));
+            SDValue NewAnd =
+                DAG.getNode(ISD::AND, DL, OpVT, And.getOperand(0),
+                            DAG.getConstant(NewMask, DL, OpVT));
+            return DAG.getSetCC(DL, VT, NewAnd, NewShift, CC);
+          }
+        }
+      }
+    }
   }
 }
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -7,18 +7,18 @@
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andb $63, %cl
-; X86-NEXT: shrb $2, %al
-; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: andb $-4, %al
+; X86-NEXT: cmpb %cl, %al
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_to_shl_eq_i8_s2:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andb $63, %al
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: cmpb %dil, %al
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (,%rdi,4), %eax
+; X64-NEXT: andb $-4, %dil
+; X64-NEXT: cmpb %al, %dil
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
   %and = and i8 %x, 63
@@ -32,18 +32,18 @@
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shlb $7, %cl
-; X86-NEXT: andb $-128, %al
-; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: shrb $7, %cl
+; X86-NEXT: andb $1, %al
+; X86-NEXT: cmpb %cl, %al
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shl_to_shr_ne_i8_s7:
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $7, %al
-; X64-NEXT: andb $-128, %dil
-; X64-NEXT: cmpb %dil, %al
+; X64-NEXT: shrb $7, %al
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: cmpb %al, %dil
 ; X64-NEXT: setne %al
 ; X64-NEXT: retq
   %shl = shl i8 %x, 7
@@ -57,18 +57,18 @@
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andb $127, %cl
-; X86-NEXT: shrb %al
-; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: addb %al, %cl
+; X86-NEXT: andb $-2, %al
+; X86-NEXT: cmpb %cl, %al
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_to_shl_eq_i8_s1:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andb $127, %al
-; X64-NEXT: shrb %dil
-; X64-NEXT: cmpb %dil, %al
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi), %eax
+; X64-NEXT: andb $-2, %dil
+; X64-NEXT: cmpb %al, %dil
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
   %and = and i8 %x, 127
@@ -81,19 +81,18 @@
 ; X86-LABEL: shr_to_shl_eq_i32_s3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $536870911, %ecx # imm = 0x1FFFFFFF
-; X86-NEXT: shrl $3, %eax
-; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: leal (,%eax,8), %ecx
+; X86-NEXT: andl $-8, %eax
+; X86-NEXT: cmpl %ecx, %eax
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_to_shl_eq_i32_s3:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
-; X64-NEXT: shrl $3, %edi
-; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (,%rdi,8), %eax
+; X64-NEXT: andl $-8, %edi
+; X64-NEXT: cmpl %eax, %edi
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
   %and = and i32 %x, 536870911
@@ -131,18 +130,16 @@
 ; X86-LABEL: shl_to_shr_ne_i32_s16:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $16, %ecx
-; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: movzwl %ax, %ecx
+; X86-NEXT: shrl $16, %eax
 ; X86-NEXT: cmpl %eax, %ecx
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shl_to_shr_ne_i32_s16:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $16, %eax
-; X64-NEXT: andl $-65536, %edi # imm = 0xFFFF0000
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: shrl $16, %edi
 ; X64-NEXT: cmpl %edi, %eax
 ; X64-NEXT: setne %al
 ; X64-NEXT: retq
@@ -181,18 +178,17 @@
 ; X86-LABEL: shr_to_shl_eq_i16_s1:
 ; X86: # %bb.0:
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; X86-NEXT: shrl %eax
-; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: leal (%eax,%eax), %ecx
+; X86-NEXT: andl $-2, %eax
+; X86-NEXT: cmpw %cx, %ax
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_to_shl_eq_i16_s1:
 ; X64: # %bb.0:
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: andl $32767, %edi # imm = 0x7FFF
-; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi), %eax
+; X64-NEXT: andl $65534, %edi # imm = 0xFFFE
 ; X64-NEXT: cmpw %ax, %di
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
@@ -231,8 +227,8 @@
 ; X86-LABEL: shl_to_shr_eq_i64_s44:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shll $12, %eax
-; X86-NEXT: movl $-4096, %ecx # imm = 0xFFFFF000
+; X86-NEXT: shrl $12, %eax
+; X86-NEXT: movl $1048575, %ecx # imm = 0xFFFFF
 ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: xorl %eax, %ecx
 ; X86-NEXT: sete %al
@@ -240,9 +236,9 @@
 ;
 ; X64-LABEL: shl_to_shr_eq_i64_s44:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: shlq $44, %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $44, %rax
+; X64-NEXT: andl $1048575, %edi # imm = 0xFFFFF
 ; X64-NEXT: cmpq %rax, %rdi
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
@@ -301,18 +297,18 @@
 ; X86-LABEL: shl_to_shr_eq_i64_s63:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shll $31, %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shrl $31, %ecx
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: xorl %ecx, %eax
 ; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shl_to_shr_eq_i64_s63:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: shlq $63, %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $63, %rax
+; X64-NEXT: andl $1, %edi
 ; X64-NEXT: cmpq %rax, %rdi
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
@@ -350,30 +346,25 @@
 define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
 ; X86-LABEL: shr_to_shl_eq_i64_s7:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %esi, -8
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: andl $33554431, %edx # imm = 0x1FFFFFF
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: shldl $25, %eax, %esi
-; X86-NEXT: shrl $7, %ecx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: xorl %eax, %esi
-; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: shldl $7, %eax, %edx
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $7, %ecx
+; X86-NEXT: andl $-128, %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
 ; X86-NEXT: sete %al
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_to_shl_eq_i64_s7:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: shrq $7, %rdi
-; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlq $7, %rax
+; X64-NEXT: andq $-128, %rdi
+; X64-NEXT: cmpq %rax, %rdi
 ; X64-NEXT: sete %al
 ; X64-NEXT: retq
   %and = and i64 %x, 144115188075855871
@@ -386,18 +377,16 @@
 ; X86-LABEL: shl_to_shr_ne_i32_s24:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $24, %ecx
-; X86-NEXT: andl $-16777216, %eax # imm = 0xFF000000
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: shrl $24, %eax
 ; X86-NEXT: cmpl %eax, %ecx
 ; X86-NEXT: setne %al
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shl_to_shr_ne_i32_s24:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $24, %eax
-; X64-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: shrl $24, %edi
 ; X64-NEXT: cmpl %edi, %eax
 ; X64-NEXT: setne %al
 ; X64-NEXT: retq
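
Reviewer note: the identity this combine relies on can be sanity-checked outside of LLVM. The following is a small standalone C++ sketch (my own illustration, not part of the patch) that brute-forces both rewrite directions over every i8 value and shift amount. For a low mask `M` with `A == popcount(~M)`, `(x & M) == (x >> A)` is equivalent to `(x & (M << A)) == (x << A)`; for a high mask `M` with `A == popcount(~M)`, `(x & M) == (x << A)` is equivalent to `(x & (M >> A)) == (x >> A)`. The `Amt.uge(7)`/`Amt.ult(7)` and `getSignificantBits()` checks in the patch only gate profitability; they are not needed for correctness.

```cpp
// Standalone sanity check for the reorder identity; names and structure are
// illustrative only and do not mirror the LLVM implementation.
#include <cstdint>
#include <cstdio>

int main() {
  bool AllMatch = true;
  for (unsigned A = 1; A < 8; ++A) {
    const uint8_t LowMask = uint8_t(0xFFu >> A);  // A == popcount(~LowMask)
    const uint8_t HighMask = uint8_t(0xFFu << A); // A == popcount(~HighMask)
    for (unsigned V = 0; V < 256; ++V) {
      const uint8_t X = uint8_t(V);
      // srl form and its shl-based replacement (what the combine emits).
      const bool SrlOld = uint8_t(X & LowMask) == uint8_t(X >> A);
      const bool SrlNew = uint8_t(X & uint8_t(LowMask << A)) == uint8_t(X << A);
      // shl form and its srl-based replacement.
      const bool ShlOld = uint8_t(X & HighMask) == uint8_t(X << A);
      const bool ShlNew = uint8_t(X & uint8_t(HighMask >> A)) == uint8_t(X >> A);
      AllMatch &= (SrlOld == SrlNew) && (ShlOld == ShlNew);
    }
  }
  std::printf("reorder identity holds for all i8 inputs: %s\n",
              AllMatch ? "yes" : "no");
  return AllMatch ? 0 : 1;
}
```

This matches what the updated CHECK lines show, e.g. in `shr_to_shl_eq_i8_s2` the old `andb $63` + `shrb $2` pair becomes `andb $-4` plus a `leal`-based shift-left with the recomputed mask `63 << 2 = -4 (0xFC)`.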