diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5614,10 +5614,10 @@
       uint64_t Mask = MaskC->getZExtValue();
       Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
 
-      // Check if we can replace AND+IMM64 with a shift. This is possible for
-      // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
-      // flag.
-      if (CmpVT == MVT::i64 && !isInt<32>(Mask) && isShiftedMask_64(Mask) &&
+      // Check if we can replace AND+IMM{32,64} with a shift. This is possible
+      // for masks like 0xFF000000 or 0x00FFFFFF, provided we only care about
+      // the zero flag.
+      if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
           onlyUsesZeroFlag(SDValue(Node, 0))) {
         unsigned ShiftOpcode = ISD::DELETED_NODE;
         unsigned ShiftAmt;
@@ -5626,7 +5626,12 @@
         unsigned TestOpcode;
         unsigned LeadingZeros = countLeadingZeros(Mask);
         unsigned TrailingZeros = countTrailingZeros(Mask);
-        if (LeadingZeros == 0) {
+
+        // With leading/trailing zeros, the transform is profitable if we can
+        // eliminate a movabsq or shrink a 32-bit immediate to an 8-bit one
+        // without incurring any extra register moves.
+        bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
+        if (LeadingZeros == 0 && SavesBytes) {
           // If the mask covers the most significant bit, then we can replace
           // TEST+AND with a SHR and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5634,7 +5639,7 @@
           ShiftAmt = TrailingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (TrailingZeros == 0) {
+        } else if (TrailingZeros == 0 && SavesBytes) {
           // If the mask covers the least significant bit, then we can replace
           // TEST+AND with a SHL and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5642,9 +5647,9 @@
           ShiftAmt = LeadingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (MaskC->hasOneUse()) {
-          // If the mask is 8/16 or 32bits wide, then we can replace it with
-          // a SHR and a TEST8rr/TEST16rr/TEST32rr.
+        } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
+          // If the shifted mask extends into the high half and is 8/16/32 bits
+          // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
           unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
           if (PopCount == 8) {
             ShiftOpcode = X86::SHR64ri;
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -430,8 +430,7 @@
 ; CHECK-LABEL: highmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testq $-1048576, %rdi # encoding: [0x48,0xf7,0xc7,0x00,0x00,0xf0,0xff]
-; CHECK-NEXT:    # imm = 0xFFF00000
+; CHECK-NEXT:    shrq $20, %rdi # encoding: [0x48,0xc1,0xef,0x14]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -1048576
@@ -502,8 +501,7 @@
 ; CHECK-LABEL: lowmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
-; CHECK-NEXT:    # imm = 0xFFFFF
+; CHECK-NEXT:    shlq $44, %rdi # encoding: [0x48,0xc1,0xe7,0x2c]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 1048575
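
For reference, a minimal standalone sketch of the profitability logic the patch installs. It is not the LLVM code itself: clz64, ctz64, fitsInt8, fitsInt32 and classify are hypothetical stand-ins for countLeadingZeros, countTrailingZeros, isInt<8>, isInt<32> and the selection chain above; the CmpVT == MVT::i64 and onlyUsesZeroFlag preconditions are assumed to hold in the caller; OperandHasOneUse models N0.getOperand(0).hasOneUse() and MaskHasOneUse models MaskC->hasOneUse().

#include <cstdint>
#include <cstdio>

namespace {

// Portable stand-ins for countLeadingZeros/countTrailingZeros (valid for
// non-zero X).
unsigned clz64(uint64_t X) {
  unsigned N = 0;
  for (uint64_t B = 1ULL << 63; B && !(X & B); B >>= 1)
    ++N;
  return N;
}
unsigned ctz64(uint64_t X) {
  unsigned N = 0;
  for (uint64_t B = 1; B && !(X & B); B <<= 1)
    ++N;
  return N;
}

// isShiftedMask_64: X is a single contiguous run of ones, e.g. 0x00FFFF00.
bool isShiftedMask64(uint64_t X) {
  if (X == 0)
    return false;
  uint64_t Run = X >> ctz64(X);
  return (Run & (Run + 1)) == 0; // Run+1 is a power of two iff Run is all ones.
}

// isInt<8>/isInt<32> applied to the mask viewed as a signed immediate.
bool fitsInt8(uint64_t M) {
  int64_t S = (int64_t)M;
  return S >= -128 && S <= 127;
}
bool fitsInt32(uint64_t M) {
  int64_t S = (int64_t)M;
  return S >= INT32_MIN && S <= INT32_MAX;
}

enum Choice { Keep = 0, UseSHR, UseSHL, UseNarrowTest };

Choice classify(uint64_t Mask, bool OperandHasOneUse, bool MaskHasOneUse) {
  // Entry guard from the patch: only shifted masks wider than imm8 qualify.
  if (fitsInt8(Mask) || !isShiftedMask64(Mask))
    return Keep;

  unsigned LZ = clz64(Mask), TZ = ctz64(Mask);

  // SavesBytes: the shift either kills a movabsq (mask needs imm64) or
  // shrinks an imm32 test to a shift with imm8 -- but only when no extra
  // mov is needed to keep the unshifted value alive.
  bool SavesBytes = !fitsInt32(Mask) || OperandHasOneUse;

  if (LZ == 0 && SavesBytes)
    return UseSHR; // Run reaches bit 63: shr $TZ, then test the result.
  if (TZ == 0 && SavesBytes)
    return UseSHL; // Run reaches bit 0: shl $LZ, then test the result.

  // Interior run extending into the high 32 bits: shift it down and use a
  // narrower TEST8rr/TEST16rr/TEST32rr.
  if (MaskHasOneUse && !fitsInt32(Mask)) {
    unsigned PopCount = 64 - LZ - TZ;
    if (PopCount == 8 || PopCount == 16 || PopCount == 32)
      return UseNarrowTest;
  }
  return Keep;
}

} // namespace

int main() {
  // The masks from the updated tests fit in imm32, so the shift is chosen
  // only when the AND input has no other users.
  printf("%d\n", classify(0xFFFFFFFFFFF00000ULL, true, true));  // 1: shrq $20
  printf("%d\n", classify(0x00000000000FFFFFULL, true, true));  // 2: shlq $44
  printf("%d\n", classify(0xFFFFFFFFFFF00000ULL, false, true)); // 0: testq stays
}

The encodings in the updated tests show the payoff: shrq $20, %rdi is 4 bytes where testq $-1048576, %rdi is 7, and for masks that do not fit imm32 the shift also eliminates a 10-byte movabsq. When the AND input has other users, the shift would force an extra register copy to preserve the unshifted value, which is what SavesBytes guards against.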