diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5621,12 +5621,52 @@ onlyUsesZeroFlag(SDValue(Node, 0))) { unsigned ShiftOpcode = ISD::DELETED_NODE; unsigned ShiftAmt; - if (isMask_64(~Mask)) { - ShiftOpcode = X86::SHR64ri; - ShiftAmt = countTrailingZeros(Mask); - } else if (isMask_64(Mask)) { - ShiftOpcode = X86::SHL64ri; - ShiftAmt = countLeadingZeros(Mask); + unsigned SubRegIdx; + MVT SubRegVT; + unsigned TestOpcode; + if (isShiftedMask_64(Mask)) { + unsigned LeadingZeros = countLeadingZeros(Mask); + unsigned TrailingZeros = countTrailingZeros(Mask); + // If the mask covers the most significant bit, then we can replace + // TEST+AND with a SHR and check eflags. + // This emits a redundant TEST which is subsequently eliminated. + if (LeadingZeros == 0) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = 0; + TestOpcode = X86::TEST64rr; + // If the mask covers the least significant bit, then we can replace + // TEST+AND with a SHL and check eflags. + // This emits a redundant TEST which is subsequently eliminated. + } else if (TrailingZeros == 0) { + ShiftOpcode = X86::SHL64ri; + ShiftAmt = LeadingZeros; + SubRegIdx = 0; + TestOpcode = X86::TEST64rr; + } else { + // If the mask is 8, 16 or 32 bits wide, then we can replace it with + // a SHR and a TEST8rr/TEST16rr/TEST32rr. 
+ unsigned PopCount = 64 - LeadingZeros - TrailingZeros; + if (PopCount == 8) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_8bit; + SubRegVT = MVT::i8; + TestOpcode = X86::TEST8rr; + } else if (PopCount == 16) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_16bit; + SubRegVT = MVT::i16; + TestOpcode = X86::TEST16rr; + } else if (PopCount == 32) { + ShiftOpcode = X86::SHR64ri; + ShiftAmt = TrailingZeros; + SubRegIdx = X86::sub_32bit; + SubRegVT = MVT::i32; + TestOpcode = X86::TEST32rr; + } + } } if (ShiftOpcode != ISD::DELETED_NODE) { SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64); @@ -5634,8 +5674,12 @@ CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32, N0.getOperand(0), ShiftC), 0); + if (SubRegIdx != 0) { + Shift = + CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift); + } MachineSDNode *Test = - CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift); + CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift); ReplaceNode(Node, Test); return; } diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -533,9 +533,8 @@ define i1 @shifted_mask_testb(i64 %a) { ; CHECK-LABEL: shifted_mask_testb: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03] -; CHECK-NEXT: # imm = 0x3FC000000000000 -; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7] +; CHECK-NEXT: shrq $50, %rdi # encoding: [0x48,0xc1,0xef,0x32] +; CHECK-NEXT: testb %dil, %dil # encoding: [0x40,0x84,0xff] ; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, 287104476244869120 ; 0xff << 50 @@ -546,9 +545,8 @@ define i1 @shifted_mask_testw(i64 %a) { ; CHECK-LABEL: shifted_mask_testw: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $562941363486720, %rax # encoding: 
[0x48,0xb8,0x00,0x00,0x00,0x00,0xfe,0xff,0x01,0x00] -; CHECK-NEXT: # imm = 0x1FFFE00000000 -; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7] +; CHECK-NEXT: shrq $33, %rdi # encoding: [0x48,0xc1,0xef,0x21] +; CHECK-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff] ; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, 562941363486720 ; 0xffff << 33 @@ -559,9 +557,8 @@ define i1 @shifted_mask_testl(i64 %a) { ; CHECK-LABEL: shifted_mask_testl: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $549755813760, %rax # encoding: [0x48,0xb8,0x80,0xff,0xff,0xff,0x7f,0x00,0x00,0x00] -; CHECK-NEXT: # imm = 0x7FFFFFFF80 -; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7] +; CHECK-NEXT: shrq $7, %rdi # encoding: [0x48,0xc1,0xef,0x07] +; CHECK-NEXT: testl %edi, %edi # encoding: [0x85,0xff] ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, 549755813760 ; 0xffffffff << 7 diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -472,16 +472,16 @@ ; X64-BMI1-NEXT: movq %rsi, %rcx ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-BMI1-NEXT: shlq %cl, %rdi -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI1-NEXT: testq %rax, %rdi +; X64-BMI1-NEXT: shrq $16, %rdi +; X64-BMI1-NEXT: testl %edi, %edi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax -; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: shrq $16, %rax +; X64-BMI2-NEXT: testl %eax, %eax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr 
i64 281474976645120, %y diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -435,16 +435,16 @@ ; X64-BMI1-NEXT: movq %rsi, %rcx ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-BMI1-NEXT: shrq %cl, %rdi -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI1-NEXT: testq %rax, %rdi +; X64-BMI1-NEXT: shrq $16, %rdi +; X64-BMI1-NEXT: testl %edi, %edi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: shrq $16, %rax +; X64-BMI2-NEXT: testl %eax, %eax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i64 281474976645120, %y