diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -567,6 +567,7 @@
                                 SDValue &InFlag);
 
     bool tryOptimizeRem8Extend(SDNode *N);
+    bool postprocessTest64RR(SDNode *N, unsigned DeadOpUses);
     bool postprocessTestRR(SDNode *N);
     bool postprocessKortestRR(SDNode *N);
     bool postprocessSubregToReg(SDNode *N);
@@ -1465,8 +1466,63 @@
   return true;
 }
 
+// Look for a TEST64rr whose operand 1 is a MOV64ri (movabsq) of a shifted mask
+// that is exactly 8, 16 or 32 bits wide, and rewrite it to a SHR64ri by the
+// mask's trailing-zero count followed by a narrow TEST of the low subregister,
+// so the 64-bit immediate no longer has to be materialized for this test.
+// The rewrite is only done when onlyUsesZeroFlag() holds for N's result.
+// DeadOpUses is the number of uses of the MOV64ri, besides N, that the caller
+// allows (postprocessTestRR passes 1 for the AND it has just bypassed).
+// Returns true if N's uses were replaced by the new TEST.
+bool X86DAGToDAGISel::postprocessTest64RR(SDNode *N, unsigned DeadOpUses) {
+  assert(N->getMachineOpcode() == X86::TEST64rr && "expected TEST64rr");
+  SDValue Op1 = N->getOperand(1);
+  // Only operand 1 is inspected; the MOV64ri is assumed to always be there.
+  if (!Op1->isMachineOpcode() || Op1->getMachineOpcode() != X86::MOV64ri ||
+      Op1->use_size() != 1 + DeadOpUses)
+    return false;
+  uint64_t C = Op1->getConstantOperandVal(0);
+  if (!isShiftedMask_64(C) || !onlyUsesZeroFlag(SDValue(N, 0)))
+    return false;
+  // C is one contiguous run of ones, so its width is whatever is left between
+  // the trailing and leading zeros.
+  unsigned TrailZ = countTrailingZeros(C);
+  unsigned LeadingZ = countLeadingZeros(C);
+  unsigned PopCount = 64 - TrailZ - LeadingZ;
+  unsigned TestOpc;
+  MVT SubRegVT;
+  unsigned SubRegIdx;
+  if (PopCount == 8) {
+    TestOpc = X86::TEST8rr;
+    SubRegVT = MVT::i8;
+    SubRegIdx = X86::sub_8bit;
+  } else if (PopCount == 16) {
+    TestOpc = X86::TEST16rr;
+    SubRegVT = MVT::i16;
+    SubRegIdx = X86::sub_16bit;
+  } else if (PopCount == 32) {
+    TestOpc = X86::TEST32rr;
+    SubRegVT = MVT::i32;
+    SubRegIdx = X86::sub_32bit;
+  } else {
+    return false;
+  }
+  // Shift the masked field down to bit 0 and test only the low PopCount bits.
+  SDLoc DL(N);
+  SDValue ShiftC = CurDAG->getTargetConstant(TrailZ, DL, MVT::i8);
+  MachineSDNode *Shr = CurDAG->getMachineNode(X86::SHR64ri, DL, MVT::i64,
+                                              MVT::i32, N->getOperand(0),
+                                              ShiftC);
+  SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, SubRegVT,
+                                                  SDValue(Shr, 0));
+  MachineSDNode *Test = CurDAG->getMachineNode(TestOpc, DL, MVT::i32,
+                                               SubReg, SubReg);
+  ReplaceUses(N, Test);
+  return true;
+}
+
 // Look for a TESTrr+ANDrr pattern where both operands of the test are
 // the same. Rewrite to remove the AND.
 bool X86DAGToDAGISel::postprocessTestRR(SDNode *N) {
   unsigned Opc = N->getMachineOpcode();
   assert((Opc == X86::TEST8rr || Opc == X86::TEST16rr || Opc == X86::TEST32rr ||
@@ -1487,6 +1543,9 @@
     MachineSDNode *Test = CurDAG->getMachineNode(
         Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));
     ReplaceUses(N, Test);
+    if (Opc == X86::TEST64rr) {
+      postprocessTest64RR(Test, 1);
+    }
     return true;
   }
   case X86::AND8rm:
@@ -1671,10 +1730,15 @@
         continue;
       }
       break;
+    case X86::TEST64rr:
+      if (postprocessTest64RR(N, 0)) {
+        MadeChange = true;
+        continue;
+      }
+      LLVM_FALLTHROUGH;
     case X86::TEST8rr:
     case X86::TEST16rr:
     case X86::TEST32rr:
-    case X86::TEST64rr:
       if (postprocessTestRR(N)) {
         MadeChange = true;
         continue;
diff --git a/llvm/test/CodeGen/X86/dag-test-mov64ri.ll b/llvm/test/CodeGen/X86/dag-test-mov64ri.ll
--- a/llvm/test/CodeGen/X86/dag-test-mov64ri.ll
+++ b/llvm/test/CodeGen/X86/dag-test-mov64ri.ll
@@ -4,8 +4,8 @@
 define i1 @f_shr_testb(i64 %a) {
 ; CHECK-LABEL: f_shr_testb:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movabsq $287104476244869120, %rax # imm = 0x3FC000000000000
-; CHECK-NEXT:    testq %rax, %rdi
+; CHECK-NEXT:    shrq $50, %rdi
+; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %v0 = and i64 %a, 287104476244869120 ; 0xff << 50
@@ -16,8 +16,8 @@
 define i1 @f_shr_testw(i64 %a) {
 ; CHECK-LABEL: f_shr_testw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movabsq $562941363486720, %rax # imm = 0x1FFFE00000000
-; CHECK-NEXT:    testq %rax, %rdi
+; CHECK-NEXT:    shrq $33, %rdi
+; CHECK-NEXT:    testw %di, %di
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %v0 = and i64 %a, 562941363486720 ; 0xffff << 33
@@ -28,8 +28,8 @@
 define i1 @f_shr_testl(i64 %a) {
 ; CHECK-LABEL: f_shr_testl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movabsq $549755813760, %rax # imm = 0x7FFFFFFF80
-; CHECK-NEXT:    testq %rax, %rdi
+; CHECK-NEXT:    shrq $7, %rdi
+; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -472,16 +472,16 @@
 ; X64-BMI1-NEXT:    movq %rsi, %rcx
 ; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1-NEXT:    shlq %cl, %rdi
-; X64-BMI1-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI1-NEXT:    testq %rax, %rdi
+; X64-BMI1-NEXT:    shrq $16, %rdi
+; X64-BMI1-NEXT:    testl %edi, %edi
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
-; X64-BMI2-NEXT:    movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
-; X64-BMI2-NEXT:    testq %rcx, %rax
+; X64-BMI2-NEXT:    shrq $16, %rax
+; X64-BMI2-NEXT:    testl %eax, %eax
 ; X64-BMI2-NEXT:    sete %al
 ; X64-BMI2-NEXT:    retq
   %t0 = lshr i64 281474976645120, %y
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -435,16 +435,16 @@
 ; X64-BMI1-NEXT:    movq %rsi, %rcx
 ; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1-NEXT:    shrq %cl, %rdi
-; X64-BMI1-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI1-NEXT:    testq %rax, %rdi
+; X64-BMI1-NEXT:    shrq $16, %rdi
+; X64-BMI1-NEXT:    testl %edi, %edi
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI2-NEXT:    movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
-; X64-BMI2-NEXT:    testq %rcx, %rax
+; X64-BMI2-NEXT:    shrq $16, %rax
+; X64-BMI2-NEXT:    testl %eax, %eax
 ; X64-BMI2-NEXT:    sete %al
 ; X64-BMI2-NEXT:    retq
   %t0 = shl i64 281474976645120, %y