Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3383,8 +3383,11 @@
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
 
-    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
-        hasNoSignedComparisonUses(Node))
+    // Save the original VT of the compare.
+    MVT CmpVT = N0.getSimpleValueType();
+
+    // We can peek through truncates, but we need to be careful below.
+    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
       N0 = N0.getOperand(0);
 
     // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3400,25 +3403,35 @@
 
       MVT VT;
       int SubRegOp;
-      unsigned Op;
+      unsigned ROpc, MOpc;
+
+      // For each of these checks we need to be careful if the sign flag is
+      // being used. It is only safe to use the sign flag in two conditions,
+      // either the sign bit in the shrunken mask is zero or the final test
+      // size is equal to the original compare size.
 
       if (isUInt<8>(Mask) &&
-          (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+          (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+           hasNoSignedComparisonUses(Node))) {
         // For example, convert "testl %eax, $8" to "testb %al, $8"
         VT = MVT::i8;
         SubRegOp = X86::sub_8bit;
-        Op = X86::TEST8ri;
+        ROpc = X86::TEST8ri;
+        MOpc = X86::TEST8mi;
       } else if (OptForMinSize && isUInt<16>(Mask) &&
-                 (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+                 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+                  hasNoSignedComparisonUses(Node))) {
         // For example, "testl %eax, $32776" to "testw %ax, $32776".
         // NOTE: We only want to form TESTW instructions if optimizing for
         // min size. Otherwise we only save one byte and possibly get a length
         // changing prefix penalty in the decoders.
         VT = MVT::i16;
         SubRegOp = X86::sub_16bit;
-        Op = X86::TEST16ri;
+        ROpc = X86::TEST16ri;
+        MOpc = X86::TEST16mi;
       } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
-                 (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+                 (!(Mask & 0x80000000) || CmpVT == MVT::i32 ||
+                  hasNoSignedComparisonUses(Node))) {
         // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
         // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
         // Otherwize, we find ourselves in a position where we have to do
@@ -3426,21 +3439,37 @@
         // they had a good reason not to and do not promote here.
         VT = MVT::i32;
         SubRegOp = X86::sub_32bit;
-        Op = X86::TEST32ri;
+        ROpc = X86::TEST32ri;
+        MOpc = X86::TEST32mi;
       } else {
         // No eligible transformation was found.
         break;
       }
 
+      // FIXME: We should be able to fold loads here.
+
       SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
       SDValue Reg = N0.getOperand(0);
 
-      // Extract the subregister if necessary.
-      if (N0.getValueType() != VT)
-        Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
-      // Emit a testl or testw.
-      SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
+      MachineSDNode *NewNode;
+      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+      if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
+                          Reg.getOperand(0) };
+        NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
+        // Update the chain.
+        ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
+        // Record the mem-refs
+        CurDAG->setNodeMemRefs(NewNode,
+                               {cast<LoadSDNode>(Reg)->getMemOperand()});
+      } else {
+        // Extract the subregister if necessary.
+        if (N0.getValueType() != VT)
+          Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
+
+        NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
+      }
 
       // Replace CMP with TEST.
       ReplaceNode(Node, NewNode);
       return;
Index: test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
===================================================================
--- test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -12,8 +12,7 @@
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $150, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb $-106, %al
 ; CHECK-NEXT:    jle .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %if.then
 ; CHECK-NEXT:    movl $1, {{.*}}(%rip)
Index: test/CodeGen/X86/test-shrink.ll
===================================================================
--- test/CodeGen/X86/test-shrink.ll
+++ test/CodeGen/X86/test-shrink.ll
@@ -578,8 +578,7 @@
 define void @and16_trunc_8_sign(i16 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and16_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB13_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -592,8 +591,7 @@
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
 ; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB13_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -604,8 +602,7 @@
 ; CHECK-X86-LABEL: and16_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    andl $128, %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB13_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -626,8 +623,7 @@
 define void @and32_trunc_8_sign(i32 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB14_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -639,8 +635,7 @@
 ; CHECK-WIN32-64-LABEL: and32_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB14_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -650,9 +645,7 @@
 ;
 ; CHECK-X86-LABEL: and32_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    testb $-128, {{[0-9]+}}(%esp)
 ; CHECK-X86-NEXT:    jg .LBB14_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -673,8 +666,7 @@
 define void @and64_trunc_8_sign(i64 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB15_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -686,8 +678,7 @@
 ; CHECK-WIN32-64-LABEL: and64_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB15_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -697,9 +688,7 @@
 ;
 ; CHECK-X86-LABEL: and64_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    testb $-128, {{[0-9]+}}(%esp)
 ; CHECK-X86-NEXT:    jg .LBB15_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -720,8 +709,7 @@
 define void @and32_trunc_16_sign(i32 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_16_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB16_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -733,8 +721,7 @@
 ; CHECK-WIN32-64-LABEL: and32_trunc_16_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB16_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -744,9 +731,7 @@
 ;
 ; CHECK-X86-LABEL: and32_trunc_16_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    testw $-32768, {{[0-9]+}}(%esp) # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB16_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -767,8 +752,7 @@
 define void @and64_trunc_32_sign(i64 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_32_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB17_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -780,8 +764,7 @@
 ; CHECK-WIN32-64-LABEL: and64_trunc_32_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB17_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -791,9 +774,7 @@
 ;
 ; CHECK-X86-LABEL: and64_trunc_32_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    testw $-32768, {{[0-9]+}}(%esp) # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB17_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
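
The sign-flag comment in the patch is the heart of the fix: shrinking a TEST changes which bit becomes SF, so signed users (the jle/jg branches in the updated tests) stay correct only when the shrunken mask's sign bit is clear or the test is already as wide as the original compare. The following standalone sketch of that flag arithmetic is illustrative only and is not part of the patch; signFlagAfterTest is a made-up helper.

// Illustrative sketch: how SF behaves for "TEST value, mask" at different
// widths. Not LLVM code; signFlagAfterTest is invented for this example.
#include <cassert>
#include <cstdint>

// SF after an x86 TEST performed at the given bit width: the MSB of the
// AND result.
static bool signFlagAfterTest(uint64_t Value, uint64_t Mask, unsigned Bits) {
  return ((Value & Mask) >> (Bits - 1)) & 1;
}

int main() {
  // Shrinking "testl $128, %eax" to "testb $-128, %al" changes SF for any
  // value with bit 7 set: at 32 bits SF is always 0, at 8 bits it is bit 7.
  assert(signFlagAfterTest(0x80, 0x80, 32) == false);
  assert(signFlagAfterTest(0x80, 0x80, 8) == true);

  // If the shrunken mask's sign bit is clear, SF is 0 at either width, so
  // the shrink is safe even for signed users.
  assert(signFlagAfterTest(0xFF, 0x40, 32) == signFlagAfterTest(0xFF, 0x40, 8));

  // The updated 2010-08-04-MaskedSignedCompare.ll case: the compare itself
  // is 8 bits wide (CmpVT == MVT::i8 once the truncate is peeked through),
  // and 150 == 0x96 prints as the signed byte -106, so "testb $-106, %al"
  // reproduces the flags of the old "andl $150 + testb %al, %al" exactly.
  assert(static_cast<int8_t>(0x96) == -106);
  for (uint32_t Al = 0; Al <= 0xFF; ++Al)
    assert(signFlagAfterTest(Al, 0x96, 8) ==
           signFlagAfterTest(Al & 150, 0xFF, 8));
  return 0;
}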
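Read together, the three branches in the patch pick the narrowest TEST that is still safe: a byte test whenever the mask fits and the sign-flag condition holds, a word test only at minsize (the 16-bit immediate plus the 0x66 prefix makes TESTW a length-changing-prefix case, so it is usually not worth the one saved byte), and a dword test otherwise, with ROpc/MOpc carrying the register and memory opcodes so a suitable load can be folded into TEST8mi/TEST16mi/TEST32mi. Below is a rough model of that selection with the DAG-specific checks (operand value types, load matching) left out; pickTest, SignSafe, and the parameters are invented for illustration and are not LLVM API.

// Rough model of the width selection in the patch; the real code also
// requires the AND operand not to be i16 for the 32-bit case and switches
// to the TESTmi form when the operand is a foldable load.
#include <cassert>
#include <cstdint>
#include <string>

static std::string pickTest(uint64_t Mask, unsigned CmpBits,
                            bool OptForMinSize, bool NoSignedUses) {
  // Safe to test at `Bits` if the shrunken mask's sign bit is clear, the
  // original compare was already `Bits` wide, or nothing reads SF at all.
  auto SignSafe = [&](unsigned Bits) {
    uint64_t SignBit = 1ULL << (Bits - 1);
    return !(Mask & SignBit) || CmpBits == Bits || NoSignedUses;
  };
  if (Mask <= 0xFFu && SignSafe(8))
    return "TEST8";                      // testb
  if (OptForMinSize && Mask <= 0xFFFFu && SignSafe(16))
    return "TEST16";                     // testw, minsize only
  if (Mask <= 0xFFFFFFFFu && SignSafe(32))
    return "TEST32";                     // testl
  return "keep the original compare";
}

int main() {
  // testl $8 -> testb $8: no sign bit involved.
  assert(pickTest(8, 32, false, false) == "TEST8");
  // Mask 0x80 with a signed user: only a testb if the compare was i8,
  // as in the and32_trunc_8_sign test above.
  assert(pickTest(0x80, 32, false, false) == "TEST32");
  assert(pickTest(0x80, 8, false, false) == "TEST8");
  // testw is formed only under minsize; otherwise the wider testl is kept
  // (and32_trunc_16_sign carries the `minsize` attribute for this reason).
  assert(pickTest(0x8000, 16, true, true) == "TEST16");
  assert(pickTest(0x8000, 16, false, true) == "TEST32");
  return 0;
}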