diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -570,6 +570,13 @@
     bool tryOptimizeRem8Extend(SDNode *N);
 
     bool onlyUsesZeroFlag(SDValue Flags) const;
+
+    /// Selects which EFLAGS bit hasNoFlagClassUses() looks for users of.
+    enum class FlagClass { Sign, Overflow };
+
+    /// Shared use-walk behind hasNoSignFlagUses/hasNoOverflowFlagUses.
+    bool hasNoFlagClassUses(SDValue Flags, FlagClass FC) const;
+    bool hasNoOverflowFlagUses(SDValue Flags) const;
     bool hasNoSignFlagUses(SDValue Flags) const;
     bool hasNoCarryFlagUses(SDValue Flags) const;
   };
@@ -3024,43 +3031,81 @@
   return true;
 }
 
-/// Test whether the given X86ISD::CMP node has any uses which require the SF
-/// flag to be accurate.
-bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
+/// Return true if a flag user with condition code \p CC may read the SF flag.
+static bool mayUseSignFlag(X86::CondCode CC) {
+  switch (CC) {
+  // Comparisons which don't examine the SF flag.
+  case X86::COND_A: case X86::COND_AE:
+  case X86::COND_B: case X86::COND_BE:
+  case X86::COND_E: case X86::COND_NE:
+  case X86::COND_O: case X86::COND_NO:
+  case X86::COND_P: case X86::COND_NP:
+    return false;
+  // Anything else: assume conservatively.
+  default:
+    return true;
+  }
+}
+
+/// Return true if a flag user with condition code \p CC may read the OF flag.
+static bool mayUseOverflowFlag(X86::CondCode CC) {
+  switch (CC) {
+  // Comparisons which don't examine the OF flag. The signed orderings
+  // (L/GE/G/LE) test SF against OF, so they must not be listed here.
+  case X86::COND_A: case X86::COND_AE:
+  case X86::COND_B: case X86::COND_BE:
+  case X86::COND_E: case X86::COND_NE:
+  case X86::COND_P: case X86::COND_NP:
+  case X86::COND_S: case X86::COND_NS:
+    return false;
+  // Anything else: assume conservatively.
+  default:
+    return true;
+  }
+}
+
+/// Test whether the given X86ISD::CMP node has any uses which require a flag
+/// of class \p FC to be accurate. Conservatively answers false for any user
+/// that is not a CopyToReg of EFLAGS feeding a machine node.
+bool X86DAGToDAGISel::hasNoFlagClassUses(SDValue Flags, FlagClass FC) const {
   // Examine each user of the node.
   for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
          UI != UE; ++UI) {
     // Only check things that use the flags.
     if (UI.getUse().getResNo() != Flags.getResNo())
       continue;
     // Only examine CopyToReg uses that copy to EFLAGS.
     if (UI->getOpcode() != ISD::CopyToReg ||
         cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
       return false;
     // Examine each user of the CopyToReg use.
     for (SDNode::use_iterator FlagUI = UI->use_begin(),
            FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
       // Only examine the Flag result.
       if (FlagUI.getUse().getResNo() != 1) continue;
       // Anything unusual: assume conservatively.
       if (!FlagUI->isMachineOpcode()) return false;
       // Examine the condition code of the user.
       X86::CondCode CC = getCondFromNode(*FlagUI);
-      switch (CC) {
-      // Comparisons which don't examine the SF flag.
-      case X86::COND_A: case X86::COND_AE:
-      case X86::COND_B: case X86::COND_BE:
-      case X86::COND_E: case X86::COND_NE:
-      case X86::COND_O: case X86::COND_NO:
-      case X86::COND_P: case X86::COND_NP:
-        continue;
-      // Anything else: assume conservatively.
-      default:
+      if (FC == FlagClass::Sign && mayUseSignFlag(CC))
+        return false;
+      if (FC == FlagClass::Overflow && mayUseOverflowFlag(CC))
         return false;
-      }
     }
   }
   return true;
+}
+
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// flag to be accurate.
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
+  return hasNoFlagClassUses(Flags, FlagClass::Sign);
+}
+
+/// Test whether the given X86ISD::CMP node has any uses which require the OF
+/// flag to be accurate.
+bool X86DAGToDAGISel::hasNoOverflowFlagUses(SDValue Flags) const {
+  return hasNoFlagClassUses(Flags, FlagClass::Overflow);
 }
 
 static bool mayUseCarryFlag(X86::CondCode CC) {
@@ -5638,6 +5683,47 @@
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
 
+    EVT OpVT = N0.getValueType();
+    // This is a fixup if we converted (cmp Op8/Op16, i8) -> (cmp Op32, i32)
+    // during lowering.
+    if (OpVT.getScalarSizeInBits() > 8 && isa<ConstantSDNode>(N1) &&
+        !isNullConstant(N1) && hasNoSignFlagUses(SDValue(Node, 0)) &&
+        hasNoOverflowFlagUses(SDValue(Node, 0))) {
+      const APInt &C = cast<ConstantSDNode>(N1)->getAPIntValue();
+      // Only do replacement if the constant can get imm8 encoding. Imm16 values
+      // cause LCP stalls in the frontend.
+      // TODO: Enable imm16 transform as well if -Os is set?
+      // The constant must also be non-negative: narrowing is justified by the
+      // high bits of N0 being zero, so a constant with high bits set would
+      // compare differently (ZF/CF) once truncated.
+      if (C.isNonNegative() && C.getSignificantBits() <= 8) {
+        unsigned OpBits = OpVT.getScalarSizeInBits();
+        EVT NewVT = OpVT;
+        if (CurDAG->MaskedValueIsZero(N0, APInt::getBitsSetFrom(OpBits, 8)))
+          NewVT = MVT::i8;
+        else if (CurDAG->MaskedValueIsZero(N0,
+                                           APInt::getBitsSetFrom(OpBits, 16)))
+          NewVT = MVT::i16;
+
+        if (NewVT == OpVT)
+          break;
+
+        SDValue TruncN0 = CurDAG->getZExtOrTrunc(N0, dl, NewVT);
+        insertDAGNode(*CurDAG, SDValue(Node, 0), TruncN0);
+        // Both CMP operands must share a type, so build the immediate in NewVT.
+        SDValue TruncN1 = CurDAG->getConstant(
+            C.trunc(NewVT.getScalarSizeInBits()), dl, NewVT);
+        insertDAGNode(*CurDAG, SDValue(Node, 0), TruncN1);
+        SDValue NewCmp =
+            CurDAG->getNode(X86ISD::CMP, dl, MVT::i32, TruncN0, TruncN1);
+        ReplaceNode(Node, NewCmp.getNode());
+        if (N1.getNode()->use_empty())
+          CurDAG->RemoveDeadNode(N1.getNode());
+        SelectCode(NewCmp.getNode());
+        return;
+      }
+    }
+
     // Optimizations for TEST compares.
     if (!isNullConstant(N1))
       break;
diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll
--- a/llvm/test/CodeGen/X86/combine-movmsk.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk.ll
@@ -41,7 +41,7 @@
 ; SSE-NEXT:    xorpd %xmm1, %xmm1
 ; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
 ; SSE-NEXT:    movmskpd %xmm1, %eax
-; SSE-NEXT:    cmpl $3, %eax
+; SSE-NEXT:    cmpb $3, %al
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -95,14 +95,14 @@
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    movmskps %xmm0, %eax
-; SSE2-NEXT:    cmpl $15, %eax
+; SSE2-NEXT:    cmpb $15, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movmskpd %xmm0, %eax
-; SSE42-NEXT:    cmpl $3, %eax
+; SSE42-NEXT:    cmpb $3, %al
 ; SSE42-NEXT:    sete %al
 ; SSE42-NEXT:    retq
 ;
@@ -151,7 +151,7 @@
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cmpeqps %xmm0, %xmm1
 ; SSE-NEXT:    movmskps %xmm1, %eax
-; SSE-NEXT:    cmpl $15, %eax
+; SSE-NEXT:    cmpb $15, %al
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;