Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -2900,23 +2900,15 @@ isCommutativeIntrinsic(II)) std::swap(LHS, RHS); - bool UseIncDec = false; - if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne()) - UseIncDec = true; - unsigned BaseOpc, CondOpc; switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::sadd_with_overflow: - BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD); - CondOpc = X86::SETOr; - break; + BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break; case Intrinsic::uadd_with_overflow: BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; case Intrinsic::ssub_with_overflow: - BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB); - CondOpc = X86::SETOr; - break; + BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break; case Intrinsic::usub_with_overflow: BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; case Intrinsic::smul_with_overflow: @@ -2938,9 +2930,11 @@ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } }; - if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { + if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) && + CondOpc == X86::SETOr) { + // We can use INC/DEC. 
ResultReg = createResultReg(TLI.getRegClassFor(VT)); - bool IsDec = BaseOpc == X86ISD::DEC; + bool IsDec = BaseOpc == ISD::SUB; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg) .addReg(LHSReg, getKillRegState(LHSIsKill)); Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -472,6 +472,9 @@ SDValue &InFlag); bool tryOptimizeRem8Extend(SDNode *N); + + bool hasNoSignFlagUses(SDValue Flags) const; + bool hasNoCarryFlagUses(SDValue Flags) const; }; } @@ -2226,7 +2229,7 @@ /// Test whether the given X86ISD::CMP node has any uses which require the SF /// flag to be accurate. -static bool hasNoSignFlagUses(SDValue Flags) { +bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { // Examine each user of the node. for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); UI != UE; ++UI) { @@ -2266,29 +2269,60 @@ /// Test whether the given node which sets flags has any uses which require the /// CF flag to be accurate. -static bool hasNoCarryFlagUses(SDValue Flags) { + bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { // Examine each user of the node. for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); UI != UE; ++UI) { // Only check things that use the flags. if (UI.getUse().getResNo() != Flags.getResNo()) continue; - // Only examine CopyToReg uses that copy to EFLAGS. - if (UI->getOpcode() != ISD::CopyToReg || - cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) - return false; - // Examine each user of the CopyToReg use. - for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); - FlagUI != FlagUE; ++FlagUI) { - // Only examine the Flag result. - if (FlagUI.getUse().getResNo() != 1) - continue; - // Anything unusual: assume conservatively. 
- if (!FlagUI->isMachineOpcode()) + + unsigned UIOpc = UI->getOpcode(); + + if (UIOpc == ISD::CopyToReg) { + // Only examine CopyToReg uses that copy to EFLAGS. + if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) return false; - // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + // Examine each user of the CopyToReg use. + for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); + FlagUI != FlagUE; ++FlagUI) { + // Only examine the Flag result. + if (FlagUI.getUse().getResNo() != 1) + continue; + // Anything unusual: assume conservatively. + if (!FlagUI->isMachineOpcode()) + return false; + // Examine the condition code of the user. + X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + + switch (CC) { + // Comparisons which don't examine the CF flag. + case X86::COND_O: case X86::COND_NO: + case X86::COND_E: case X86::COND_NE: + case X86::COND_S: case X86::COND_NS: + case X86::COND_P: case X86::COND_NP: + case X86::COND_L: case X86::COND_GE: + case X86::COND_G: case X86::COND_LE: + continue; + // Anything else: assume conservatively. + default: + return false; + } + } + } else { + // We might not have selected this node yet. + unsigned CCOpNo; + switch (UIOpc) { + default: + // Something unusual. Be conservative. + return false; + case X86ISD::SETCC: CCOpNo = 0; break; + case X86ISD::SETCC_CARRY: CCOpNo = 0; break; + case X86ISD::CMOV: CCOpNo = 2; break; + case X86ISD::BRCOND: CCOpNo = 2; break; + } + X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo); switch (CC) { // Comparisons which don't examine the CF flag. case X86::COND_O: case X86::COND_NO: @@ -2458,8 +2492,6 @@ switch (Opc) { default: return false; - case X86ISD::INC: - case X86ISD::DEC: case X86ISD::SUB: case X86ISD::SBB: break; @@ -2510,20 +2542,27 @@ MachineSDNode *Result; switch (Opc) { - case X86ISD::INC: - case X86ISD::DEC: { - unsigned NewOpc = - Opc == X86ISD::INC - ? 
SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) - : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); - const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; - Result = - CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops); - break; - } case X86ISD::ADD: - case X86ISD::ADC: case X86ISD::SUB: + // Try to match inc/dec. + if (!Subtarget->slowIncDec() || + CurDAG->getMachineFunction().getFunction().optForSize()) { + bool IsOne = isOneConstant(StoredVal.getOperand(1)); + bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); + // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used. + if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { + unsigned NewOpc = + ((Opc == X86ISD::ADD) == IsOne) + ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) + : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); + const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; + Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, + MVT::Other, Ops); + break; + } + } + LLVM_FALLTHROUGH; + case X86ISD::ADC: case X86ISD::SBB: case X86ISD::AND: case X86ISD::OR: Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -337,7 +337,7 @@ // Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, - INC, DEC, OR, XOR, AND, + OR, XOR, AND, // Bit field extract. BEXTR, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -18647,26 +18647,6 @@ UI->getOpcode() != ISD::STORE) goto default_case; - if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) { - // An add of one will be selected as an INC. 
- if (C->isOne() && - (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction().optForSize())) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && - (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction().optForSize())) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } - } - // Otherwise use a regular EFLAGS-setting add. Opcode = X86ISD::ADD; NumOperands = 2; @@ -18758,8 +18738,6 @@ break; case X86ISD::ADD: case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: case X86ISD::OR: case X86ISD::XOR: case X86ISD::AND: @@ -19627,8 +19605,7 @@ if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || Opc == X86ISD::SBB || Opc == X86ISD::SMUL || - Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR || - Opc == X86ISD::XOR || Opc == X86ISD::AND)) + Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) return true; if (Op.getResNo() == 2 && Opc == X86ISD::UMUL) @@ -20592,22 +20569,13 @@ unsigned X86Cond; SDVTList VTs; // Keep this in sync with LowerXALUO, otherwise we might create redundant - // instructions that can't be removed afterwards (i.e. X86ISD::ADD and - // X86ISD::INC). + // instructions that can't be removed afterwards (i.e. X86ISD::ADD). 
switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: - if (isOneConstant(RHS)) { - X86Opcode = X86ISD::INC; X86Cond = X86::COND_O; - break; - } X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: - if (isOneConstant(RHS)) { - X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O; - break; - } X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; @@ -24963,13 +24931,6 @@ switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: - // A subtract of one will be selected as a INC. Note that INC doesn't - // set CF, so we can't do this for UADDO. - if (isOneConstant(RHS)) { - BaseOp = X86ISD::INC; - Cond = X86::COND_O; - break; - } BaseOp = X86ISD::ADD; Cond = X86::COND_O; break; @@ -24978,13 +24939,6 @@ Cond = X86::COND_B; break; case ISD::SSUBO: - // A subtract of one will be selected as a DEC. Note that DEC doesn't - // set CF, so we can't do this for USUBO. 
- if (isOneConstant(RHS)) { - BaseOp = X86ISD::DEC; - Cond = X86::COND_O; - break; - } BaseOp = X86ISD::SUB; Cond = X86::COND_O; break; @@ -27122,8 +27076,6 @@ case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::SMUL8: return "X86ISD::SMUL8"; case X86ISD::UMUL8: return "X86ISD::UMUL8"; - case X86ISD::INC: return "X86ISD::INC"; - case X86ISD::DEC: return "X86ISD::DEC"; case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; Index: lib/Target/X86/X86InstrArithmetic.td =================================================================== --- lib/Target/X86/X86InstrArithmetic.td +++ lib/Target/X86/X86InstrArithmetic.td @@ -422,22 +422,35 @@ } // SchedRW } // CodeSize +def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86add_flag node:$lhs, node:$rhs), [{ + return hasNoCarryFlagUses(SDValue(N, 1)); +}]>; + +def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86sub_flag node:$lhs, node:$rhs), [{ + // Only use DEC if the result is used. + return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1)); +}]>; + // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. 
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16; + [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>, + OpSize16; def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32; + [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>, + OpSize32; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 // Short forms only valid in 32-bit mode. Selected during MCInst lowering. @@ -474,16 +487,18 @@ let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16; + [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>, + OpSize16; def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32; + [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>, + OpSize32; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 // Short forms only valid in 32-bit mode. Selected during MCInst lowering. 
Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -2022,6 +2022,15 @@ def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; + + def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>; + def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>; + def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>; + def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>; + def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>; + def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>; + def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>; + def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>; } // or reg/reg. Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -253,8 +253,6 @@ def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; -def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; -def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, Index: test/CodeGen/X86/sub-with-overflow.ll =================================================================== --- test/CodeGen/X86/sub-with-overflow.ll +++ test/CodeGen/X86/sub-with-overflow.ll @@ -83,8 +83,7 @@ define i1 @func3(i32 %x) nounwind { ; CHECK-LABEL: func3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: decl %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: seto %al ; CHECK-NEXT: retl entry: