Index: llvm/trunk/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp @@ -2900,23 +2900,15 @@ isCommutativeIntrinsic(II)) std::swap(LHS, RHS); - bool UseIncDec = false; - if (isa(RHS) && cast(RHS)->isOne()) - UseIncDec = true; - unsigned BaseOpc, CondOpc; switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::sadd_with_overflow: - BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD); - CondOpc = X86::SETOr; - break; + BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break; case Intrinsic::uadd_with_overflow: BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; case Intrinsic::ssub_with_overflow: - BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB); - CondOpc = X86::SETOr; - break; + BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break; case Intrinsic::usub_with_overflow: BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; case Intrinsic::smul_with_overflow: @@ -2938,9 +2930,11 @@ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } }; - if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { + if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) && + CondOpc == X86::SETOr) { + // We can use INC/DEC. ResultReg = createResultReg(TLI.getRegClassFor(VT)); - bool IsDec = BaseOpc == X86ISD::DEC; + bool IsDec = BaseOpc == ISD::SUB; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg) .addReg(LHSReg, getKillRegState(LHSIsKill)); Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2327,6 +2327,22 @@ return true; } +static bool mayUseCarryFlag(X86::CondCode CC) { + switch (CC) { + // Comparisons which don't examine the CF flag. + case X86::COND_O: case X86::COND_NO: + case X86::COND_E: case X86::COND_NE: + case X86::COND_S: case X86::COND_NS: + case X86::COND_P: case X86::COND_NP: + case X86::COND_L: case X86::COND_GE: + case X86::COND_G: case X86::COND_LE: + return false; + // Anything else: assume conservatively. + default: + return true; + } +} + /// Test whether the given node which sets flags has any uses which require the /// CF flag to be accurate. bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { @@ -2336,36 +2352,49 @@ // Only check things that use the flags. if (UI.getUse().getResNo() != Flags.getResNo()) continue; - // Only examine CopyToReg uses that copy to EFLAGS. - if (UI->getOpcode() != ISD::CopyToReg || - cast(UI->getOperand(1))->getReg() != X86::EFLAGS) - return false; - // Examine each user of the CopyToReg use. - for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); - FlagUI != FlagUE; ++FlagUI) { - // Only examine the Flag result. - if (FlagUI.getUse().getResNo() != 1) - continue; - // Anything unusual: assume conservatively. - if (!FlagUI->isMachineOpcode()) - return false; - // Examine the condition code of the user. - X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); - switch (CC) { - // Comparisons which don't examine the CF flag. - case X86::COND_O: case X86::COND_NO: - case X86::COND_E: case X86::COND_NE: - case X86::COND_S: case X86::COND_NS: - case X86::COND_P: case X86::COND_NP: - case X86::COND_L: case X86::COND_GE: - case X86::COND_G: case X86::COND_LE: - continue; - // Anything else: assume conservatively. - default: + unsigned UIOpc = UI->getOpcode(); + + if (UIOpc == ISD::CopyToReg) { + // Only examine CopyToReg uses that copy to EFLAGS. + if (cast(UI->getOperand(1))->getReg() != X86::EFLAGS) return false; + // Examine each user of the CopyToReg use. + for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); + FlagUI != FlagUE; ++FlagUI) { + // Only examine the Flag result. + if (FlagUI.getUse().getResNo() != 1) + continue; + // Anything unusual: assume conservatively. + if (!FlagUI->isMachineOpcode()) + return false; + // Examine the condition code of the user. + X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode()); + + if (mayUseCarryFlag(CC)) + return false; } + + // This CopyToReg is ok. Move on to the next user. + continue; + } + + // This might be an unselected node. So look for the pre-isel opcodes that + // use flags. + unsigned CCOpNo; + switch (UIOpc) { + default: + // Something unusual. Be conservative. + return false; + case X86ISD::SETCC: CCOpNo = 0; break; + case X86ISD::SETCC_CARRY: CCOpNo = 0; break; + case X86ISD::CMOV: CCOpNo = 2; break; + case X86ISD::BRCOND: CCOpNo = 2; break; } + + X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo); + if (mayUseCarryFlag(CC)) + return false; } return true; } @@ -2521,8 +2550,6 @@ switch (Opc) { default: return false; - case X86ISD::INC: - case X86ISD::DEC: case X86ISD::SUB: case X86ISD::SBB: break; @@ -2573,20 +2600,27 @@ MachineSDNode *Result; switch (Opc) { - case X86ISD::INC: - case X86ISD::DEC: { - unsigned NewOpc = - Opc == X86ISD::INC - ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) - : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); - const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; - Result = - CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops); - break; - } case X86ISD::ADD: - case X86ISD::ADC: case X86ISD::SUB: + // Try to match inc/dec. + if (!Subtarget->slowIncDec() || + CurDAG->getMachineFunction().getFunction().optForSize()) { + bool IsOne = isOneConstant(StoredVal.getOperand(1)); + bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); + // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. + if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { + unsigned NewOpc = + ((Opc == X86ISD::ADD) == IsOne) + ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) + : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); + const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; + Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, + MVT::Other, Ops); + break; + } + } + LLVM_FALLTHROUGH; + case X86ISD::ADC: case X86ISD::SBB: case X86ISD::AND: case X86ISD::OR: Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -337,7 +337,7 @@ // Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, UMUL, - INC, DEC, OR, XOR, AND, + OR, XOR, AND, // Bit field extract. BEXTR, @@ -568,7 +568,7 @@ /// LOCK-prefixed arithmetic read-modify-write instructions. /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) - LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC, + LADD, LSUB, LOR, LXOR, LAND, // Load, scalar_to_vector, and zero extend. VZEXT_LOAD, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -18644,44 +18644,6 @@ // which may be the result of a CAST. We use the variable 'Op', which is the // non-casted variable when we check for possible users. switch (ArithOp.getOpcode()) { - case ISD::ADD: - // We only want to rewrite this as a target-specific node with attached - // flags if there is a reasonable chance of either using that to do custom - // instructions selection that can fold some of the memory operands, or if - // only the flags are used. If there are other uses, leave the node alone - // and emit a test instruction. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && - UI->getOpcode() != ISD::SETCC && - UI->getOpcode() != ISD::STORE) - goto default_case; - - if (auto *C = dyn_cast(ArithOp.getOperand(1))) { - // An add of one will be selected as an INC. - if (C->isOne() && - (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction().optForSize())) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && - (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction().optForSize())) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } - } - - // Otherwise use a regular EFLAGS-setting add. - Opcode = X86ISD::ADD; - NumOperands = 2; - break; - case ISD::AND: // If the primary 'and' result isn't used, don't bother using X86ISD::AND, // because a TEST instruction will be better. @@ -18689,11 +18651,13 @@ break; LLVM_FALLTHROUGH; + case ISD::ADD: case ISD::SUB: case ISD::OR: case ISD::XOR: - // Similar to ISD::ADD above, check if the uses will preclude useful - // lowering of the target-specific node. + // Transform to an x86-specific ALU node with flags if there is a chance of + // using an RMW op or only the flags are used. Otherwise, leave + // the node alone and emit a 'test' instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() != ISD::CopyToReg && @@ -18704,6 +18668,7 @@ // Otherwise use a regular EFLAGS-setting instruction. switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); + case ISD::ADD: Opcode = X86ISD::ADD; break; case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; @@ -18714,8 +18679,6 @@ break; case X86ISD::ADD: case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: case X86ISD::OR: case X86ISD::XOR: case X86ISD::AND: @@ -19603,13 +19566,6 @@ switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: - // A subtract of one will be selected as a INC. Note that INC doesn't - // set CF, so we can't do this for UADDO. - if (isOneConstant(RHS)) { - BaseOp = X86ISD::INC; - Cond = X86::COND_O; - break; - } BaseOp = X86ISD::ADD; Cond = X86::COND_O; break; @@ -19618,13 +19574,6 @@ Cond = X86::COND_B; break; case ISD::SSUBO: - // A subtract of one will be selected as a DEC. Note that DEC doesn't - // set CF, so we can't do this for USUBO. - if (isOneConstant(RHS)) { - BaseOp = X86ISD::DEC; - Cond = X86::COND_O; - break; - } BaseOp = X86ISD::SUB; Cond = X86::COND_O; break; @@ -19675,8 +19624,7 @@ if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || - Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR || - Opc == X86ISD::XOR || Opc == X86ISD::AND)) + Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) return true; return false; @@ -25511,8 +25459,7 @@ } static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - bool AllowIncDec = true) { + const X86Subtarget &Subtarget) { unsigned NewOpc = 0; switch (N->getOpcode()) { case ISD::ATOMIC_LOAD_ADD: @@ -25536,25 +25483,6 @@ MachineMemOperand *MMO = cast(N)->getMemOperand(); - if (auto *C = dyn_cast(N->getOperand(2))) { - // Convert to inc/dec if they aren't slow or we are optimizing for size. - if (AllowIncDec && (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction().optForSize())) { - if ((NewOpc == X86ISD::LADD && C->isOne()) || - (NewOpc == X86ISD::LSUB && C->isAllOnesValue())) - return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N), - DAG.getVTList(MVT::i32, MVT::Other), - {N->getOperand(0), N->getOperand(1)}, - /*MemVT=*/N->getSimpleValueType(0), MMO); - if ((NewOpc == X86ISD::LSUB && C->isOne()) || - (NewOpc == X86ISD::LADD && C->isAllOnesValue())) - return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N), - DAG.getVTList(MVT::i32, MVT::Other), - {N->getOperand(0), N->getOperand(1)}, - /*MemVT=*/N->getSimpleValueType(0), MMO); - } - } - return DAG.getMemIntrinsicNode( NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other), {N->getOperand(0), N->getOperand(1), N->getOperand(2)}, @@ -27034,8 +26962,6 @@ case X86ISD::LOR: return "X86ISD::LOR"; case X86ISD::LXOR: return "X86ISD::LXOR"; case X86ISD::LAND: return "X86ISD::LAND"; - case X86ISD::LINC: return "X86ISD::LINC"; - case X86ISD::LDEC: return "X86ISD::LDEC"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; @@ -27073,8 +26999,6 @@ case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; - case X86ISD::INC: return "X86ISD::INC"; - case X86ISD::DEC: return "X86ISD::DEC"; case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; @@ -34297,16 +34221,7 @@ /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1), /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()), AN->getMemOperand()); - // If the comparision uses the CF flag we can't use INC/DEC instructions. - bool NeedCF = false; - switch (CC) { - default: break; - case X86::COND_A: case X86::COND_AE: - case X86::COND_B: case X86::COND_BE: - NeedCF = true; - break; - } - auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF); + auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget); DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpLHS.getValueType())); DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1)); Index: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td +++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td @@ -422,22 +422,35 @@ } // SchedRW } // CodeSize +def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86add_flag node:$lhs, node:$rhs), [{ + return hasNoCarryFlagUses(SDValue(N, 1)); +}]>; + +def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86sub_flag node:$lhs, node:$rhs), [{ + // Only use DEC if the result is used. + return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1)); +}]>; + // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16; + [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>, + OpSize16; def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32; + [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>, + OpSize32; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 // Short forms only valid in 32-bit mode. Selected during MCInst lowering. @@ -474,16 +487,18 @@ let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA. def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16; + [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>, + OpSize16; def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32; + [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>, + OpSize32; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 // Short forms only valid in 32-bit mode. Selected during MCInst lowering. Index: llvm/trunk/lib/Target/X86/X86InstrCompiler.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrCompiler.td +++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td @@ -776,53 +776,64 @@ defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">; defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">; -multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, - string frag, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALURMW] in { -def NAME#8m : I(frag # "_8") addr:$dst))]>, - LOCK; -def NAME#16m : I(frag # "_16") addr:$dst))]>, - OpSize16, LOCK; -def NAME#32m : I(frag # "_32") addr:$dst))]>, - OpSize32, LOCK; -def NAME#64m : RI(frag # "_64") addr:$dst))]>, - LOCK; -} -} +def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86lock_add node:$lhs, node:$rhs), [{ + return hasNoCarryFlagUses(SDValue(N, 0)); +}]>; -multiclass unary_atomic_intrin { - def _8 : PatFrag<(ops node:$ptr), - (atomic_op node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; - }]>; - def _16 : PatFrag<(ops node:$ptr), - (atomic_op node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; - }]>; - def _32 : PatFrag<(ops node:$ptr), - (atomic_op node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; - }]>; - def _64 : PatFrag<(ops node:$ptr), - (atomic_op node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; - }]>; -} +def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86lock_sub node:$lhs, node:$rhs), [{ + return hasNoCarryFlagUses(SDValue(N, 0)); +}]>; -defm X86lock_inc : unary_atomic_intrin; -defm X86lock_dec : unary_atomic_intrin; +let Predicates = [UseIncDec] in { + let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALURMW] in { + def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), + "inc{b}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>, + LOCK; + def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), + "inc{w}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>, + OpSize16, LOCK; + def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), + "inc{l}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>, + OpSize32, LOCK; + def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "inc{q}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, + LOCK; + + def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), + "dec{b}\t$dst", + [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>, + LOCK; + def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), + "dec{w}\t$dst", + [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>, + OpSize16, LOCK; + def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), + "dec{l}\t$dst", + [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>, + OpSize32, LOCK; + def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), + "dec{q}\t$dst", + [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>, + LOCK; + } -defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">; -defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">; + // Additional patterns for -1 constant. + def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>; + def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>; + def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>; + def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; + def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>; + def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>; + def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>; + def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; +} // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, @@ -2018,6 +2029,15 @@ def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; + + def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>; + def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>; + def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>; + def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>; + def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>; + def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>; + def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>; + def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>; } // or reg/reg. Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -253,8 +253,6 @@ def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; -def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; -def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, @@ -278,13 +276,6 @@ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPMemOperand]>; -def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPMemOperand]>; - def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>; def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; Index: llvm/trunk/test/CodeGen/X86/sub-with-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sub-with-overflow.ll +++ llvm/trunk/test/CodeGen/X86/sub-with-overflow.ll @@ -83,8 +83,7 @@ define i1 @func3(i32 %x) nounwind { ; CHECK-LABEL: func3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: decl %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: seto %al ; CHECK-NEXT: retl entry: