Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -16587,14 +16587,18 @@ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) { // An add of one will be selected as an INC. - if (C->isOne() && !Subtarget.slowIncDec()) { + if (C->isOne() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && !Subtarget.slowIncDec()) { + if (C->isAllOnesValue() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::DEC; NumOperands = 1; break; Index: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td +++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td @@ -481,7 +481,7 @@ } // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -528,7 +528,7 @@ } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; Index: llvm/trunk/lib/Target/X86/X86InstrCompiler.td 
=================================================================== --- llvm/trunk/lib/Target/X86/X86InstrCompiler.td +++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td @@ -273,7 +273,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; } -let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], +let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. @@ -698,7 +698,7 @@ multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, int Increment, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I; defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">; +} // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, @@ -942,17 +944,17 @@ [(atomic_store_64 addr:$dst, dag64)]>; } -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], Predicates = [UseIncDec] in { defm RELEASE_INC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), (add (atomic_load_32 addr:$dst), (i32 1)), - (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 1))>; defm RELEASE_DEC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 -1)), (add (atomic_load_16 addr:$dst), (i16 -1)), (add (atomic_load_32 addr:$dst), (i32 -1)), - (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 -1))>; } /* TODO: These don't work because the type inference of TableGen fails. @@ -1917,7 +1919,7 @@ // Increment/Decrement reg. 
// Do not make INC/DEC if it is slow -let Predicates = [NotSlowIncDec] in { +let Predicates = [UseIncDec] in { def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -909,12 +909,13 @@ def OptForSize : Predicate<"MF->getFunction()->optForSize()">; def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; + def UseIncDec : Predicate<"!Subtarget->slowIncDec() || " + "MF->getFunction()->optForSize()">; } def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; -def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; Index: llvm/trunk/test/CodeGen/X86/slow-incdec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/slow-incdec.ll +++ llvm/trunk/test/CodeGen/X86/slow-incdec.ll @@ -35,33 +35,21 @@ } define i32 @inc_size(i32 %x) optsize { -; INCDEC-LABEL: inc_size: -; INCDEC: # BB#0: -; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax -; INCDEC-NEXT: incl %eax -; INCDEC-NEXT: retl -; -; ADD-LABEL: inc_size: -; ADD: # BB#0: -; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax -; ADD-NEXT: addl $1, %eax -; ADD-NEXT: retl +; CHECK-LABEL: inc_size: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: retl %r = add i32 %x, 1 ret i32 %r } define i32 @dec_size(i32 %x) optsize { -; INCDEC-LABEL: 
dec_size: -; INCDEC: # BB#0: -; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax -; INCDEC-NEXT: decl %eax -; INCDEC-NEXT: retl -; -; ADD-LABEL: dec_size: -; ADD: # BB#0: -; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax -; ADD-NEXT: addl $-1, %eax -; ADD-NEXT: retl +; CHECK-LABEL: dec_size: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: decl %eax +; CHECK-NEXT: retl %r = add i32 %x, -1 ret i32 %r }