// Patch summary: permit X86 INC/DEC selection when the function is optimized
// for size, even if the subtarget reports slowIncDec().
//
// Files touched:
//   * lib/Target/X86/X86ISelLowering.cpp - an add of 1 / -1 is now turned into
//     X86ISD::INC / X86ISD::DEC when `!Subtarget.slowIncDec()` OR the current
//     function is optForSize().
//   * lib/Target/X86/X86InstrInfo.td - removes the `NotSlowIncDec` predicate and
//     adds `NotSlowIncDec_Or_OptForSize`
//     ("!Subtarget->slowIncDec() || MF->getFunction()->optForSize()") next to
//     the OptForSize / OptForMinSize / OptForSpeed predicates.
//   * lib/Target/X86/X86InstrArithmetic.td - the memory-form INC*m / DEC*m
//     `let` blocks switch from NotSlowIncDec to NotSlowIncDec_Or_OptForSize.
//   * lib/Target/X86/X86InstrCompiler.td - drops NotSlowIncDec from the
//     XOR+INC/DEC constant-materialization predicates (leaving OptForSize and
//     Not64BitMode), removes it from the `let` inside LOCK_ArithUnOp, moves the
//     RELEASE_INC / RELEASE_DEC requirement onto the enclosing `let` using the
//     new predicate, and switches the register INC/DEC patterns to it.
//   * test/CodeGen/X86/slow-incdec.ll - adds per-function -LABEL checks and a
//     new `optsize` function, slow_3, which is expected to use `inc` (and not
//     `addl $1`) under both the INCDEC and ADD check prefixes.
//
// NOTE(review): this copy has its newlines collapsed, and text between some
// angle brackets appears to have been lost (e.g. `dyn_cast(ArithOp...` has no
// template argument, `multiclass LOCK_ArithUnOp Opc8, bits<8> Opc` is missing
// its opening parameter list, and the LOCK_ArithUnOp hunk jumps from
// `def NAME#8m : I;` straight to `defm LOCK_DEC`). The trailing `+}` in that
// hunk presumably closes a new enclosing `let Predicates = ... in {` around the
// LOCK_INC / LOCK_DEC defms - confirm against the original patch before
// attempting to apply it.
Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16562,14 +16562,18 @@ if (ConstantSDNode *C = dyn_cast(ArithOp.getOperand(1))) { // An add of one will be selected as an INC. - if (C->isOne() && !Subtarget.slowIncDec()) { + if (C->isOne() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && !Subtarget.slowIncDec()) { + if (C->isAllOnesValue() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::DEC; NumOperands = 1; break; Index: lib/Target/X86/X86InstrArithmetic.td =================================================================== --- lib/Target/X86/X86InstrArithmetic.td +++ lib/Target/X86/X86InstrArithmetic.td @@ -481,7 +481,8 @@ } // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], + Predicates = [NotSlowIncDec_Or_OptForSize] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -528,7 +529,8 @@ } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], + Predicates = [NotSlowIncDec_Or_OptForSize] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- 
lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -273,7 +273,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; } -let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], +let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. @@ -698,7 +698,7 @@ multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, int Increment, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I; defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">; +} // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, @@ -942,17 +944,17 @@ [(atomic_store_64 addr:$dst, dag64)]>; } -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], Predicates = [NotSlowIncDec_Or_OptForSize] in { defm RELEASE_INC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), (add (atomic_load_32 addr:$dst), (i32 1)), - (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 1))>; defm RELEASE_DEC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 -1)), (add (atomic_load_16 addr:$dst), (i16 -1)), (add (atomic_load_32 addr:$dst), (i32 -1)), - (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 -1))>; } /* TODO: These don't work because the type inference of TableGen fails. @@ -1917,7 +1919,7 @@ // Increment/Decrement reg. 
// Do not make INC/DEC if it is slow -let Predicates = [NotSlowIncDec] in { +let Predicates = [NotSlowIncDec_Or_OptForSize] in { def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -908,12 +908,13 @@ def OptForSize : Predicate<"MF->getFunction()->optForSize()">; def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; + def NotSlowIncDec_Or_OptForSize : Predicate<"!Subtarget->slowIncDec() || " + "MF->getFunction()->optForSize()">; } def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; -def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; Index: test/CodeGen/X86/slow-incdec.ll =================================================================== --- test/CodeGen/X86/slow-incdec.ll +++ test/CodeGen/X86/slow-incdec.ll @@ -2,16 +2,26 @@ ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s ; check -mattr=-slow-incdec +; INCDEC-LABEL: slow_1: ; INCDEC-NOT: addl $-1 ; INCDEC: dec +; INCDEC-LABEL: slow_2: +; INCDEC-NOT: addl $1 +; INCDEC: inc +; INCDEC-LABEL: slow_3: ; INCDEC-NOT: addl $1 ; INCDEC: inc ; check -mattr=+slow-incdec +; ADD-LABEL: slow_1: ; ADD: addl $-1 ; ADD-NOT: dec +; ADD-LABEL: slow_2: ; ADD: addl $1 ; ADD-NOT: inc +; ADD-LABEL: slow_3: +; ADD-NOT: addl $1 +; ADD: inc ; Function Attrs: nounwind readonly define i32 
@slow_1(i32* nocapture readonly %a, i32 %s) #0 { @@ -74,6 +84,35 @@ ret i32 %i.0.lcssa } +define i32 @slow_3(i32* nocapture readonly %a, i32 %s) optsize { +entry: + %cmp5 = icmp eq i32 %s, 0 + br i1 %cmp5, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i32 %inc, 0 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06 + %0 = load i32, i32* %arrayidx, align 4, !tbaa !1 + %cmp1 = icmp eq i32 %0, 0 + %inc = add nsw i32 %i.06, 1 + br i1 %cmp1, label %for.end.loopexit, label %for.cond + +for.end.loopexit: ; preds = %for.cond, %for.body + %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ] + ret i32 %i.0.lcssa +} + !1 = !{!2, !2, i64 0} !2 = !{!"int", !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0}