Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16562,14 +16562,18 @@ if (ConstantSDNode *C = dyn_cast(ArithOp.getOperand(1))) { // An add of one will be selected as an INC. - if (C->isOne() && !Subtarget.slowIncDec()) { + if (C->isOne() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && !Subtarget.slowIncDec()) { + if (C->isAllOnesValue() && + (!Subtarget.slowIncDec() || + DAG.getMachineFunction().getFunction()->optForSize())) { Opcode = X86ISD::DEC; NumOperands = 1; break; Index: lib/Target/X86/X86InstrArithmetic.td =================================================================== --- lib/Target/X86/X86InstrArithmetic.td +++ lib/Target/X86/X86InstrArithmetic.td @@ -481,7 +481,8 @@ } // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], + Predicates = [NotSlowIncDec_Or_OptForSize] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -528,7 +529,8 @@ } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], + Predicates = [NotSlowIncDec_Or_OptForSize] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -273,7 +273,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; } -let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], +let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. @@ -698,7 +698,7 @@ multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, int Increment, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in { + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I; defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">; +} // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, @@ -942,17 +944,17 @@ [(atomic_store_64 addr:$dst, dag64)]>; } -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], Predicates = [NotSlowIncDec_Or_OptForSize] in { defm RELEASE_INC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), (add (atomic_load_32 addr:$dst), (i32 1)), - (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 1))>; defm RELEASE_DEC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 -1)), (add (atomic_load_16 addr:$dst), (i16 -1)), (add (atomic_load_32 addr:$dst), (i32 -1)), - (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; + (add (atomic_load_64 addr:$dst), (i64 -1))>; } /* TODO: These don't work because the type inference of TableGen fails. @@ -1917,7 +1919,7 @@ // Increment/Decrement reg. // Do not make INC/DEC if it is slow -let Predicates = [NotSlowIncDec] in { +let Predicates = [NotSlowIncDec_Or_OptForSize] in { def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -908,12 +908,13 @@ def OptForSize : Predicate<"MF->getFunction()->optForSize()">; def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; + def NotSlowIncDec_Or_OptForSize : Predicate<"!Subtarget->slowIncDec() || " + "MF->getFunction()->optForSize()">; } def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; -def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; Index: test/CodeGen/X86/slow-incdec.ll =================================================================== --- test/CodeGen/X86/slow-incdec.ll +++ test/CodeGen/X86/slow-incdec.ll @@ -1,80 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=-slow-incdec < %s | FileCheck -check-prefix=INCDEC %s ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s -; check -mattr=-slow-incdec -; INCDEC-NOT: addl $-1 -; INCDEC: dec -; INCDEC-NOT: addl $1 -; INCDEC: inc - -; check -mattr=+slow-incdec -; ADD: addl $-1 -; ADD-NOT: dec -; ADD: addl $1 -; ADD-NOT: inc - -; Function Attrs: nounwind readonly -define i32 @slow_1(i32* nocapture readonly %a, i32 %s) #0 { -entry: - %cmp5 = icmp eq i32 %s, 0 - br i1 %cmp5, label %for.end, label %for.body.preheader - -for.body.preheader: ; preds = %entry - br label %for.body - -for.cond: ; preds = %for.body - %cmp = icmp eq i32 %dec, 0 - br i1 %cmp, label %for.end.loopexit, label %for.body - -for.body: ; preds = %for.body.preheader, %for.cond - %i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4, !tbaa !1 - %cmp1 = icmp eq i32 %0, 0 +define i32 @inc(i32 %x) { +; INCDEC-LABEL: inc: +; INCDEC: # BB#0: +; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax +; INCDEC-NEXT: incl %eax +; INCDEC-NEXT: retl ; - %dec = add nsw i32 %i.06, -1 - br i1 %cmp1, label %for.end.loopexit, label %for.cond - -for.end.loopexit: ; preds = %for.cond, %for.body - %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ] - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ] - ret i32 %i.0.lcssa +; ADD-LABEL: inc: +; ADD: # BB#0: +; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax +; ADD-NEXT: addl $1, %eax +; ADD-NEXT: retl + %r = add i32 %x, 1 + ret i32 %r } -; Function Attrs: nounwind readonly -define i32 @slow_2(i32* nocapture readonly %a, i32 %s) #0 { -entry: - %cmp5 = icmp eq i32 %s, 0 - br i1 %cmp5, label %for.end, label %for.body.preheader - -for.body.preheader: ; preds = %entry - br label %for.body - -for.cond: ; preds = %for.body - %cmp = icmp eq i32 %inc, 0 - br i1 %cmp, label %for.end.loopexit, label %for.body - -for.body: ; preds = %for.body.preheader, %for.cond - %i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4, !tbaa !1 - %cmp1 = icmp eq i32 %0, 0 - %inc = add nsw i32 %i.06, 1 - br i1 %cmp1, label %for.end.loopexit, label %for.cond - -for.end.loopexit: ; preds = %for.cond, %for.body - %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ] - br label %for.end +define i32 @dec(i32 %x) { +; INCDEC-LABEL: dec: +; INCDEC: # BB#0: +; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax +; INCDEC-NEXT: decl %eax +; INCDEC-NEXT: retl +; +; ADD-LABEL: dec: +; ADD: # BB#0: +; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax +; ADD-NEXT: addl $-1, %eax +; ADD-NEXT: retl + %r = add i32 %x, -1 + ret i32 %r +} -for.end: ; preds = %for.end.loopexit, %entry - %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ] - ret i32 %i.0.lcssa +define i32 @inc_size(i32 %x) optsize { +; INCDEC-LABEL: inc_size: +; INCDEC: # BB#0: +; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax +; INCDEC-NEXT: incl %eax +; INCDEC-NEXT: retl +; +; ADD-LABEL: inc_size: +; ADD: # BB#0: +; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax +; ADD-NEXT: incl %eax +; ADD-NEXT: retl + %r = add i32 %x, 1 + ret i32 %r } -!1 = !{!2, !2, i64 0} -!2 = !{!"int", !3, i64 0} -!3 = !{!"omnipotent char", !4, i64 0} -!4 = !{!"Simple C/C++ TBAA"} +define i32 @dec_size(i32 %x) optsize { +; INCDEC-LABEL: dec_size: +; INCDEC: # BB#0: +; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax +; INCDEC-NEXT: decl %eax +; INCDEC-NEXT: retl +; +; ADD-LABEL: dec_size: +; ADD: # BB#0: +; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax +; ADD-NEXT: decl %eax +; ADD-NEXT: retl + %r = add i32 %x, -1 + ret i32 %r +}