Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -168,6 +168,8 @@ "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; +def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", + "INC and DEC instructions are slower than ADD and SUB">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -228,7 +230,7 @@ FeaturePCLMUL, FeatureAES, FeatureCallRegIndirect, FeaturePRFCHW, - FeatureSlowLEA, + FeatureSlowLEA, FeatureSlowIncDec, FeatureSlowBTMem, FeatureFastUAMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -10125,22 +10125,23 @@ UI->getOpcode() != ISD::STORE) goto default_case; - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { - // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } + if (!Subtarget->slowIncDec()) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { + // An add of one will be selected as an INC. + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + NumOperands = 1; + break; + } - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; + // An add of negative one (subtract of one) will be selected as a DEC. + if (C->getAPIntValue().isAllOnesValue()) { + Opcode = X86ISD::DEC; + NumOperands = 1; + break; + } } - } // Otherwise use a regular EFLAGS-setting add. 
Opcode = X86ISD::ADD; Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -1696,20 +1696,32 @@ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Increment reg. -def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; -def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; - -// Decrement reg. -def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; -def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; +let Predicates = [NotSlowIncDec] in { + def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; + def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; +} +let Predicates = [NotSlowIncDec,Not64BitMode] in { + def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; + def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; +} +let Predicates = [NotSlowIncDec,In64BitMode] in { + def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>; + def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>; +} + + // Decrement reg. 
+let Predicates = [NotSlowIncDec] in { + def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; + def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; +} +let Predicates = [NotSlowIncDec,Not64BitMode] in { + def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; + def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; +} +let Predicates = [NotSlowIncDec,In64BitMode] in { + def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>; + def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>; +} // or reg/reg. def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -795,6 +795,7 @@ def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. 
Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -181,6 +181,9 @@ /// SlowLEA - True if the LEA instruction with certain arguments is slow bool SlowLEA; + /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags + bool SlowIncDec; + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -319,6 +322,7 @@ bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } + bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -291,6 +291,7 @@ CallRegIndirect = false; LEAUsesAG = false; SlowLEA = false; + SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? 
MaxInlineSizeThreshold = 128; Index: test/CodeGen/X86/slow-incdec.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/slow-incdec.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=-slow-incdec < %s | FileCheck -check-prefix=INCDEC %s +; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s + +; check -mattr=-slow-incdec +; INCDEC-NOT: addl $-1 +; INCDEC: dec +; INCDEC-NOT: addl $1 +; INCDEC: inc + +; check -mattr=+slow-incdec +; ADD: addl $-1 +; ADD-NOT: dec +; ADD: addl $1 +; ADD-NOT: inc + +; Function Attrs: nounwind readonly +define i32 @slow_1(i32* nocapture readonly %a, i32 %s) #0 { +entry: + %cmp5 = icmp eq i32 %s, 0 + br i1 %cmp5, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.06 + %0 = load i32* %arrayidx, align 4, !tbaa !1 + %cmp1 = icmp eq i32 %0, 0 +; + %dec = add nsw i32 %i.06, -1 + br i1 %cmp1, label %for.end.loopexit, label %for.cond + +for.end.loopexit: ; preds = %for.cond, %for.body + %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ] + ret i32 %i.0.lcssa +} + +; Function Attrs: nounwind readonly +define i32 @slow_2(i32* nocapture readonly %a, i32 %s) #0 { +entry: + %cmp5 = icmp eq i32 %s, 0 + br i1 %cmp5, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i32 %inc, 0 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.body: ; 
preds = %for.body.preheader, %for.cond + %i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.06 + %0 = load i32* %arrayidx, align 4, !tbaa !1 + %cmp1 = icmp eq i32 %0, 0 + %inc = add nsw i32 %i.06, 1 + br i1 %cmp1, label %for.end.loopexit, label %for.cond + +for.end.loopexit: ; preds = %for.cond, %for.body + %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ] + ret i32 %i.0.lcssa +} + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"}