Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -226,14 +226,12 @@ "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; -// TODO: This feature ought to be renamed. -// What it really refers to are CPUs for which certain instructions -// (which ones besides the example below?) are microcoded. -// The best examples of this are the memory forms of CALL and PUSH -// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. -def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", - "CallRegIndirect", "true", - "Call register indirect">; +// On some processors, instructions that implicitly take two memory operands are +// slow. In practice, this means that CALL, PUSH, and POP with memory operands +// should be avoided in favor of a MOV + register CALL/PUSH/POP. 
+def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", + "SlowTwoMemOps", "true", + "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", @@ -401,7 +399,7 @@ FeatureLEAForSP, FeatureSlowDivide32, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureLEAUsesAG, FeaturePadShortFunctions, FeatureLAHFSAHF @@ -421,7 +419,7 @@ FeaturePCLMUL, FeatureAES, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeaturePRFCHW, FeatureSlowLEA, FeatureSlowIncDec, @@ -444,7 +442,7 @@ FeaturePCLMUL, FeatureAES, FeaturePRFCHW, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, FeatureSlowBTMem, @@ -597,6 +595,7 @@ FeatureBMI, FeatureBMI2, FeatureFMA, + FeatureSlowTwoMemOps, FeatureFastPartialYMMorZMMWrite ]>; def : KnightsLandingProc<"knl">; Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -575,7 +575,7 @@ if (OptLevel != CodeGenOpt::None && // Only does this when target favors doesn't favor register indirect // call. - ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. 
(Subtarget->is64Bit() || Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -8010,13 +8010,13 @@ unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap<unsigned, std::pair<uint16_t, uint16_t> > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = Subtarget.callRegIndirect(); + bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); bool isTwoAddrFold = false; // For CPUs that favor the register form of a call or push, // do not fold loads into calls or pushes, unless optimizing for size // aggressively. - if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + if (isSlowTwoMemOps && !MF.getFunction()->optForMinSize() && (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || MI.getOpcode() == X86::PUSH64r)) Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -912,7 +912,7 @@ def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; -def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -245,9 +245,9 @@ /// a stall when returning too early.
bool PadShortFunctions; - /// True if the Calls with memory reference should be converted - /// to a register-based indirect call. - bool CallRegIndirect; + /// True if two memory operand instructions should use a temporary register + /// instead. + bool SlowTwoMemOps; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. @@ -492,7 +492,7 @@ bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool callRegIndirect() const { return CallRegIndirect; } + bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slow3OpsLEA() const { return Slow3OpsLEA; } Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp @@ -351,7 +351,7 @@ HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - CallRegIndirect = false; + SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; Slow3OpsLEA = false; Index: llvm/trunk/test/CodeGen/X86/fold-push.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-push.ll +++ llvm/trunk/test/CodeGen/X86/fold-push.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL -; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM +; RUN: llc < %s -mtriple=i686-windows -mattr=slow-two-mem-ops | FileCheck %s -check-prefix=CHECK -check-prefix=SLM declare void @foo(i32 %r)