Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -226,14 +226,12 @@ "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; -// TODO: This feature ought to be renamed. -// What it really refers to are CPUs for which certain instructions -// (which ones besides the example below?) are microcoded. -// The best examples of this are the memory forms of CALL and PUSH -// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. -def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", - "CallRegIndirect", "true", - "Call register indirect">; +// On some processors, instructions that implicitly take two memory operands are +// slow. In practice, this means that CALL, PUSH, and POP with memory operands +// should be avoided in favor of a MOV + register CALL/PUSH/POP. 
+def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", + "SlowTwoMemOps", "true", + "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", @@ -401,7 +399,7 @@ FeatureLEAForSP, FeatureSlowDivide32, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureLEAUsesAG, FeaturePadShortFunctions, FeatureLAHFSAHF @@ -421,7 +419,7 @@ FeaturePCLMUL, FeatureAES, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeaturePRFCHW, FeatureSlowLEA, FeatureSlowIncDec, @@ -444,7 +442,7 @@ FeaturePCLMUL, FeatureAES, FeaturePRFCHW, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, FeatureSlowBTMem, @@ -597,6 +595,7 @@ FeatureBMI, FeatureBMI2, FeatureFMA, + FeatureSlowTwoMemOps, FeatureFastPartialYMMorZMMWrite ]>; def : KnightsLandingProc<"knl">; Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -575,7 +575,7 @@ if (OptLevel != CodeGenOpt::None && // Only does this when target favors doesn't favor register indirect // call. - ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. 
(Subtarget->is64Bit() || Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -8010,13 +8010,13 @@ unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap<unsigned, std::pair<uint16_t, uint16_t> > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = Subtarget.callRegIndirect(); + bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); bool isTwoAddrFold = false; // For CPUs that favor the register form of a call or push, // do not fold loads into calls or pushes, unless optimizing for size // aggressively. - if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + if (isSlowTwoMemOps && !MF.getFunction()->optForMinSize() && (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || MI.getOpcode() == X86::PUSH64r)) Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -912,7 +912,7 @@ def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; -def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -245,9 +245,9 @@ /// a stall when returning too early.
bool PadShortFunctions; - /// True if the Calls with memory reference should be converted - /// to a register-based indirect call. - bool CallRegIndirect; + /// True if two memory operand instructions should use a temporary register + /// instead. + bool SlowTwoMemOps; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. @@ -492,7 +492,7 @@ bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool callRegIndirect() const { return CallRegIndirect; } + bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slow3OpsLEA() const { return Slow3OpsLEA; } Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp @@ -351,7 +351,7 @@ HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - CallRegIndirect = false; + SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; Slow3OpsLEA = false; Index: llvm/trunk/test/CodeGen/X86/fold-push.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-push.ll +++ llvm/trunk/test/CodeGen/X86/fold-push.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL -; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM +; RUN: llc < %s -mtriple=i686-windows -mattr=slow-two-mem-ops | FileCheck %s -check-prefix=CHECK -check-prefix=SLM declare void @foo(i32 %r)