Index: include/llvm/CodeGen/MachineRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/MachineRegisterInfo.h
+++ include/llvm/CodeGen/MachineRegisterInfo.h
@@ -62,6 +62,12 @@
              VirtReg2IndexFunctor>
       VRegInfo;
 
+  /// CalleeSaveDisableRegs - Lists physical registers that should not be
+  /// preserved.
+  /// Some calling conventions disable CSRs that are passed to the
+  /// function as arguments or are returned by the function.
+  BitVector CalleeSaveDisableRegs;
+
   /// RegAllocHints - This vector records register allocation hints for virtual
   /// registers. For each virtual register, it keeps a register and hint type
   /// pair making up the allocation hint. Hint type is target specific except
@@ -196,6 +202,30 @@
   // Register Info
   //===--------------------------------------------------------------------===//
 
+  /// Marks a register (and its sub-registers) in the CalleeSaveDisableRegs bit
+  /// vector, i.e. as a register that should not be preserved by the callee.
+  void addCalleeSaveDisableRegister(unsigned Reg) {
+    assert(Reg < CalleeSaveDisableRegs.size() &&
+           "Trying to disable an invalid register");
+    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
+      CalleeSaveDisableRegs.set(*SubRegs);
+  }
+
+  /// \returns a reference to the bit vector of registers that should not be
+  /// callee saved; no copy is made.
+  const BitVector &getCalleeSaveDisabledRegs() const {
+    return CalleeSaveDisableRegs;
+  }
+
+  /// \returns a flipped copy of CalleeSaveDisableRegs (a set bit means the
+  /// register was not disabled). The member itself is left unmodified.
+  BitVector getCalleeSaveEnabledRegs() const {
+    BitVector CalleeSaveEnabledRegs = CalleeSaveDisableRegs;
+    return CalleeSaveEnabledRegs.flip();
+  }
+
   // Strictly for use by MachineInstr.cpp.
   void addRegOperandToUseList(MachineOperand *MO);
 
Index: include/llvm/CodeGen/RegisterClassInfo.h
===================================================================
--- include/llvm/CodeGen/RegisterClassInfo.h
+++ include/llvm/CodeGen/RegisterClassInfo.h
@@ -52,11 +52,11 @@
   const TargetRegisterInfo *TRI;
 
   // Callee saved registers of last MF. Assumed to be valid until the next
-  // runOnFunction() call.
-  const MCPhysReg *CalleeSaved;
+  // runOnFunction() call. Used only to determine if an update was made.
+  SmallVector<uint8_t, 16> CalleeSavedRegs;
 
-  // Map register number to CalleeSaved index + 1;
-  SmallVector<uint8_t, 4> CSRNum;
+  // Map a register to the last CSR that aliases it, or 0 if there is none.
+  SmallVector<MCPhysReg, 4> CalleeSavedAliases;
 
   // Reserved registers in the current MF.
   BitVector Reserved;
@@ -105,11 +105,11 @@
   }
 
   /// getLastCalleeSavedAlias - Returns the last callee saved register that
-  /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
+  /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR.
   unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
     assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
-    if (unsigned N = CSRNum[PhysReg])
-      return CalleeSaved[N-1];
+    if (PhysReg < CalleeSavedAliases.size())
+      return CalleeSavedAliases[PhysReg];
     return 0;
   }
 
Index: lib/CodeGen/AggressiveAntiDepBreaker.cpp
===================================================================
--- lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -158,13 +158,17 @@
       }
     }
 
-  // Mark live-out callee-saved registers. In a return block this is
+  // Mark live-out callee-saved registers (that are not
+  // passed/returned as arguments). In a return block this is
   // all callee-saved registers. In non-return this is any
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   BitVector Pristine = MFI.getPristineRegs(MF);
+  BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs();
   for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
     unsigned Reg = *I;
+    if (CSRDisabled.test(Reg))
+      continue;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
       unsigned AliasReg = *AI;
Index: lib/CodeGen/CriticalAntiDepBreaker.cpp
===================================================================
--- lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -66,12 +66,16 @@
       }
     }
 
-  // Mark live-out callee-saved registers. In a return block this is
+  // Mark live-out callee-saved registers (that are not
+  // passed/returned as arguments). In a return block this is
   // all callee-saved registers. In non-return this is any
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   BitVector Pristine = MFI.getPristineRegs(MF);
+  BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs();
   for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+    if (CSRDisabled.test(*I))
+      continue;
     if (!IsReturnBlock && !Pristine.test(*I)) continue;
     for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
       unsigned Reg = *AI;
Index: lib/CodeGen/LivePhysRegs.cpp
===================================================================
--- lib/CodeGen/LivePhysRegs.cpp
+++ lib/CodeGen/LivePhysRegs.cpp
@@ -161,8 +161,11 @@
 static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
                          const MachineFrameInfo &MFI,
                          const TargetRegisterInfo &TRI) {
+  const BitVector &CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs();
   for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
-    LiveRegs.addReg(*CSR);
+    if (!CSRDisabled.test(*CSR))
+      LiveRegs.addReg(*CSR);
+  // Registers recorded in the callee saved info are removed again below.
   for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
     LiveRegs.removeReg(Info.getReg());
 }
@@ -180,8 +183,10 @@
     if (MBB.isReturnBlock()) {
       // The return block has no successors whose live-ins we could merge
       // below. So instead we add the callee saved registers manually.
+      BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs();
       for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
-        addReg(*I);
+        if (!CSRDisabled.test(*I))
+          addReg(*I);
     } else {
       addPristines(*this, MF, MFI, *TRI);
     }
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -862,6 +862,9 @@
   for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
     BV.set(*CSR);
 
+  // Reset all callee save disabled registers
+  BV &= MF.getRegInfo().getCalleeSaveEnabledRegs();
+
   // Saved CSRs are not pristine.
   for (auto &I : getCalleeSavedInfo())
     for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
Index: lib/CodeGen/MachineInstr.cpp
===================================================================
--- lib/CodeGen/MachineInstr.cpp
+++ lib/CodeGen/MachineInstr.cpp
@@ -262,8 +262,21 @@
     return getBlockAddress() == Other.getBlockAddress() &&
            getOffset() == Other.getOffset();
   case MachineOperand::MO_RegisterMask:
-  case MachineOperand::MO_RegisterLiveOut:
-    return getRegMask() == Other.getRegMask();
+  case MachineOperand::MO_RegisterLiveOut: {
+    // Calculate the size of the RegMask
+    const MachineFunction *MF = getParent()->getParent()->getParent();
+    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+    // Compare the two RegMasks
+    const uint32_t *RegMask = getRegMask();
+    const uint32_t *OtherRegMask = Other.getRegMask();
+    for (unsigned I = 0, E = RegMaskSize; I < E; ++I)
+      if (RegMask[I] != OtherRegMask[I])
+        return false;
+
+    return true;
+  }
   case MachineOperand::MO_MCSymbol:
     return getMCSymbol() == Other.getMCSymbol();
   case MachineOperand::MO_CFIIndex:
Index: lib/CodeGen/MachineRegisterInfo.cpp
===================================================================
--- lib/CodeGen/MachineRegisterInfo.cpp
+++ lib/CodeGen/MachineRegisterInfo.cpp
@@ -35,6 +35,7 @@
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
   UsedPhysRegMask.resize(NumRegs);
+  CalleeSaveDisableRegs.resize(NumRegs);
   PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
 }
 
Index: lib/CodeGen/RegAllocPBQP.cpp
===================================================================
--- lib/CodeGen/RegAllocPBQP.cpp
+++ lib/CodeGen/RegAllocPBQP.cpp
@@ -555,8 +555,9 @@
 static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
                                    const MachineFunction &MF) {
   const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+  const BitVector &CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs();
   for (unsigned i = 0; CSR[i] != 0; ++i)
-    if (TRI.regsOverlap(reg, CSR[i]))
+    if (!CSRDisabled.test(CSR[i]) && TRI.regsOverlap(reg, CSR[i]))
       return true;
   return false;
 }
Index: lib/CodeGen/RegisterClassInfo.cpp
===================================================================
--- lib/CodeGen/RegisterClassInfo.cpp
+++ lib/CodeGen/RegisterClassInfo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 using namespace llvm;
 
@@ -29,8 +30,7 @@
 StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
          cl::desc("Limit all regclasses to N registers"));
 
-RegisterClassInfo::RegisterClassInfo()
-  : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(nullptr), TRI(nullptr) {}
 
 void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
   bool Update = false;
@@ -48,18 +48,26 @@
 
   // Does this MF have different CSRs?
   assert(TRI && "no register info set");
+
+  // Calculate the callee saved registers.
   const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
-  if (Update || CSR != CalleeSaved) {
-    // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+  SmallVector<uint8_t, 16> ActualCalleeSaved;
+  for (unsigned I = 0; unsigned Reg = CSR[I]; ++I) {
+    if (MF->getRegInfo().getCalleeSaveDisabledRegs().test(Reg))
+      continue;
+    ActualCalleeSaved.push_back(Reg);
+  }
+
+  if (Update || ActualCalleeSaved != CalleeSavedRegs) {
+    // Rebuild the CSR alias map from scratch. Every CSR alias saves the last
     // overlapping CSR.
-    CSRNum.clear();
-    CSRNum.resize(TRI->getNumRegs(), 0);
-    for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+    CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
+    for (auto Reg : ActualCalleeSaved)
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-        CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+        CalleeSavedAliases[*AI] = Reg;
     Update = true;
+    CalleeSavedRegs = ActualCalleeSaved;
   }
-  CalleeSaved = CSR;
 
   // Different reserved registers?
   const BitVector &RR = MF->getRegInfo().getReservedRegs();
@@ -103,7 +111,7 @@
     unsigned Cost = TRI->getCostPerUse(PhysReg);
     MinCost = std::min(MinCost, Cost);
 
-    if (CSRNum[PhysReg])
+    if (CalleeSavedAliases[PhysReg])
       // PhysReg aliases a CSR, save it for later.
       CSRAlias.push_back(PhysReg);
     else {
Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
===================================================================
--- lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -86,8 +86,11 @@
   // Functions which call __builtin_unwind_init get all their registers saved.
   bool CallsUnwindInit = MF.callsUnwindInit();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
+  BitVector CSRDisabled = MRI.getCalleeSaveDisabledRegs();
   for (unsigned i = 0; CSRegs[i]; ++i) {
     unsigned Reg = CSRegs[i];
+    if (CSRDisabled.test(Reg))
+      continue;
     if (CallsUnwindInit || MRI.isPhysRegModified(Reg))
       SavedRegs.set(Reg);
   }
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1077,7 +1077,8 @@
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals) const;
+                            SmallVectorImpl<SDValue> &InVals,
+                            uint32_t *RegMask) const;
     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                              const SDLoc &dl, SelectionDAG &DAG,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -2205,6 +2205,11 @@
        ++I, ++OutsIndex) {
     CCValAssign &VA = RVLocs[I];
     assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // Add the register to the CalleeSaveDisableRegs list.
+    if (CallConv == CallingConv::X86_RegCall)
+      MF.getRegInfo().addCalleeSaveDisableRegister(VA.getLocReg());
+
     SDValue ValToCopy = OutVals[OutsIndex];
     EVT ValVT = ValToCopy.getValueType();
 
@@ -2279,6 +2284,10 @@
 
       assert(2 == RegsToPass.size() &&
              "Expecting two registers after Pass64BitArgInRegs");
+
+      // Add the second register to the CalleeSaveDisableRegs list.
+      if (CallConv == CallingConv::X86_RegCall)
+        MF.getRegInfo().addCalleeSaveDisableRegister(RVLocs[I].getLocReg());
     } else {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
     }
@@ -2335,6 +2344,10 @@
     // RAX/EAX now acts like a return value.
     RetOps.push_back(
         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+    // Add the returned register to the CalleeSaveDisableRegs list.
+    if (CallConv == CallingConv::X86_RegCall)
+      MF.getRegInfo().addCalleeSaveDisableRegister(RetValReg);
   }
 
   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -2514,8 +2527,10 @@
 SDValue X86TargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
-    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    uint32_t *RegMask) const {
 
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
   bool Is64Bit = Subtarget.is64Bit();
@@ -2529,6 +2544,14 @@
     CCValAssign &VA = RVLocs[I];
     EVT CopyVT = VA.getLocVT();
 
+    // In some calling conventions we need to remove the used registers
+    // from the register mask.
+    if (RegMask && CallConv == CallingConv::X86_RegCall) {
+      for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+    }
+
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
         ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
@@ -3172,6 +3195,12 @@
     }
   }
 
+  for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
+                                            E = MF.getRegInfo().livein_end();
+       I != E; I++)
+    if (CallConv == CallingConv::X86_RegCall)
+      MF.getRegInfo().addCalleeSaveDisableRegister(I->first);
+
   return Chain;
 }
 
@@ -3688,7 +3717,33 @@
       Mask = RegInfo->getNoPreservedMask();
   }
 
-  Ops.push_back(DAG.getRegisterMask(Mask));
+  // Define a new register mask from the existing mask.
+  uint32_t *RegMask;
+
+  // In some calling conventions we need to remove the used physical registers
+  // from the reg mask.
+  if (CallConv == CallingConv::X86_RegCall) {
+    /// @todo Need to add support in MIPrinter and MIParser to represent
+    ///       the customized RegMask.
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Allocate a new Reg Mask and copy Mask.
+    RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
+    unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+    memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
+
+    // Make sure all sub registers of the argument registers are reset
+    // in the RegMask.
+    for (auto const &RegPair : RegsToPass)
+      for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+  } else
+    // Simply use the same mask instead of allocating a new one.
+    RegMask = const_cast<uint32_t *>(Mask);
+
+  // Create the RegMask Operand according to our updated mask.
+  Ops.push_back(DAG.getRegisterMask(RegMask));
 
   if (InFlag.getNode())
     Ops.push_back(InFlag);
@@ -3741,8 +3796,8 @@
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
-                         Ins, dl, DAG, InVals);
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, RegMask);
 }
 
 //===----------------------------------------------------------------------===//
@@ -25555,6 +25610,8 @@
       MachineInstrBuilder MIB(*MF, &II);
       for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
         unsigned Reg = SavedRegs[RI];
+        if (MRI->getCalleeSaveDisabledRegs().test(Reg))
+          continue;
         if (!DefRegs[Reg])
           MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
       }
Index: lib/Target/X86/X86MachineFunctionInfo.cpp
===================================================================
--- lib/Target/X86/X86MachineFunctionInfo.cpp
+++ lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -9,6 +9,7 @@
 
 #include "X86MachineFunctionInfo.h"
 #include "X86RegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
@@ -25,6 +26,8 @@
       unsigned Reg = *CSR;
        ++CSR)
     {
+      if (MF->getRegInfo().getCalleeSaveDisabledRegs().test(Reg))
+        continue;
       if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
         RestoreBasePointerOffset -= SlotSize;
     }
Index: test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
===================================================================
--- test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
+++ test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu  | FileCheck --check-prefix=CHECK %s
+
+declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0);
+
+; In RegCall calling convention, %esi and %edi are callee saved registers.
+; One might think that the caller could assume that %esi value is the same before
+; and after calling the callee.
+; However, RegCall also says that a register that was used for 
+; passing/returning arguments, can be assumed to be used by the callee.
+; In other words, it is no longer a callee saved register.
+; In this case we want to see that %edi and %esi are saved and %esi is assumed
+; to be used by the caller.
+; This is a hipe CC function that doesn't save any register for the caller 
+; of the callee. So we can be sure that there is no other reason to save 
+; %esi, %edi and other registers (it returns the result in %esi).
+define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+  %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0)
+  %b2 = add i32 %b1, %d0
+  %b3 = add i32 %b2, %e0
+  ret i32 %b3
+}
+; CHECK-LABEL: caller
+; CHECK:       movl    %edx, %ebx
+; CHECK:       movl    %ebp, %esi
+; CHECK:       calll   callee
+; CHECK:       leal    (%eax,%ebx), %esi
+; CHECK:       addl    %ebp, %esi
+; CHECK:       retl
+
+!hipe.literals = !{ !0, !1, !2 }
+!0 = !{ !"P_NSP_LIMIT", i32 120 }
+!1 = !{ !"X86_LEAF_WORDS", i32 24 }
+!2 = !{ !"AMD64_LEAF_WORDS", i32 18 }
+
+; Make sure that the callee doesn't save registers that were used to pass arguments.
+define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+  %b1 = mul i32 7, %e0
+  %b2 = udiv i32 5, %e0
+  %b3 = mul i32 7, %d0
+  %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0
+  %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1
+  %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2
+  ret {i32, i32, i32} %b6
+}
+; CHECK-LABEL: test_callee
+; CHECK-NOT:   pushl %esi
+; CHECK-NOT:   pushl %edi
+; CHECK:       retl
Index: test/CodeGen/X86/avx512-regcall-NoMask.ll
===================================================================
--- test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -469,17 +469,14 @@
   ret <32 x float> %x4
 }
 
-; X32-LABEL: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
+; X32-LABEL: testi32_inp
+; X32: pushl {{%e(bx|bp)}}
+; X32: pushl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
 ; X32: retl
 
-; WIN64-LABEL: pushq	{{%r(bp|bx|1[0-5])}}
+; WIN64-LABEL: testi32_inp
 ; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
 ; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
 ; WIN64: pushq	{{%r(bp|bx|1[0-5])}}
@@ -489,7 +486,7 @@
 ; WIN64: popq	{{%r(bp|bx|1[0-5])}}
 ; WIN64: retq
 
-; LINUXOSX64-LABEL: pushq	{{%r(bp|bx|1[2-5])}}
+; LINUXOSX64-LABEL: testi32_inp
 ; LINUXOSX64: pushq	{{%r(bp|bx|1[2-5])}}
 ; LINUXOSX64: pushq	{{%r(bp|bx|1[2-5])}}
 ; LINUXOSX64: popq	{{%r(bp|bx|1[2-5])}}
Index: test/CodeGen/X86/sse-regcall.ll
===================================================================
--- test/CodeGen/X86/sse-regcall.ll
+++ test/CodeGen/X86/sse-regcall.ll
@@ -37,48 +37,42 @@
 }
 
 ; WIN64-LABEL: testf32_inp
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
 ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
 ; WIN64: retq
 
 ; WIN32-LABEL: testf32_inp
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}}  {{#+}} 16-byte Spill
+; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}}  {{#+}} 16-byte Spill
 ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
 ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
 ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
 ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}}  {{#+}} 16-byte Reload
+; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}}  {{#+}} 16-byte Reload
 ; WIN32: retl
 
 ; LINUXOSX-LABEL: testf32_inp
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}}  {{#+}} 16-byte Spill
 ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
 ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}}  {{#+}} 16-byte Reload
 ; LINUXOSX: retq
 
 ;test calling conventions - input parameters, callee saved XMMs
@@ -93,10 +87,6 @@
 ; WIN32-LABEL: testi32_inp
 ; WIN32: pushl {{%e(si|di|bx|bp)}}
 ; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
 ; WIN32: popl {{%e(si|di|bx|bp)}}
 ; WIN32: popl {{%e(si|di|bx|bp)}}
 ; WIN32: retl