Index: include/llvm/IR/CallingConv.h
===================================================================
--- include/llvm/IR/CallingConv.h
+++ include/llvm/IR/CallingConv.h
@@ -196,6 +196,14 @@
     /// Register calling convention used for parameters transfer optimization
     X86_RegCall = 92,
 
+    /// C calling convention for library calls. Honors the
+    /// NumRegisterParameters (regparm) module flag.
+    X86_LibCall = 93,
+
+    /// StdCall calling convention for library calls. Honors the
+    /// NumRegisterParameters (regparm) module flag.
+    X86_LibStdCall = 94,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: include/llvm/IR/Module.h
===================================================================
--- include/llvm/IR/Module.h
+++ include/llvm/IR/Module.h
@@ -722,9 +722,18 @@
   /// that has "dropped all references", except operator delete.
   void dropAllReferences();
 
-/// @}
-/// @name Utility functions for querying Debug information.
-/// @{
+  /// @}
+  /// @name Utility functions for querying Calling Convention information by
+  /// checking module flags.
+  /// @{
+
+  /// \brief Returns the number of register parameters by checking the
+  /// "NumRegisterParameters" module flag; returns 0 if the flag is absent.
+  unsigned getNumberRegisterParameters() const;
+
+  /// @}
+  /// @name Utility functions for querying Debug information.
+  /// @{
 
   /// \brief Returns the Dwarf Version by checking module flags.
   unsigned getDwarfVersion() const;
Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp
+++ lib/AsmParser/LLLexer.cpp
@@ -571,7 +571,9 @@
   KEYWORD(ccc);
   KEYWORD(fastcc);
   KEYWORD(coldcc);
+  KEYWORD(x86_libcallcc);
   KEYWORD(x86_stdcallcc);
+  KEYWORD(x86_libstdcallcc);
   KEYWORD(x86_fastcallcc);
   KEYWORD(x86_thiscallcc);
   KEYWORD(x86_vectorcallcc);
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -1690,7 +1690,13 @@
   case lltok::kw_ccc:            CC = CallingConv::C; break;
   case lltok::kw_fastcc:         CC = CallingConv::Fast; break;
   case lltok::kw_coldcc:         CC = CallingConv::Cold; break;
+  case lltok::kw_x86_libcallcc:
+    CC = CallingConv::X86_LibCall;
+    break;
   case lltok::kw_x86_stdcallcc:  CC = CallingConv::X86_StdCall; break;
+  case lltok::kw_x86_libstdcallcc:
+    CC = CallingConv::X86_LibStdCall;
+    break;
   case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
   case lltok::kw_x86_regcallcc:  CC = CallingConv::X86_RegCall; break;
   case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h
+++ lib/AsmParser/LLToken.h
@@ -124,7 +124,9 @@
   kw_fastcc,
   kw_coldcc,
   kw_intel_ocl_bicc,
+  kw_x86_libcallcc,
   kw_x86_stdcallcc,
+  kw_x86_libstdcallcc,
   kw_x86_fastcallcc,
   kw_x86_thiscallcc,
   kw_x86_vectorcallcc,
Index: lib/IR/AsmWriter.cpp
===================================================================
--- lib/IR/AsmWriter.cpp
+++ lib/IR/AsmWriter.cpp
@@ -308,7 +308,13 @@
   case CallingConv::PreserveAll:   Out << "preserve_allcc"; break;
   case CallingConv::CXX_FAST_TLS:  Out << "cxx_fast_tlscc"; break;
   case CallingConv::GHC:           Out << "ghccc"; break;
+  case CallingConv::X86_LibCall:
+    Out << "x86_libcallcc";
+    break;
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
+  case CallingConv::X86_LibStdCall:
+    Out << "x86_libstdcallcc";
+    break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall:  Out << "x86_thiscallcc"; break;
   case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
Index: lib/IR/Mangler.cpp
===================================================================
--- lib/IR/Mangler.cpp
+++ lib/IR/Mangler.cpp
@@ -78,6 +78,7 @@
   switch (CC) {
   case CallingConv::X86_FastCall:
   case CallingConv::X86_StdCall:
+  case CallingConv::X86_LibStdCall:
   case CallingConv::X86_VectorCall:
     return true;
   default:
Index: lib/IR/Module.cpp
===================================================================
--- lib/IR/Module.cpp
+++ lib/IR/Module.cpp
@@ -465,6 +465,14 @@
     GIF.dropAllReferences();
 }
 
+unsigned Module::getNumberRegisterParameters() const {
+  // A missing "NumRegisterParameters" module flag means zero register params.
+  auto *Flag = getModuleFlag("NumRegisterParameters");
+  if (auto *CM = cast_or_null<ConstantAsMetadata>(Flag))
+    return cast<ConstantInt>(CM->getValue())->getZExtValue();
+  return 0;
+}
+
 unsigned Module::getDwarfVersion() const {
   auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version"));
   if (!Val)
Index: lib/Target/X86/X86CallingConv.h
===================================================================
--- lib/Target/X86/X86CallingConv.h
+++ lib/Target/X86/X86CallingConv.h
@@ -16,8 +16,13 @@
 #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
 
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86TargetMachine.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 
 namespace llvm {
 
@@ -57,18 +62,24 @@
   return false;
 }
 
-inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State) {
-  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
-  // not to split i64 and double between a register and stack
-  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
-  static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-  
+inline bool CC_X86_32_AssignToReg_MCU(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
+  // Bail out if the argument is InAlloca or ByVal.
+  if (ArgFlags.isInAlloca() || ArgFlags.isByVal())
+    return false;
+
+  // Similar to CCAssignToReg, but does not split multi-register args
+  // (i64/double) between a register and the stack.
+  MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned MaxRegs = sizeof(RegList) / sizeof(RegList[0]);
+  auto NumRegs = MaxRegs;
+
   SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
 
+  unsigned FirstFree = std::min(NumRegs, State.getFirstUnallocated(RegList));
+
   // If this is the first part of an double/i64/i128, or if we're already
   // in the middle of a split, add to the pending list. If this is not
   // the end of the split, return, otherwise go on to process the pending
@@ -83,10 +94,83 @@
   // If there are no pending members, we are not in the middle of a split,
   // so do the usual inreg stuff.
   if (PendingMembers.empty()) {
-    if (unsigned Reg = State.AllocateReg(RegList)) {
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    if (FirstFree < NumRegs)
+      if (unsigned Reg = State.AllocateReg(RegList[FirstFree++])) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+        return true;
+      }
+    return false;
+  }
+
+  assert(ArgFlags.isSplitEnd());
+
+  // We now have the entire original argument in PendingMembers, so decide
+  // whether to use registers or the stack.
+  // a) To use registers, we need to have enough of them free to contain
+  // the entire argument.
+  // b) We never want to use more than 2 registers for a single argument.
+
+  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+  for (auto &It : PendingMembers) {
+    // If available, always allocate the registers so that subsequent
+    // arguments cannot use them.
+    if (UseRegs)
+      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else
+      It.convertToMem(State.AllocateStack(4, 4));
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
+inline bool CC_X86_32_AssignToReg_NoSplit(unsigned &ValNo, MVT &ValVT,
+                                          MVT &LocVT,
+                                          CCValAssign::LocInfo &LocInfo,
+                                          ISD::ArgFlagsTy &ArgFlags,
+                                          CCState &State) {
+
+  // Bail out if the argument is InAlloca or ByVal.
+  if (ArgFlags.isInAlloca() || ArgFlags.isByVal())
+    return false;
+
+  // Similar to CCAssignToReg, but does not split multi-register args
+  // (i64/double) between a register and the stack.
+  MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned MaxRegs = sizeof(RegList) / sizeof(RegList[0]);
+
+  unsigned NumRegs = State.getMachineFunction().getMMI()
+      .getModule()
+      ->getNumberRegisterParameters();
+
+  assert(NumRegs <= MaxRegs && "More register parameters than registers");
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  unsigned FirstFree = std::min(NumRegs, State.getFirstUnallocated(RegList));
+
+  // If this is the first part of a double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return; otherwise go on to process the pending
+  // list.
+  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+    PendingMembers.push_back(
+        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+    if (!ArgFlags.isSplitEnd())
       return true;
-    }
+  }
+
+  // If there are no pending members, we are not in the middle of a split,
+  // so do the usual inreg stuff.
+  if (PendingMembers.empty()) {
+    if (FirstFree < NumRegs)
+      if (unsigned Reg = State.AllocateReg(RegList[FirstFree++])) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+        return true;
+      }
     return false;
   }
 
@@ -94,17 +178,19 @@
 
   // We now have the entire original argument in PendingMembers, so decide
   // whether to use registers or the stack.
-  // Per the MCU ABI:
   // a) To use registers, we need to have enough of them free to contain
   // the entire argument.
   // b) We never want to use more than 2 registers for a single argument.
 
-  unsigned FirstFree = State.getFirstUnallocated(RegList);
   bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
 
   for (auto &It : PendingMembers) {
+    // If available, always allocate the registers so that subsequent
+    // arguments cannot use them.
     if (UseRegs)
       It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else if (FirstFree < MaxRegs)
+      It.convertToMem(State.AllocateStack(4, 4, RegList[FirstFree++]));
     else
       It.convertToMem(State.AllocateStack(4, 4));
     State.addLoc(It);
@@ -118,4 +204,3 @@
 } // End llvm namespace
 
 #endif
-
Index: lib/Target/X86/X86CallingConv.td
===================================================================
--- lib/Target/X86/X86CallingConv.td
+++ lib/Target/X86/X86CallingConv.td
@@ -810,19 +810,28 @@
   CCDelegateTo<CC_X86_32_Common>
 ]>;
 
-def CC_X86_32_MCU : CallingConv<[
-  // Handles byval parameters.  Note that, like FastCC, we can't rely on
-  // the delegation to CC_X86_32_Common because that happens after code that
-  // puts arguments in registers.
-  CCIfByVal<CCPassByVal<4, 4>>,
+def CC_X86_32_LibCall : CallingConv<[
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // The 'nest' parameter, if any, is passed in ECX.
+  CCIfNest<CCAssignToReg<[ECX]>>,
 
+  // Non-vararg calls: pass i32 args in registers per NumRegisterParameters.
+  CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_AssignToReg_NoSplit">>>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_MCU : CallingConv<[
   // Promote i1/i8/i16 arguments to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
 
   // If the call is not a vararg call, some arguments may be passed
   // in integer registers.
-  CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,
-
+  CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_AssignToReg_MCU">>>,
+  
   // Otherwise, same as everything else.
   CCDelegateTo<CC_X86_32_Common>
 ]>;
@@ -984,6 +993,10 @@
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
   CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
 
+  // Library call CCs.
+  CCIfCC<"CallingConv::X86_LibCall", CCDelegateTo<CC_X86_32_LibCall>>,
+  CCIfCC<"CallingConv::X86_LibStdCall", CCDelegateTo<CC_X86_32_LibCall>>,
+
   // Otherwise, drop to normal X86-32 CC
   CCDelegateTo<CC_X86_32_C>
 ]>;
Index: lib/Target/X86/X86FastISel.cpp
===================================================================
--- lib/Target/X86/X86FastISel.cpp
+++ lib/Target/X86/X86FastISel.cpp
@@ -1195,12 +1195,10 @@
     return false;
 
   CallingConv::ID CC = F.getCallingConv();
-  if (CC != CallingConv::C &&
-      CC != CallingConv::Fast &&
-      CC != CallingConv::X86_FastCall &&
-      CC != CallingConv::X86_StdCall &&
-      CC != CallingConv::X86_ThisCall &&
-      CC != CallingConv::X86_64_SysV &&
+  if (CC != CallingConv::C && CC != CallingConv::X86_LibCall &&
+      CC != CallingConv::Fast && CC != CallingConv::X86_FastCall &&
+      CC != CallingConv::X86_StdCall && CC != CallingConv::X86_LibStdCall &&
+      CC != CallingConv::X86_ThisCall && CC != CallingConv::X86_64_SysV &&
       CC != CallingConv::X86_64_Win64)
     return false;
 
@@ -3131,11 +3129,13 @@
   switch (CC) {
   default: return false;
   case CallingConv::C:
+  case CallingConv::X86_LibCall:
   case CallingConv::Fast:
   case CallingConv::WebKit_JS:
   case CallingConv::Swift:
   case CallingConv::X86_FastCall:
   case CallingConv::X86_StdCall:
+  case CallingConv::X86_LibStdCall:
   case CallingConv::X86_ThisCall:
   case CallingConv::X86_64_Win64:
   case CallingConv::X86_64_SysV:
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -105,6 +105,10 @@
       addBypassSlowDiv(64, 32);
   }
 
+  // Default every runtime library call to the X86_LibCall convention.
+  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+    setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::X86_LibCall);
+
   if (Subtarget.isTargetKnownWindowsMSVC() ||
       Subtarget.isTargetWindowsItanium()) {
     // Setup Windows compiler runtime calls.
@@ -113,11 +117,11 @@
     setLibcallName(RTLIB::SREM_I64, "_allrem");
     setLibcallName(RTLIB::UREM_I64, "_aullrem");
     setLibcallName(RTLIB::MUL_I64, "_allmul");
-    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
-    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
-    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
-    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
-    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_LibStdCall);
+    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_LibStdCall);
+    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_LibStdCall);
+    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_LibStdCall);
+    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_LibStdCall);
   }
 
   if (Subtarget.isTargetDarwin()) {
@@ -2624,11 +2628,13 @@
   switch (CC) {
   // C calling conventions:
   case CallingConv::C:
+  case CallingConv::X86_LibCall:
   case CallingConv::X86_64_Win64:
   case CallingConv::X86_64_SysV:
   // Callee pop conventions:
   case CallingConv::X86_ThisCall:
   case CallingConv::X86_StdCall:
+  case CallingConv::X86_LibStdCall:
   case CallingConv::X86_VectorCall:
   case CallingConv::X86_FastCall:
     return true;
@@ -4198,6 +4204,7 @@
   default:
     return false;
   case CallingConv::X86_StdCall:
+  case CallingConv::X86_LibStdCall:
   case CallingConv::X86_FastCall:
   case CallingConv::X86_ThisCall:
   case CallingConv::X86_VectorCall:
@@ -20234,7 +20241,7 @@
 }
 
 // FIXME? Maybe this could be a TableGen attribute on some registers and
-// this table could be generated automatically from RegInfo.
+// this table could be generated automatically from RegInfo.
 unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                               SelectionDAG &DAG) const {
   const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
@@ -20428,7 +20435,9 @@
     default:
       llvm_unreachable("Unsupported calling convention");
     case CallingConv::C:
-    case CallingConv::X86_StdCall: {
+    case CallingConv::X86_LibCall:
+    case CallingConv::X86_StdCall:
+    case CallingConv::X86_LibStdCall: {
       // Pass 'nest' parameter in ECX.
       // Must be kept in sync with X86CallingConv.td
       NestReg = X86::ECX;
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -563,9 +563,11 @@
     switch (CC) {
     // On Win64, all these conventions just use the default convention.
     case CallingConv::C:
+    case CallingConv::X86_LibCall:
     case CallingConv::Fast:
     case CallingConv::X86_FastCall:
     case CallingConv::X86_StdCall:
+    case CallingConv::X86_LibStdCall:
     case CallingConv::X86_ThisCall:
     case CallingConv::X86_VectorCall:
     case CallingConv::Intel_OCL_BI:
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -359,4 +359,3 @@
 bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
-
Index: lib/Target/X86/X86WinEHState.cpp
===================================================================
--- lib/Target/X86/X86WinEHState.cpp
+++ lib/Target/X86/X86WinEHState.cpp
@@ -303,7 +303,7 @@
         "__CxxLongjmpUnwind",
         FunctionType::get(VoidTy, Int8PtrType, /*isVarArg=*/false));
     cast<Function>(CxxLongjmpUnwind->stripPointerCasts())
-        ->setCallingConv(CallingConv::X86_StdCall);
+        ->setCallingConv(CallingConv::X86_LibStdCall);
   } else if (Personality == EHPersonality::MSVC_X86SEH) {
     // If _except_handler4 is in use, some additional guard checks and prologue
     // stuff is required.
@@ -356,7 +356,7 @@
         FunctionType::get(Type::getVoidTy(TheModule->getContext()), Int8PtrType,
                           /*isVarArg=*/false));
     cast<Function>(SehLongjmpUnwind->stripPointerCasts())
-        ->setCallingConv(CallingConv::X86_StdCall);
+        ->setCallingConv(CallingConv::X86_LibStdCall);
   } else {
     llvm_unreachable("unexpected personality function");
   }
Index: test/CodeGen/X86/dwarf-eh-prepare.ll
===================================================================
--- test/CodeGen/X86/dwarf-eh-prepare.ll
+++ test/CodeGen/X86/dwarf-eh-prepare.ll
@@ -50,7 +50,7 @@
   resume { i8*, i32 } %new_ehvals
 
 ; CHECK: eh.resume:
-; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr)
+; CHECK-NEXT: call x86_libcallcc void @_Unwind_Resume(i8* %ehptr)
 }
 
 
@@ -88,7 +88,7 @@
 ; CHECK: landingpad { i8*, i32 }
 ; CHECK-NOT: br i1
 ; CHECK: ret i32 1
-; CHECK-NOT: call void @_Unwind_Resume
+; CHECK-NOT: call x86_libcallcc void @_Unwind_Resume
 ; CHECK: {{^[}]}}
 
 
@@ -152,7 +152,7 @@
 ; CHECK: catch_int:
 ; CHECK: ret i32 1
 ; CHECK: eh.resume:
-; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr)
+; CHECK-NEXT: call x86_libcallcc void @_Unwind_Resume(i8* %ehptr)
 
 declare i32 @__gxx_personality_v0(...)
 declare i32 @llvm.eh.typeid.for(i8*)
Index: test/CodeGen/X86/regparm.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/regparm.ll
@@ -0,0 +1,31 @@
+; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #1
+
+define void @use_memset(i8* inreg nocapture %dest, i32 inreg %c, i32 inreg %n) local_unnamed_addr #0 {
+entry:
+;CHECK-LABEL: @use_memset
+;CHECK-NOT: push
+;CHECK: jmp	memset 
+;CHECK-NOT: retl
+  %0 = trunc i32 %c to i8
+  tail call void @llvm.memset.p0i8.i32(i8* %dest, i8 %0, i32 %n, i32 1, i1 false)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #1
+
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 3}
+!1 = !{!"clang version 4.0.0 (trunk 288025) (llvm/trunk 288033)"}