Index: include/llvm/Analysis/TargetLibraryInfo.h
===================================================================
--- include/llvm/Analysis/TargetLibraryInfo.h
+++ include/llvm/Analysis/TargetLibraryInfo.h
@@ -38,6 +38,12 @@
   NumLibFuncs
 };
 
+enum SVMLAccuracy {
+  SVML_DEFAULT,
+  SVML_HA,
+  SVML_EP
+};
+
 /// Implementation of the target library information.
 ///
 /// This class constructs tables that hold the target library information and
@@ -150,7 +156,8 @@
   /// Return true if the function F has a vector equivalent with vectorization
   /// factor VF.
   bool isFunctionVectorizable(StringRef F, unsigned VF) const {
-    return !getVectorizedFunction(F, VF).empty();
+    bool Ignored;
+    return !getVectorizedFunction(F, VF, Ignored, false).empty();
   }
 
   /// Return true if the function F has a vector equivalent with any
@@ -159,7 +166,8 @@
 
   /// Return the name of the equivalent of F, vectorized with factor VF. If no
   /// such mapping exists, return the empty string.
-  StringRef getVectorizedFunction(StringRef F, unsigned VF) const;
+  std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML,
+                                    bool IsFast) const;
 
   /// Return true if the function F has a scalar equivalent, and set VF to be
   /// the vectorization factor.
@@ -253,8 +261,9 @@
   bool isFunctionVectorizable(StringRef F) const {
     return Impl->isFunctionVectorizable(F);
   }
-  StringRef getVectorizedFunction(StringRef F, unsigned VF) const {
-    return Impl->getVectorizedFunction(F, VF);
+  std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML,
+                                    bool IsFast) const {
+    return Impl->getVectorizedFunction(F, VF, FromSVML, IsFast);
   }
 
   /// Tests if the function is both available and a candidate for optimized code
Index: include/llvm/IR/CMakeLists.txt
===================================================================
--- include/llvm/IR/CMakeLists.txt
+++ include/llvm/IR/CMakeLists.txt
@@ -4,3 +4,7 @@
 set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
 tablegen(LLVM Intrinsics.inc -gen-intrinsic)
 add_public_tablegen_target(intrinsics_gen)
+
+set(LLVM_TARGET_DEFINITIONS SVML.td)
+tablegen(LLVM SVML.inc -gen-svml)
+add_public_tablegen_target(svml_gen)
Index: include/llvm/IR/CallingConv.h
===================================================================
--- include/llvm/IR/CallingConv.h
+++ include/llvm/IR/CallingConv.h
@@ -220,6 +220,9 @@
   /// shader if tessellation is in use, or otherwise the vertex shader.
   AMDGPU_ES = 96,
 
+  /// Intel_SVML - Calling convention for the Intel Short Vector Math Library.
+  Intel_SVML = 97,
+
   /// The highest possible calling convention ID. Must be some 2^k - 1.
   MaxID = 1023
 };
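Note: together with the lexer, parser, and printer changes below, the new convention round-trips through textual IR. A minimal hand-written illustration (not part of the patch):

    declare intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double>)

    define <4 x double> @caller(<4 x double> %x) {
      ; The call site must carry the same calling convention as the callee.
      %r = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> %x)
      ret <4 x double> %r
    }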
Index: include/llvm/IR/SVML.td
===================================================================
--- /dev/null
+++ include/llvm/IR/SVML.td
@@ -0,0 +1,62 @@
+//===-- SVML.td - Defines SVML call variants ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used by TableGen to define the different types of SVML function
+// variants used with -fveclib=SVML.
+//
+//===----------------------------------------------------------------------===//
+
+class SvmlVariant;
+
+def sin : SvmlVariant;
+def cos : SvmlVariant;
+def pow : SvmlVariant;
+def exp : SvmlVariant;
+def log : SvmlVariant;
+def acos : SvmlVariant;
+def acosh : SvmlVariant;
+def asin : SvmlVariant;
+def asinh : SvmlVariant;
+def atan2 : SvmlVariant;
+def atan : SvmlVariant;
+def atanh : SvmlVariant;
+def cbrt : SvmlVariant;
+def cdfnorm : SvmlVariant;
+def cdfnorminv : SvmlVariant;
+def cosd : SvmlVariant;
+def cosh : SvmlVariant;
+def erf : SvmlVariant;
+def erfc : SvmlVariant;
+def erfcinv : SvmlVariant;
+def erfinv : SvmlVariant;
+def exp10 : SvmlVariant;
+def exp2 : SvmlVariant;
+def expm1 : SvmlVariant;
+def hypot : SvmlVariant;
+def invsqrt : SvmlVariant;
+def log10 : SvmlVariant;
+def log1p : SvmlVariant;
+def log2 : SvmlVariant;
+def sind : SvmlVariant;
+def sinh : SvmlVariant;
+def sqrt : SvmlVariant;
+def tan : SvmlVariant;
+def tanh : SvmlVariant;
+
+// TODO: SVML does not currently provide _ha and _ep variants of these functions.
+// We should call the default variant of these functions in all cases instead.
+
+// def nearbyint : SvmlVariant;
+// def logb : SvmlVariant;
+// def floor : SvmlVariant;
+// def fmod : SvmlVariant;
+// def ceil : SvmlVariant;
+// def trunc : SvmlVariant;
+// def rint : SvmlVariant;
+// def round : SvmlVariant;
Index: lib/Analysis/CMakeLists.txt
===================================================================
--- lib/Analysis/CMakeLists.txt
+++ lib/Analysis/CMakeLists.txt
@@ -92,4 +92,5 @@
 
   DEPENDS
   intrinsics_gen
+  svml_gen
   )
Index: lib/Analysis/TargetLibraryInfo.cpp
===================================================================
--- lib/Analysis/TargetLibraryInfo.cpp
+++ lib/Analysis/TargetLibraryInfo.cpp
@@ -50,6 +50,11 @@
   return true;
 }
 
+std::string svmlMangle(StringRef FnName, const bool IsFast) {
+  std::string FullName = FnName;
+  return IsFast ? FullName : FullName + "_ha";
+}
+
 /// Initialize the set of available library functions based on the specified
 /// target triple. This should be carefully written so that a missing target
 /// triple gets a sane set of defaults.
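Note: svmlMangle selects between the default and the high-accuracy ("_ha") SVML entry points. A standalone sketch of the intended behavior, for illustration only (svmlMangleSketch is not part of the patch):

    #include <cassert>
    #include <string>
    #include "llvm/ADT/StringRef.h"

    // Append the high-accuracy suffix unless the call site is fast-math.
    static std::string svmlMangleSketch(llvm::StringRef FnName, bool IsFast) {
      std::string FullName = FnName.str();
      return IsFast ? FullName : FullName + "_ha";
    }

    int main() {
      assert(svmlMangleSketch("__svml_sin4", /*IsFast=*/true) == "__svml_sin4");
      assert(svmlMangleSketch("__svml_sin4", /*IsFast=*/false) == "__svml_sin4_ha");
    }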
@@ -1453,109 +1458,9 @@
   }
   case SVML: {
     const VecDesc VecFuncs[] = {
-        {"sin", "__svml_sin2", 2},
-        {"sin", "__svml_sin4", 4},
-        {"sin", "__svml_sin8", 8},
-
-        {"sinf", "__svml_sinf4", 4},
-        {"sinf", "__svml_sinf8", 8},
-        {"sinf", "__svml_sinf16", 16},
-
-        {"llvm.sin.f64", "__svml_sin2", 2},
-        {"llvm.sin.f64", "__svml_sin4", 4},
-        {"llvm.sin.f64", "__svml_sin8", 8},
-
-        {"llvm.sin.f32", "__svml_sinf4", 4},
-        {"llvm.sin.f32", "__svml_sinf8", 8},
-        {"llvm.sin.f32", "__svml_sinf16", 16},
-
-        {"cos", "__svml_cos2", 2},
-        {"cos", "__svml_cos4", 4},
-        {"cos", "__svml_cos8", 8},
-
-        {"cosf", "__svml_cosf4", 4},
-        {"cosf", "__svml_cosf8", 8},
-        {"cosf", "__svml_cosf16", 16},
-
-        {"llvm.cos.f64", "__svml_cos2", 2},
-        {"llvm.cos.f64", "__svml_cos4", 4},
-        {"llvm.cos.f64", "__svml_cos8", 8},
-
-        {"llvm.cos.f32", "__svml_cosf4", 4},
-        {"llvm.cos.f32", "__svml_cosf8", 8},
-        {"llvm.cos.f32", "__svml_cosf16", 16},
-
-        {"pow", "__svml_pow2", 2},
-        {"pow", "__svml_pow4", 4},
-        {"pow", "__svml_pow8", 8},
-
-        {"powf", "__svml_powf4", 4},
-        {"powf", "__svml_powf8", 8},
-        {"powf", "__svml_powf16", 16},
-
-        { "__pow_finite", "__svml_pow2", 2 },
-        { "__pow_finite", "__svml_pow4", 4 },
-        { "__pow_finite", "__svml_pow8", 8 },
-
-        { "__powf_finite", "__svml_powf4", 4 },
-        { "__powf_finite", "__svml_powf8", 8 },
-        { "__powf_finite", "__svml_powf16", 16 },
-
-        {"llvm.pow.f64", "__svml_pow2", 2},
-        {"llvm.pow.f64", "__svml_pow4", 4},
-        {"llvm.pow.f64", "__svml_pow8", 8},
-
-        {"llvm.pow.f32", "__svml_powf4", 4},
-        {"llvm.pow.f32", "__svml_powf8", 8},
-        {"llvm.pow.f32", "__svml_powf16", 16},
-
-        {"exp", "__svml_exp2", 2},
-        {"exp", "__svml_exp4", 4},
-        {"exp", "__svml_exp8", 8},
-
-        {"expf", "__svml_expf4", 4},
-        {"expf", "__svml_expf8", 8},
-        {"expf", "__svml_expf16", 16},
-
-        { "__exp_finite", "__svml_exp2", 2 },
-        { "__exp_finite", "__svml_exp4", 4 },
-        { "__exp_finite", "__svml_exp8", 8 },
-
-        { "__expf_finite", "__svml_expf4", 4 },
-        { "__expf_finite", "__svml_expf8", 8 },
-        { "__expf_finite", "__svml_expf16", 16 },
-
-        {"llvm.exp.f64", "__svml_exp2", 2},
-        {"llvm.exp.f64", "__svml_exp4", 4},
-        {"llvm.exp.f64", "__svml_exp8", 8},
-
-        {"llvm.exp.f32", "__svml_expf4", 4},
-        {"llvm.exp.f32", "__svml_expf8", 8},
-        {"llvm.exp.f32", "__svml_expf16", 16},
-
-        {"log", "__svml_log2", 2},
-        {"log", "__svml_log4", 4},
-        {"log", "__svml_log8", 8},
-
-        {"logf", "__svml_logf4", 4},
-        {"logf", "__svml_logf8", 8},
-        {"logf", "__svml_logf16", 16},
-
-        { "__log_finite", "__svml_log2", 2 },
-        { "__log_finite", "__svml_log4", 4 },
-        { "__log_finite", "__svml_log8", 8 },
-
-        { "__logf_finite", "__svml_logf4", 4 },
-        { "__logf_finite", "__svml_logf8", 8 },
-        { "__logf_finite", "__svml_logf16", 16 },
-
-        {"llvm.log.f64", "__svml_log2", 2},
-        {"llvm.log.f64", "__svml_log4", 4},
-        {"llvm.log.f64", "__svml_log8", 8},
-
-        {"llvm.log.f32", "__svml_logf4", 4},
-        {"llvm.log.f32", "__svml_logf8", 8},
-        {"llvm.log.f32", "__svml_logf16", 16},
+#define GET_SVML_VARIANTS
+#include "llvm/IR/SVML.inc"
+#undef GET_SVML_VARIANTS
     };
     addVectorizableFunctions(VecFuncs);
     break;
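Note: SVML.inc replaces the hand-maintained table above. Based on the emitter at the end of this patch, the generated entries for one variant such as sin would look roughly like this (illustrative excerpt, not the literal generated file):

    {"sinf", "__svml_sinf4", 4},
    {"llvm.sin.f32", "__svml_sinf4", 4},
    {"__sinf_finite", "__svml_sinf4", 4},
    // ... the same three entries repeated for VL = 8 and 16 ...
    {"sin", "__svml_sin2", 2},
    {"llvm.sin.f64", "__svml_sin2", 2},
    {"__sin_finite", "__svml_sin2", 2},
    // ... the same three entries repeated for VL = 4 and 8 ...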
@@ -1576,19 +1481,26 @@
   return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
 }
 
-StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
-                                                       unsigned VF) const {
+std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+                                                         unsigned VF,
+                                                         bool &FromSVML,
+                                                         bool IsFast) const {
+  FromSVML = ClVectorLibrary == SVML;
   F = sanitizeFunctionName(F);
   if (F.empty())
     return F;
   std::vector<VecDesc>::const_iterator I = std::lower_bound(
       VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
   while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
-    if (I->VectorizationFactor == VF)
+    if (I->VectorizationFactor == VF) {
+      if (FromSVML) {
+        return svmlMangle(I->VectorFnName, IsFast);
+      }
       return I->VectorFnName;
+    }
     ++I;
   }
-  return StringRef();
+  return std::string();
 }
 
 StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp
+++ lib/AsmParser/LLLexer.cpp
@@ -596,6 +596,7 @@
   KEYWORD(spir_kernel);
   KEYWORD(spir_func);
   KEYWORD(intel_ocl_bicc);
+  KEYWORD(intel_svmlcc);
   KEYWORD(x86_64_sysvcc);
   KEYWORD(win64cc);
   KEYWORD(x86_regcallcc);
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -1778,6 +1778,7 @@
 ///   ::= 'ccc'
 ///   ::= 'fastcc'
 ///   ::= 'intel_ocl_bicc'
+///   ::= 'intel_svmlcc'
 ///   ::= 'coldcc'
 ///   ::= 'x86_stdcallcc'
 ///   ::= 'x86_fastcallcc'
@@ -1837,6 +1838,7 @@
   case lltok::kw_spir_kernel:    CC = CallingConv::SPIR_KERNEL; break;
   case lltok::kw_spir_func:      CC = CallingConv::SPIR_FUNC; break;
   case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
+  case lltok::kw_intel_svmlcc:   CC = CallingConv::Intel_SVML; break;
   case lltok::kw_x86_64_sysvcc:  CC = CallingConv::X86_64_SysV; break;
   case lltok::kw_win64cc:        CC = CallingConv::Win64; break;
   case lltok::kw_webkit_jscc:    CC = CallingConv::WebKit_JS; break;
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h
+++ lib/AsmParser/LLToken.h
@@ -130,6 +130,7 @@
   kw_fastcc,
   kw_coldcc,
   kw_intel_ocl_bicc,
+  kw_intel_svmlcc,
   kw_x86_stdcallcc,
   kw_x86_fastcallcc,
   kw_x86_thiscallcc,
Index: lib/IR/AsmWriter.cpp
===================================================================
--- lib/IR/AsmWriter.cpp
+++ lib/IR/AsmWriter.cpp
@@ -360,6 +360,7 @@
   case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
   case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
+  case CallingConv::Intel_SVML:    Out << "intel_svmlcc"; break;
   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;
   case CallingConv::ARM_AAPCS:     Out << "arm_aapcscc"; break;
   case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -2091,6 +2091,7 @@
   case CallingConv::Fast:
   case CallingConv::Cold:
   case CallingConv::Intel_OCL_BI:
+  case CallingConv::Intel_SVML:
   case CallingConv::PTX_Kernel:
   case CallingConv::PTX_Device:
     Assert(!F.isVarArg(), "Calling convention does not support varargs or "
Index: lib/Target/X86/X86CallingConv.td
===================================================================
--- lib/Target/X86/X86CallingConv.td
+++ lib/Target/X86/X86CallingConv.td
@@ -476,12 +476,29 @@
   CCDelegateTo<RetCC_X86Common>
 ]>;
 
+// Intel_SVML return-value convention.
+def RetCC_Intel_SVML : CallingConv<[
+  // Vector types are returned in XMM0,XMM1
+  CCIfType<[v4f32, v2f64],
+           CCAssignToReg<[XMM0,XMM1]>>,
+
+  // 256-bit FP vectors
+  CCIfType<[v8f32, v4f64],
+           CCAssignToReg<[YMM0,YMM1]>>,
+
+  // 512-bit FP vectors
+  CCIfType<[v16f32, v8f64],
+           CCAssignToReg<[ZMM0,ZMM1]>>
+]>;
+
 // This is the return-value convention used for the entire X86 backend.
 def RetCC_X86 : CallingConv<[
 
   // Check if this is the Intel OpenCL built-ins calling convention
   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>,
 
+  CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo<RetCC_Intel_SVML>>,
+
   CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
   CCDelegateTo<RetCC_X86_32>
 ]>;
@@ -983,6 +1000,22 @@
   CCDelegateTo<CC_X86_32_C>
 ]>;
 
+// X86-64 Intel Short Vector Math Library calling convention.
+def CC_Intel_SVML : CallingConv<[
+
+  // The SSE vector arguments are passed in XMM registers.
+  CCIfType<[v4f32, v2f64],
+           CCAssignToReg<[XMM0, XMM1, XMM2]>>,
+
+  // The 256-bit vector arguments are passed in YMM registers.
+  CCIfType<[v8f32, v4f64],
+           CCAssignToReg<[YMM0, YMM1, YMM2]>>,
+
+  // The 512-bit vector arguments are passed in ZMM registers.
+  CCIfType<[v16f32, v8f64],
+           CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>
+]>;
+
 def CC_X86_32_Intr : CallingConv<[
   CCAssignToStack<4, 4>
 ]>;
@@ -1039,6 +1072,7 @@
 // This is the argument convention used for the entire X86 backend.
 def CC_X86 : CallingConv<[
   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
+  CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo<CC_Intel_SVML>>,
   CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
   CCDelegateTo<CC_X86_32>
 ]>;
@@ -1147,4 +1181,27 @@
                                             (sequence "R%u", 12, 15))>;
 def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
                                          (sequence "XMM%u", 8, 15))>;
-
+
+// SVML calling convention
+def CSR_32_Intel_SVML        : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>;
+def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML,
+                                                K4, K5, K6, K7)>;
+
+def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>;
+
+def CSR_64_Intel_SVML    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                            (sequence "XMM%u", 8, 15))>;
+def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                            (sequence "XMM%u", 6, 15))>;
+
+def CSR_64_Intel_SVML_AVX    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                                (sequence "YMM%u", 8, 15))>;
+def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                                (sequence "YMM%u", 6, 15))>;
+
+def CSR_64_Intel_SVML_AVX512    : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                                   (sequence "ZMM%u", 16, 31),
+                                                   K4, K5, K6, K7)>;
+def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE,
+                                                   (sequence "ZMM%u", 6, 21),
+                                                   K4, K5, K6, K7)>;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -3258,7 +3258,8 @@
   // FIXME: Only some x86_32 calling conventions support AVX512.
   if (Subtarget.hasAVX512() &&
       (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
-                   CallConv == CallingConv::Intel_OCL_BI)))
+                   CallConv == CallingConv::Intel_OCL_BI ||
+                   CallConv == CallingConv::Intel_SVML)))
     VecVT = MVT::v16f32;
   else if (Subtarget.hasAVX())
     VecVT = MVT::v8f32;
Index: lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- lib/Target/X86/X86RegisterInfo.cpp
+++ lib/Target/X86/X86RegisterInfo.cpp
@@ -311,6 +311,23 @@
       return CSR_64_Intel_OCL_BI_SaveList;
     break;
   }
+  case CallingConv::Intel_SVML: {
+    if (Is64Bit) {
+      if (HasAVX512)
+        return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_SaveList :
+                         CSR_64_Intel_SVML_AVX512_SaveList;
+      if (HasAVX)
+        return IsWin64 ? CSR_Win64_Intel_SVML_AVX_SaveList :
+                         CSR_64_Intel_SVML_AVX_SaveList;
+
+      return IsWin64 ? CSR_Win64_Intel_SVML_SaveList :
+                       CSR_64_Intel_SVML_SaveList;
+    } else { // Is32Bit
+      if (HasAVX512)
+        return CSR_32_Intel_SVML_AVX512_SaveList;
+      return CSR_32_Intel_SVML_SaveList;
+    }
+  }
   case CallingConv::HHVM:
     return CSR_64_HHVM_SaveList;
   case CallingConv::X86_RegCall:
@@ -425,6 +442,23 @@
       return CSR_64_Intel_OCL_BI_RegMask;
     break;
   }
+  case CallingConv::Intel_SVML: {
+    if (Is64Bit) {
+      if (HasAVX512)
+        return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_RegMask :
+                         CSR_64_Intel_SVML_AVX512_RegMask;
+      if (HasAVX)
+        return IsWin64 ? CSR_Win64_Intel_SVML_AVX_RegMask :
+                         CSR_64_Intel_SVML_AVX_RegMask;
+
+      return IsWin64 ? CSR_Win64_Intel_SVML_RegMask :
+                       CSR_64_Intel_SVML_RegMask;
+    } else { // Is32Bit
+      if (HasAVX512)
+        return CSR_32_Intel_SVML_AVX512_RegMask;
+      return CSR_32_Intel_SVML_RegMask;
+    }
+  }
   case CallingConv::HHVM:
     return CSR_64_HHVM_RegMask;
   case CallingConv::X86_RegCall:
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -768,6 +768,7 @@
     case CallingConv::X86_ThisCall:
     case CallingConv::X86_VectorCall:
     case CallingConv::Intel_OCL_BI:
+    case CallingConv::Intel_SVML:
       return isTargetWin64();
     // This convention allows using the Win64 convention on other targets.
     case CallingConv::Win64:
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4101,6 +4101,7 @@
     }
 
     Function *VectorF;
+    bool FromSVML = false;
    if (UseVectorIntrinsic) {
       // Use vector version of the intrinsic.
       Type *TysForDecl[] = {CI->getType()};
@@ -4109,7 +4110,8 @@
       VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
     } else {
       // Use vector version of the library call.
-      StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
+      bool IsFast = CI->getFastMathFlags().isFast();
+      std::string VFnName = TLI->getVectorizedFunction(FnName, VF, FromSVML, IsFast);
       assert(!VFnName.empty() && "Vector function name is empty.");
       VectorF = M->getFunction(VFnName);
       if (!VectorF) {
@@ -4128,7 +4130,7 @@
       if (isa<FPMathOperator>(V))
         V->copyFastMathFlags(CI);
-
+      if (FromSVML) V->setCallingConv(CallingConv::Intel_SVML);
       VectorLoopValueMap.setVectorValue(&I, Part, V);
       addMetadata(V, &I);
     }
   }
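Note: because IsFast is derived from the call site's fast-math flags, the same scalar function maps to different SVML entry points. A hypothetical sketch of the expected lowering at VF=4, assuming -vector-library=SVML:

    ; call double @sin(double %d)       -> call intel_svmlcc <4 x double> @__svml_sin4_ha(...)
    ; call fast double @sin(double %d)  -> call fast intel_svmlcc <4 x double> @__svml_sin4(...)

The updated test below checks the high-accuracy (_ha) spellings, since its call sites do not carry fast-math flags.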
Index: test/Transforms/LoopVectorize/X86/svml-calls.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -31,7 +31,7 @@
 
 define void @sin_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -54,7 +54,7 @@
 
 define void @sin_f32(float* nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -77,7 +77,7 @@
 
 define void @sin_f64_intrinsic(double* nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -100,7 +100,7 @@
 
 define void @sin_f32_intrinsic(float* nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -123,7 +123,7 @@
 
 define void @cos_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -146,7 +146,7 @@
 
 define void @cos_f32(float* nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -169,7 +169,7 @@
 
 define void @cos_f64_intrinsic(double* nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -192,7 +192,7 @@
 
 define void @cos_f32_intrinsic(float* nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -215,7 +215,7 @@
 
 define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64(
-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -240,7 +240,7 @@
 
 define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64_intrinsic(
-; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -265,7 +265,7 @@
 
 define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32(
-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -290,7 +290,7 @@
 
 define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrinsic(
-; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -315,7 +315,7 @@
 
 define void @exp_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @exp_f64(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -338,7 +338,7 @@
 
 define void @exp_f32(float* nocapture %varray) {
 ; CHECK-LABEL: @exp_f32(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -361,7 +361,7 @@
 
 define void @exp_f64_intrinsic(double* nocapture %varray) {
 ; CHECK-LABEL: @exp_f64_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -384,7 +384,7 @@
 
 define void @exp_f32_intrinsic(float* nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -407,7 +407,7 @@
 
 define void @log_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @log_f64(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -430,7 +430,7 @@
 
 define void @log_f32(float* nocapture %varray) {
 ; CHECK-LABEL: @log_f32(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -453,7 +453,7 @@
 
 define void @log_f64_intrinsic(double* nocapture %varray) {
 ; CHECK-LABEL: @log_f64_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -476,7 +476,7 @@
 
 define void @log_f32_intrinsic(float* nocapture %varray) {
 ; CHECK-LABEL: @log_f32_intrinsic(
-; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]])
+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]])
 ; CHECK: ret void
 ;
 entry:
@@ -497,5 +497,43 @@
   ret void
 }
 
-attributes #0 = { nounwind readnone }
+; CHECK-LABEL: @atan2_finite
+; CHECK: intel_svmlcc <8 x double> @__svml_atan28
+; CHECK: ret
+
+declare double @__atan2_finite(double, double) local_unnamed_addr #0
+
+define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc7, %entry
+  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ]
+  %0 = trunc i64 %indvars.iv19 to i32
+  %conv = sitofp i32 %0 to double
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %1 = trunc i64 %indvars.iv.next to i32
+  %conv4 = sitofp i32 %1 to double
+  %call = tail call fast double @__atan2_finite(double %conv, double %conv4)
+  %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv
+  store double %call, double* %arrayidx6, align 8
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5
+
+for.inc7:                                         ; preds = %for.body3
+  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+  %exitcond21 = icmp eq i64 %indvars.iv.next20, 100
+  br i1 %exitcond21, label %for.end9, label %for.cond1.preheader
+
+for.end9:                                         ; preds = %for.inc7
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+
+!5 = distinct !{!5, !6, !7}
+!6 = !{!"llvm.loop.vectorize.width", i32 8}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: utils/TableGen/CMakeLists.txt
===================================================================
--- utils/TableGen/CMakeLists.txt
+++ utils/TableGen/CMakeLists.txt
@@ -38,6 +38,7 @@
   SearchableTableEmitter.cpp
   SubtargetEmitter.cpp
   SubtargetFeatureInfo.cpp
+  SVMLEmitter.cpp
   TableGen.cpp
   Types.cpp
   X86DisassemblerTables.cpp
Index: utils/TableGen/SVMLEmitter.cpp
===================================================================
--- /dev/null
+++ utils/TableGen/SVMLEmitter.cpp
@@ -0,0 +1,110 @@
+//===------ SVMLEmitter.cpp - Generate SVML function variants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits the scalar to svml function map for TLI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenTarget.h"
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "SVMLVariants"
+#include "llvm/Support/Debug.h"
+
+namespace {
+
+class SVMLVariantsEmitter {
+
+  RecordKeeper &Records;
+
+private:
+  void emitSVMLVariants(raw_ostream &OS);
+
+public:
+  SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {}
+
+  void run(raw_ostream &OS);
+};
+} // End anonymous namespace
+
+/// \brief Emit the set of SVML variant function names.
+// The default is to emit the high accuracy SVML variants until a mechanism is
+// introduced to allow a selection of different variants through precision
+// requirements specified by the user. This code generates mappings to SVML
+// for the scalar forms of LLVM intrinsics, math library calls, and the finite
+// variants of math library calls.
+void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) {
+
+  const unsigned MinSinglePrecVL = 4;
+  const unsigned MaxSinglePrecVL = 16;
+  const unsigned MinDoublePrecVL = 2;
+  const unsigned MaxDoublePrecVL = 8;
+
+  OS << "#ifdef GET_SVML_VARIANTS\n";
+
+  for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) {
+    StringRef SvmlVariantNameStr = D->getName();
+    // Single Precision SVML
+    for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) {
+      // Emit the scalar math library function to svml function entry.
+      OS << "{\"" << SvmlVariantNameStr << "f" << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
+         << VL << "},\n";
+
+      // Emit the scalar intrinsic to svml function entry.
+      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
+         << VL << "},\n";
+
+      // Emit the finite math library function to svml function entry.
+      OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", "
+         << VL << "},\n";
+    }
+
+    // Double Precision SVML
+    for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) {
+      // Emit the scalar math library function to svml function entry.
+      OS << "{\"" << SvmlVariantNameStr << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL
+         << "},\n";
+
+      // Emit the scalar intrinsic to svml function entry.
+      OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL
+         << "},\n";
+
+      // Emit the finite math library function to svml function entry.
+      OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", ";
+      OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", "
+         << VL << "},\n";
+    }
+  }
+
+  OS << "#endif // GET_SVML_VARIANTS\n\n";
+}
+
+void SVMLVariantsEmitter::run(raw_ostream &OS) {
+  emitSVMLVariants(OS);
+}
+
+namespace llvm {
+
+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) {
+  SVMLVariantsEmitter(RK).run(OS);
+}
+
+} // End llvm namespace
Index: utils/TableGen/TableGen.cpp
===================================================================
--- utils/TableGen/TableGen.cpp
+++ utils/TableGen/TableGen.cpp
@@ -50,6 +50,7 @@
   GenX86EVEX2VEXTables,
   GenX86FoldTables,
   GenRegisterBank,
+  GenSVMLVariants,
 };
 
 namespace {
@@ -108,7 +109,9 @@
                     clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
                                "Generate X86 fold tables"),
                     clEnumValN(GenRegisterBank, "gen-register-bank",
-                               "Generate registers bank descriptions")));
+                               "Generate registers bank descriptions"),
+                    clEnumValN(GenSVMLVariants, "gen-svml",
+                               "Generate SVML variant function names")));
 
 cl::OptionCategory PrintEnumsCat("Options for -print-enums");
 cl::opt<std::string>
@@ -213,6 +216,9 @@
     case GenX86FoldTables:
       EmitX86FoldTables(Records, OS);
       break;
+    case GenSVMLVariants:
+      EmitSVMLVariants(Records, OS);
+      break;
     }
 
     return false;
Index: utils/TableGen/TableGenBackends.h
===================================================================
--- utils/TableGen/TableGenBackends.h
+++ utils/TableGen/TableGenBackends.h
@@ -86,6 +86,7 @@
 void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS);
 
 } // End llvm namespace
Index: utils/vim/syntax/llvm.vim
===================================================================
--- utils/vim/syntax/llvm.vim
+++ utils/vim/syntax/llvm.vim
@@ -94,6 +94,7 @@
       \ inreg
       \ inteldialect
       \ intel_ocl_bicc
+      \ intel_svmlcc
      \ internal
       \ linkonce
       \ linkonce_odr
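Note: the new backend can be exercised directly. The exact flags and include paths below are assumptions based on the options this patch adds, not a documented workflow:

    # Emit the scalar-to-SVML map that becomes SVML.inc.
    llvm-tblgen -gen-svml -I include include/llvm/IR/SVML.td

    # Typical RUN-style invocation matching the updated test.
    opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -S < svml-calls.ll | FileCheck svml-calls.ll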