Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -380,6 +380,7 @@
 void initializeRegionViewerPass(PassRegistry&);
 void initializeRegisterCoalescerPass(PassRegistry&);
 void initializeRenameIndependentSubregsPass(PassRegistry&);
+void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
 void initializeResetMachineFunctionPass(PassRegistry&);
 void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeRewriteStatepointsForGCLegacyPassPass(PassRegistry &);
Index: llvm/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/include/llvm/LinkAllPasses.h
+++ llvm/include/llvm/LinkAllPasses.h
@@ -228,6 +228,7 @@
       (void) llvm::createWarnMissedTransformationsPass();
       (void) llvm::createHardwareLoopsPass();
       (void) llvm::createInjectTLIMappingsLegacyPass();
+      (void)llvm::createReplaceWithVeclibLegacyPass();
       (void) llvm::createUnifyLoopExitsPass();
       (void) llvm::createFixIrreduciblePass();
 
Index: llvm/include/llvm/Transforms/Utils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils.h
+++ llvm/include/llvm/Transforms/Utils.h
@@ -133,6 +133,13 @@
 //
 FunctionPass *createInjectTLIMappingsLegacyPass();
 
+//===----------------------------------------------------------------------===//
+//
+// ReplaceWithVeclibLegacy - replaces calls to LLVM intrinsics that operate
+// on vectors with calls to vector library functions from TargetLibraryInfo.
+//
+FunctionPass *createReplaceWithVeclibLegacyPass();
+
 //===----------------------------------------------------------------------===//
 //
 // UnifyLoopExits - For each loop, creates a new block N such that all exiting
Index: llvm/include/llvm/Transforms/Utils/ReplaceWithVeclib.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Utils/ReplaceWithVeclib.h
@@ -0,0 +1,38 @@
+//===- ReplaceWithVeclib.h - Replace vector intrinsics with veclib calls --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
+#define LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+
+namespace llvm {
+/// New pass manager entry point of the replace-with-veclib transformation.
+class ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Legacy pass manager wrapper around the same transformation.
+class ReplaceWithVeclibLegacy : public FunctionPass {
+public:
+  static char ID; // Handed to the FunctionPass base by the constructor.
+  ReplaceWithVeclibLegacy() : FunctionPass(ID) {
+    initializeReplaceWithVeclibLegacyPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+};
+} // End namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
\ No newline at end of file
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -226,6 +226,7 @@
 #include "llvm/Transforms/Utils/Mem2Reg.h"
 #include "llvm/Transforms/Utils/MetaRenamer.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/ReplaceWithVeclib.h"
 #include "llvm/Transforms/Utils/StripGCRelocates.h"
 #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -298,6 +298,7 @@
 FUNCTION_PASS("reassociate", ReassociatePass())
 FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass())
 FUNCTION_PASS("reg2mem", RegToMemPass())
+FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib())
 FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
 FUNCTION_PASS("scalarizer", ScalarizerPass())
 FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass())
Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -53,6 +53,7 @@
   NameAnonGlobals.cpp
   PredicateInfo.cpp
   PromoteMemoryToRegister.cpp
+  ReplaceWithVeclib.cpp
   ScalarEvolutionExpander.cpp
   StripGCRelocates.cpp
   SSAUpdater.cpp
Index: llvm/lib/Transforms/Utils/ReplaceWithVeclib.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Utils/ReplaceWithVeclib.cpp
@@ -0,0 +1,243 @@
+//===- ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+          "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+          "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+          "Number of functions added to `llvm.compiler.used`");
+
+/// Redirects the vector intrinsic call \p CI to the vector library function
+/// named \p TLIName, declaring that function in the module on first use.
+/// The original call is left in place without uses. Always returns true.
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+  Function *const IntrinsicFn = CI.getCalledFunction();
+  Module &Mod = *CI.getModule();
+
+  // Reuse an existing declaration of the library function if the module
+  // already has one; otherwise declare it with the intrinsic's signature
+  // and attributes.
+  Function *LibFn = Mod.getFunction(TLIName);
+  if (LibFn == nullptr) {
+    FunctionType *const Signature = IntrinsicFn->getFunctionType();
+    LibFn =
+        Function::Create(Signature, Function::ExternalLinkage, TLIName, Mod);
+    LibFn->copyAttributesFrom(IntrinsicFn);
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+                      << TLIName << "` of type `" << *(LibFn->getType())
+                      << "` to module.\n");
+    ++NumTLIFuncDeclAdded;
+
+    // Like InjectTLIMappings, list the new declaration in
+    // llvm.compiler.used.
+    appendToCompilerUsed(Mod, {LibFn});
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+                      << "` to `@llvm.compiler.used`.\n");
+    ++NumFuncUsedAdded;
+  }
+
+  // Build the library call at the position of the intrinsic call,
+  // forwarding its arguments and operand bundles unchanged.
+  SmallVector<OperandBundleDef, 1> Bundles;
+  CI.getOperandBundlesAsDefs(Bundles);
+  SmallVector<Value *> CallArgs(CI.arg_operands());
+  IRBuilder<> Builder{&CI};
+  CallInst *const LibCall = Builder.CreateCall(LibFn, CallArgs, Bundles);
+  CI.replaceAllUsesWith(LibCall);
+
+  // Carry the fast math flags over when the new call is an FP math operation.
+  if (isa<FPMathOperator>(LibCall))
+    LibCall->copyFastMathFlags(&CI);
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+                    << IntrinsicFn->getName() << "` with call to `" << TLIName
+                    << "`.\n");
+  ++NumCallsReplaced;
+  return true;
+}
+
+/// Tries to replace \p CI, a call to an LLVM intrinsic with fixed-width vector
+/// operands, by a call to the vector library function that \p TLI maps the
+/// scalar form of the intrinsic to.
+///
+/// \returns true iff the call was replaced.
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+                                    CallInst &CI) {
+  // Bail out when the called function is not directly known.
+  Function *const Callee = CI.getCalledFunction();
+  if (!Callee)
+    return false;
+
+  // Only calls to intrinsic functions are candidates for replacement.
+  const auto IID = Callee->getIntrinsicID();
+  if (IID == Intrinsic::not_intrinsic)
+    return false;
+
+  // Scalarize the operand types and make sure that all vector operands
+  // share one fixed vector width. VF stays 0 until the first vector operand
+  // has been seen.
+  unsigned VF = 0;
+  SmallVector<Type *> ScalarTypes;
+  for (auto Operand : enumerate(CI.arg_operands())) {
+    Type *const OperandTy = Operand.value()->getType();
+
+    // Some operands of a vector intrinsic remain scalar; their type is
+    // taken over unchanged.
+    if (hasVectorInstrinsicScalarOpd(IID, Operand.index())) {
+      ScalarTypes.push_back(OperandTy);
+      continue;
+    }
+
+    // Every other operand has to be a vector, else this is not a call to
+    // a vector intrinsic and nothing is replaced.
+    auto *const VecTy = dyn_cast<VectorType>(OperandTy);
+    if (!VecTy)
+      return false;
+
+    // Scalable vectors are not supported by the current implementation.
+    const auto Count = VecTy->getElementCount();
+    if (Count.isScalable())
+      return false;
+
+    // All vector operands must agree on the number of elements.
+    const unsigned Width = Count.getFixedValue();
+    if (VF != 0 && VF != Width)
+      return false;
+    VF = Width;
+    ScalarTypes.push_back(VecTy->getElementType());
+  }
+
+  // Rebuild the name of the scalar form of the intrinsic from its ID and,
+  // for overloaded intrinsics, the scalarized operand types.
+  // NOTE(review): one type per operand is passed here; for intrinsics with
+  // fewer overloaded types than operands (e.g. pow) the resulting name
+  // presumably has a different suffix than the TLI entry - confirm.
+  const std::string ScalarName = Intrinsic::isOverloaded(IID)
+                                     ? Intrinsic::getName(IID, ScalarTypes)
+                                     : Intrinsic::getName(IID).str();
+
+  // Without any vectorized variant of the scalar function in the
+  // TargetLibraryInfo there is nothing to replace the call with.
+  if (!TLI.isFunctionVectorizable(ScalarName))
+    return false;
+
+  // Ask the TargetLibraryInfo for the variant that matches the exact
+  // vector width of the call operands.
+  const std::string TLIName =
+      std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+                    << ScalarName << "` and vector width " << VF << ".\n");
+
+  // An empty name means that no mapping exists for this width.
+  if (TLIName.empty())
+    return false;
+
+  // The mapping was found: replace the call to the intrinsic with a call
+  // to the vector library function.
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+                    << "`.\n");
+  return replaceWithTLIFunction(CI, TLIName);
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+  SmallVector<CallInst *> Replaced; // Now-dead intrinsic calls to erase.
+  for (auto &I : instructions(F))
+    if (auto *CI = dyn_cast<CallInst>(&I))
+      if (replaceWithCallToVeclib(TLI, *CI))
+        Replaced.push_back(CI); // Erase later: keeps the iterator valid.
+  for_each(Replaced, [](CallInst *CI) { CI->eraseFromParent(); });
+  return !Replaced.empty(); // True iff at least one call was replaced.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  // No call was replaced: the function is untouched, so every analysis
+  // is still valid.
+  if (!runImpl(AM.getResult<TargetLibraryAnalysis>(F), F))
+    return PreservedAnalyses::all();
+
+  // Calls were replaced: report the CFG analyses and the analyses listed
+  // below as preserved.
+  PreservedAnalyses Preserved;
+  Preserved.preserveSet<CFGAnalyses>();
+  Preserved.preserve<TargetLibraryAnalysis>();
+  Preserved.preserve<ScalarEvolutionAnalysis>();
+  Preserved.preserve<AAManager>();
+  Preserved.preserve<LoopAccessAnalysis>();
+  Preserved.preserve<DemandedBitsAnalysis>();
+  Preserved.preserve<OptimizationRemarkEmitterAnalysis>();
+  Preserved.preserve<GlobalsAA>();
+  return Preserved;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+  // Look up the TLI for F and hand over to the shared implementation.
+  auto &TLIWrapper = getAnalysis<TargetLibraryInfoWrapperPass>();
+  return runImpl(TLIWrapper.getTLI(F), F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetLibraryInfoWrapperPass>(); // Queried in runOnFunction.
+  AU.setPreservesCFG(); // Declares the CFG as preserved.
+  AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+  AU.addPreserved<DemandedBitsWrapperPass>();
+  AU.addPreserved<LoopAccessLegacyAnalysis>();
+  AU.addPreserved<AAResultsWrapperPass>();
+  AU.addPreserved<ScalarEvolutionWrapperPass>();
+  AU.addPreserved<TargetLibraryInfoWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0; // Passed to FunctionPass by the ctor.
+// Legacy pass registration; declares the dependency on the TLI wrapper pass.
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+                      "Replace with calls to vector library", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+                    "Replace with calls to vector library", false, false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+  return new ReplaceWithVeclibLegacy(); // Heap-allocated; not freed here.
+}
\ No newline at end of file
Index: llvm/lib/Transforms/Utils/Utils.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Utils.cpp
+++ llvm/lib/Transforms/Utils/Utils.cpp
@@ -43,6 +43,7 @@
   initializeStripGCRelocatesLegacyPass(Registry);
   initializePredicateInfoPrinterLegacyPassPass(Registry);
   initializeInjectTLIMappingsLegacyPass(Registry);
+  initializeReplaceWithVeclibLegacyPass(Registry);
   initializeFixIrreduciblePass(Registry);
   initializeUnifyLoopExitsLegacyPassPass(Registry);
   initializeUniqueInternalLinkageNamesLegacyPassPass(Registry);
Index: llvm/test/Transforms/Util/replace-intrinsics-with-veclib.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Util/replace-intrinsics-with-veclib.ll
@@ -0,0 +1,96 @@
+; RUN: opt -vector-library=SVML -replace-with-veclib -dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,SVML,F64
+; RUN: opt -vector-library=SVML -passes=replace-with-veclib,dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,SVML,F64
+; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,LIBMVEC-X86,F64
+; RUN: opt -vector-library=LIBMVEC-X86 -passes=replace-with-veclib,dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,LIBMVEC-X86,F64
+; RUN: opt -vector-library=MASSV -replace-with-veclib -dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,MASSV,F32
+; RUN: opt -vector-library=MASSV -passes=replace-with-veclib,dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,MASSV,F32
+; RUN: opt -vector-library=Accelerate -replace-with-veclib -dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,ACCELERATE,F32
+; RUN: opt -vector-library=Accelerate -passes=replace-with-veclib,dce  -S < %s | FileCheck %s  --check-prefixes=COMMON,ACCELERATE,F32
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; COMMON-LABEL: @llvm.compiler.used = appending global
+; F64-SAME: [2 x i8*] [
+; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_exp4 to i8*),
+; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_expf4 to i8*
+; LIBMVEC-X86-SAME: i8* bitcast (<4 x double> (<4 x double>)* @_ZGVdN4v_exp to i8*),
+; LIBMVEC-X86-SAME: i8* bitcast (<4 x float> (<4 x float>)* @_ZGVbN4v_expf to i8*)
+; F32-SAME: [1 x i8*] [
+; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__expf4_massv to i8*)
+; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vexpf to i8*)
+; COMMON-SAME: ], section "llvm.metadata"
+
+define <4 x double> @exp_v4(<4 x double> %in) {
+; COMMON-LABEL: @exp_v4(
+; COMMON-SAME: <4 x double> %[[IN:[a-zA-Z0-9_]+]]
+; LIBMVEC-X86: %[[CALL:[a-zA-Z0-9_]+]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> %[[IN]])
+; SVML: %[[CALL:[a-zA-Z0-9_]+]] = call <4 x double> @__svml_exp4(<4 x double> %[[IN]])
+; F32: %[[CALL:[a-zA-Z0-9_]+]] = call <4 x double> @llvm.exp.v4f64(<4 x double> %[[IN]])
+; F64-NOT: call <4 x double> @llvm.exp.v4f64
+; COMMON: ret <4 x double> %[[CALL]]
+  %call = call <4 x double> @llvm.exp.v4f64(<4 x double> %in) ; replaced only in the F64 runs
+  ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
+
+define <4 x float> @exp_f32(<4 x float> %in) {
+; COMMON-LABEL: @exp_f32(
+; COMMON-SAME: <4 x float> %[[IN1:[a-zA-Z0-9_]+]]
+; LIBMVEC-X86: %[[#CALL1:]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> %[[IN1]])
+; SVML: %[[#CALL1:]] = call <4 x float> @__svml_expf4(<4 x float> %[[IN1]])
+; MASSV: %[[#CALL1:]] = call <4 x float>  @__expf4_massv(<4 x float> %[[IN1]])
+; ACCELERATE: %[[#CALL1:]] = call <4 x float> @vexpf(<4 x float> %[[IN1]])
+; COMMON-NOT: call <4 x float> @llvm.exp.v4f32
+; COMMON: ret <4 x float> %[[#CALL1]]
+  %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in) ; replaced in every run
+  ret <4 x float> %call
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+
+define double @exp_f64(double %in) {
+; The scalar intrinsic call must stay, only vector operands are replaced.
+; COMMON-LABEL: @exp_f64(
+; COMMON: %[[CALL2:[a-zA-Z0-9_]+]] = call double @llvm.exp.f64
+; COMMON: ret double %[[CALL2]]
+  %call = call double @llvm.exp.f64(double %in)
+  ret double %call
+}
+
+declare double @llvm.exp.f64(double) #0
+
+define <4 x double> @powi_v4(<4 x double> %in){
+; Exercises a vector intrinsic that also takes a scalar operand (the i32
+; exponent). No vector library has a substitute for powi, so the call stays.
+; COMMON-LABEL: @powi_v4(
+; COMMON: %[[CALL3:[a-zA-Z0-9_]+]] = call <4 x double> @llvm.powi.v4f64
+; COMMON: ret <4 x double> %[[CALL3]]
+  %call = call <4 x double> @llvm.powi.v4f64(<4 x double> %in, i32 3)
+  ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) #0
+
+define <3 x double> @exp_v3(<3 x double> %in) {
+; Replacement requires a mapping with exactly the vector width of the call.
+; The intrinsic call on 3 elements is therefore expected to stay.
+; COMMON-LABEL: @exp_v3(
+; COMMON: %[[CALL4:[a-zA-Z0-9_]+]] = call <3 x double> @llvm.exp.v3f64
+; COMMON: ret <3 x double> %[[CALL4]]
+  %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in)
+  ret <3 x double> %call
+}
+
+declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0
+
+; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_exp(<4 x double>) #0
+; LIBMVEC-X86: declare <4 x float> @_ZGVbN4v_expf(<4 x float>) #0
+; SVML: declare <4 x double> @__svml_exp4(<4 x double>) #0
+; SVML: declare <4 x float> @__svml_expf4(<4 x float>) #0
+; MASSV: declare <4 x float> @__expf4_massv(<4 x float>) #0
+; ACCELERATE: declare <4 x float> @vexpf(<4 x float>) #0
+
+attributes #0 = {nounwind readnone}
+