diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" @@ -650,6 +651,11 @@ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoisting) addPass(ConstantHoistingPass()); + // Replace calls to LLVM intrinsics (e.g., exp, log) operating on vector + // operands with calls to the corresponding functions in a vector library. + if (getOptLevel() != CodeGenOpt::None) + addPass(ReplaceWithVeclib()); + if (getOptLevel() != CodeGenOpt::None && !Opt.DisablePartialLibcallInlining) addPass(PartiallyInlineLibCallsPass()); diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -39,6 +39,7 @@ FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) +FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -448,6 +448,10 @@ /// shuffles. 
FunctionPass *createExpandReductionsPass(); + // This pass replaces intrinsics operating on vector operands with calls to + // the corresponding function in a vector library (e.g., SVML, libmvec). + FunctionPass *createReplaceWithVeclibLegacyPass(); + // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h @@ -0,0 +1,36 @@ +//===- ReplaceWithVeclib.h - Replace vector intrinsics with veclib calls -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics +// with vector operands) with matching calls to functions from a vector +// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H +#define LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H + +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" + +namespace llvm { +struct ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +// Legacy pass +struct ReplaceWithVeclibLegacy : public FunctionPass { + static char ID; + ReplaceWithVeclibLegacy() : FunctionPass(ID) { + initializeReplaceWithVeclibLegacyPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; +}; + +} // End namespace llvm +#endif // LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -380,6 +380,7 @@ void initializeRegionViewerPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); void initializeRenameIndependentSubregsPass(PassRegistry&); +void initializeReplaceWithVeclibLegacyPass(PassRegistry &); void initializeResetMachineFunctionPass(PassRegistry&); void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializeRewriteStatepointsForGCLegacyPassPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -147,6 +147,7 @@ RegisterUsageInfo.cpp RegUsageInfoCollector.cpp RegUsageInfoPropagate.cpp + ReplaceWithVeclib.cpp ResetMachineFunctionPass.cpp SafeStack.cpp SafeStackLayout.cpp diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -0,0 +1,256 @@ +//=== ReplaceWithVeclib.cpp - Replace vector
intrinsics with veclib calls ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics +// with vector operands) with matching calls to functions from a vector +// library (e.g., libmvec, SVML) according to TargetLibraryInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "replace-with-veclib" + +STATISTIC(NumCallsReplaced, + "Number of calls to intrinsics that have been replaced."); + +STATISTIC(NumTLIFuncDeclAdded, + "Number of vector library function declarations added."); + +STATISTIC(NumFuncUsedAdded, + "Number of functions added to `llvm.compiler.used`"); + +static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) { + Module *M = CI.getModule(); + + Function *OldFunc = CI.getCalledFunction(); + + // Check if the vector library function is already declared in this module, + // otherwise insert it.
+ Function *TLIFunc = M->getFunction(TLIName); + if (!TLIFunc) { + TLIFunc = Function::Create(OldFunc->getFunctionType(), + Function::ExternalLinkage, TLIName, *M); + TLIFunc->copyAttributesFrom(OldFunc); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `" + << TLIName << "` of type `" << *(TLIFunc->getType()) + << "` to module.\n"); + + ++NumTLIFuncDeclAdded; + + // Add the freshly created function to llvm.compiler.used, + // similar to as it is done in InjectTLIMappings + appendToCompilerUsed(*M, {TLIFunc}); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName + << "` to `@llvm.compiler.used`.\n"); + ++NumFuncUsedAdded; + } + + // Replace the call to the vector intrinsic with a call + // to the corresponding function from the vector library. + IRBuilder<> IRBuilder(&CI); + SmallVector<Value *> Args(CI.arg_operands()); + // Preserve the operand bundles. + SmallVector<OperandBundleDef, 1> OpBundles; + CI.getOperandBundlesAsDefs(OpBundles); + CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles); + assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() && + "Expecting function types to be identical"); + CI.replaceAllUsesWith(Replacement); + if (isa<FPMathOperator>(Replacement)) { + // Preserve fast math flags for FP math. + Replacement->copyFastMathFlags(&CI); + } + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" + << OldFunc->getName() << "` with call to `" << TLIName + << "`.\n"); + ++NumCallsReplaced; + return true; +} + +static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, + CallInst &CI) { + if (!CI.getCalledFunction()) { + return false; + } + + auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID(); + if (IntrinsicID == Intrinsic::not_intrinsic) { + // Replacement is only performed for intrinsic functions + return false; + } + + // Convert vector arguments to scalar type and check that + // all vector operands have identical vector width.
+ ElementCount VF = ElementCount::getFixed(0); + SmallVector<Type *> ScalarTypes; + for (auto Arg : enumerate(CI.arg_operands())) { + auto *ArgType = Arg.value()->getType(); + // Vector calls to intrinsics can still have + // scalar operands for specific arguments. + if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) { + ScalarTypes.push_back(ArgType); + } else { + // The argument in this place should be a vector if + // this is a call to a vector intrinsic. + auto *VectorArgTy = dyn_cast<VectorType>(ArgType); + if (!VectorArgTy) { + // The argument is not a vector, do not perform + // the replacement. + return false; + } + ElementCount NumElements = VectorArgTy->getElementCount(); + if (NumElements.isScalable()) { + // The current implementation does not support + // scalable vectors. + return false; + } + if (VF.isNonZero() && VF != NumElements) { + // The different arguments differ in vector size. + return false; + } else { + VF = NumElements; + } + ScalarTypes.push_back(VectorArgTy->getElementType()); + } + } + + // Try to reconstruct the name for the scalar version of this + // intrinsic using the intrinsic ID and the argument types + // converted to scalar above. + std::string ScalarName; + if (Intrinsic::isOverloaded(IntrinsicID)) { + ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes); + } else { + ScalarName = Intrinsic::getName(IntrinsicID).str(); + } + + if (!TLI.isFunctionVectorizable(ScalarName)) { + // The TargetLibraryInfo does not contain a vectorized version of + // the scalar function. + return false; + } + + // Try to find the mapping for the scalar version of this intrinsic + // and the exact vector width of the call operands in the + // TargetLibraryInfo.
+ const std::string TLIName = + std::string(TLI.getVectorizedFunction(ScalarName, VF)); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" + << ScalarName << "` and vector width " << VF << ".\n"); + + if (!TLIName.empty()) { + // Found the correct mapping in the TargetLibraryInfo, + // replace the call to the intrinsic with a call to + // the vector library function. + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName + << "`.\n"); + return replaceWithTLIFunction(CI, TLIName); + } + + return false; +} + +static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { + bool Changed = false; + SmallVector<CallInst *> ReplacedCalls; + for (auto &I : instructions(F)) { + if (auto *CI = dyn_cast<CallInst>(&I)) { + if (replaceWithCallToVeclib(TLI, *CI)) { + ReplacedCalls.push_back(CI); + Changed = true; + } + } + } + // Erase the calls to the intrinsics that have been replaced + // with calls to the vector library. + for (auto *CI : ReplacedCalls) { + CI->eraseFromParent(); + } + return Changed; +} + +//////////////////////////////////////////////////////////////////////////////// +// New pass manager implementation. +//////////////////////////////////////////////////////////////////////////////// +PreservedAnalyses ReplaceWithVeclib::run(Function &F, + FunctionAnalysisManager &AM) { + const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto Changed = runImpl(TLI, F); + if (Changed) { + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<TargetLibraryAnalysis>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<AAManager>(); + PA.preserve<LoopAccessAnalysis>(); + PA.preserve<DemandedBitsAnalysis>(); + PA.preserve<OptimizationRemarkEmitterAnalysis>(); + PA.preserve<GlobalsAA>(); + return PA; + } else { + // The pass did not replace any calls, hence it preserves all analyses. + return PreservedAnalyses::all(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy PM Implementation.
+//////////////////////////////////////////////////////////////////////////////// +bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) { + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return runImpl(TLI, F); +} + +void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<LoopAccessLegacyAnalysis>(); + AU.addPreserved<DemandedBitsWrapperPass>(); + AU.addPreserved<OptimizationRemarkEmitterWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy Pass manager initialization +//////////////////////////////////////////////////////////////////////////////// +char ReplaceWithVeclibLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE, + "Replace intrinsics with calls to vector library", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE, + "Replace intrinsics with calls to vector library", false, + false) + +FunctionPass *llvm::createReplaceWithVeclibLegacyPass() { + return new ReplaceWithVeclibLegacy(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -858,6 +858,9 @@ if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createReplaceWithVeclibLegacyPass()); + if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) addPass(createPartiallyInlineLibCallsPass()); diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -54,6 +54,7 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant
Hoisting +; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -34,6 +34,7 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant Hoisting +; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes +; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML +; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 +; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV +; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x double> @exp_v4(<4 x double> %in) { +; SVML-LABEL: define {{[^@]+}}@exp_v4 +; SVML-SAME: (<4 x double> [[IN:%.*]]) { +; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]]) +; SVML-NEXT: 
ret <4 x double> [[TMP1]] +; +; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4 +; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) { +; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]]) +; LIBMVEC-X86-NEXT: ret <4 x double> [[TMP1]] +; +; MASSV-LABEL: define {{[^@]+}}@exp_v4 +; MASSV-SAME: (<4 x double> [[IN:%.*]]) { +; MASSV-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]]) +; MASSV-NEXT: ret <4 x double> [[CALL]] +; +; ACCELERATE-LABEL: define {{[^@]+}}@exp_v4 +; ACCELERATE-SAME: (<4 x double> [[IN:%.*]]) { +; ACCELERATE-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]]) +; ACCELERATE-NEXT: ret <4 x double> [[CALL]] +; + %call = call <4 x double> @llvm.exp.v4f64(<4 x double> %in) + ret <4 x double> %call +} + +declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0 + +define <4 x float> @exp_f32(<4 x float> %in) { +; SVML-LABEL: define {{[^@]+}}@exp_f32 +; SVML-SAME: (<4 x float> [[IN:%.*]]) { +; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]]) +; SVML-NEXT: ret <4 x float> [[TMP1]] +; +; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32 +; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) { +; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]]) +; LIBMVEC-X86-NEXT: ret <4 x float> [[TMP1]] +; +; MASSV-LABEL: define {{[^@]+}}@exp_f32 +; MASSV-SAME: (<4 x float> [[IN:%.*]]) { +; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__expf4_massv(<4 x float> [[IN]]) +; MASSV-NEXT: ret <4 x float> [[TMP1]] +; +; ACCELERATE-LABEL: define {{[^@]+}}@exp_f32 +; ACCELERATE-SAME: (<4 x float> [[IN:%.*]]) { +; ACCELERATE-NEXT: [[TMP1:%.*]] = call <4 x float> @vexpf(<4 x float> [[IN]]) +; ACCELERATE-NEXT: ret <4 x float> [[TMP1]] +; + %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in) + ret <4 x float> %call +} + +declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 + +; No replacement should take place for non-vector intrinsic. 
+define double @exp_f64(double %in) { +; COMMON-LABEL: define {{[^@]+}}@exp_f64 +; COMMON-SAME: (double [[IN:%.*]]) { +; COMMON-NEXT: [[CALL:%.*]] = call double @llvm.exp.f64(double [[IN]]) +; COMMON-NEXT: ret double [[CALL]] +; + %call = call double @llvm.exp.f64(double %in) + ret double %call +} + +declare double @llvm.exp.f64(double) #0 + +; Check that the pass works with scalar operands on +; vector intrinsics. No vector library has a substitute for powi. +define <4 x double> @powi_v4(<4 x double> %in){ +; COMMON-LABEL: define {{[^@]+}}@powi_v4 +; COMMON-SAME: (<4 x double> [[IN:%.*]]) { +; COMMON-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.powi.v4f64(<4 x double> [[IN]], i32 3) +; COMMON-NEXT: ret <4 x double> [[CALL]] +; + %call = call <4 x double> @llvm.powi.v4f64(<4 x double> %in, i32 3) + ret <4 x double> %call +} + +declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) #0 + +; Replacement should not take place if the vector length +; does not match exactly. +define <3 x double> @exp_v3(<3 x double> %in) { +; COMMON-LABEL: define {{[^@]+}}@exp_v3 +; COMMON-SAME: (<3 x double> [[IN:%.*]]) { +; COMMON-NEXT: [[CALL:%.*]] = call <3 x double> @llvm.exp.v3f64(<3 x double> [[IN]]) +; COMMON-NEXT: ret <3 x double> [[CALL]] +; + %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in) + ret <3 x double> %call +} + +declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0 + +attributes #0 = {nounwind readnone} diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -51,6 +51,7 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant Hoisting +; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -351,6 +351,7 @@ initializeExpandReductionsPass(*Registry); initializeHardwareLoopsPass(*Registry); initializeTransformUtils(*Registry); + initializeReplaceWithVeclibLegacyPass(*Registry); // Initialize debugging passes. initializeScavengerTestPass(*Registry); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -504,7 +504,7 @@ "expand-reductions", "indirectbr-expand", "generic-to-nvvm", "expandmemcmp", "loop-reduce", "lower-amx-type", - "polyhedral-info"}; + "polyhedral-info", "replace-with-veclib"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -581,6 +581,7 @@ initializeWriteBitcodePassPass(Registry); initializeHardwareLoopsPass(Registry); initializeTypePromotionPass(Registry); + initializeReplaceWithVeclibLegacyPass(Registry); #ifdef BUILD_EXAMPLES initializeExampleIRTransforms(Registry); diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -168,6 +168,7 @@ "RegisterScavenging.cpp", "RegisterUsageInfo.cpp", "RenameIndependentSubregs.cpp", + "ReplaceWithVeclib.cpp", "ResetMachineFunctionPass.cpp", "SafeStack.cpp", "SafeStackLayout.cpp",