diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -15,6 +15,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/TargetParser/Triple.h"
+#include
 #include

 namespace llvm {
@@ -34,6 +35,14 @@
   bool Masked;
 };

+/// Describes a vector math function VectorFnName that is equivalent to the
+/// scalar math function ScalarFnName, along with its corresponding LLVM
+/// intrinsic ID.
+struct VectorMathDesc {
+  StringRef ScalarFnName;
+  StringRef VectorFnName;
+  Intrinsic::ID ID;
+};
+
 enum LibFunc : unsigned {
 #define TLI_DEFINE_ENUM
 #include "llvm/Analysis/TargetLibraryInfo.def"
@@ -76,6 +85,16 @@
   /// on VectorFnName rather than ScalarFnName.
   std::vector<VecDesc> ScalarDescs;

+  /// A set of scalar -> vector math function mappings.
+  /// The vector math functions compute the same mathematical operation over
+  /// arrays of operands. They differ from the vector functions in
+  /// VectorDescs/ScalarDescs, which operate element-wise on SIMD vector
+  /// operands.
+  /// Vector math descriptors - sorted by ScalarFnName.
+  std::vector<VectorMathDesc> VectorMathFuncDescs;
+  /// Vector math descriptors - sorted by intrinsic ID.
+  std::vector<VectorMathDesc> VectorMathIntrinDescs;
+
   /// Return true if the function type FTy is valid for the library function
   /// F, regardless of whether the function is available.
   bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
@@ -175,6 +194,32 @@
   StringRef getVectorizedFunction(StringRef F, const ElementCount &VF,
                                   bool Masked) const;

+  /// Add a set of scalar -> vector math function mappings for the given
+  /// vector library, queryable via getVectorMathIntrinsic.
+  void addVectorMathFunctions(ArrayRef<VectorMathDesc> Fns);
+
+  /// Calls addVectorMathFunctions with a known preset of functions for the
+  /// given vector library.
+  void addVectorMathFunctionsFromVecLib(enum VectorLibrary VecLib);
+
+  /// Return true if the scalar function F has an equivalent vector math
+  /// function.
+  bool isVectorMathFunctionAvailable(StringRef F) const;
+
+  /// Return true if the intrinsic ID has an equivalent vector math function.
+  bool isVectorMathFunctionAvailable(Intrinsic::ID ID) const;
+
+  /// Return the corresponding vector math intrinsic for the scalar function F.
+  /// If no such mapping exists, return not_intrinsic.
+  Intrinsic::ID getVectorMathIntrinsic(StringRef F) const;
+
+  /// Return the corresponding vector math function for the intrinsic.
+  /// If no such mapping exists, return an empty string.
+  StringRef getVectorMathFunction(Intrinsic::ID ID) const;
+
+  /// Return true if the VecLib has vector math functions.
+  bool hasVectorMathFunctions() const { return !VectorMathFuncDescs.empty(); }
+
   /// Set to true iff i32 parameters to library functions should have signext
   /// or zeroext attributes if they correspond to C-level int or unsigned int,
   /// respectively.
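For illustration, here is a minimal sketch of how a client pass might query the
new mapping API (the surrounding variables such as TLI and CI are hypothetical;
only the TargetLibraryInfo methods come from this patch):

    // Assume TLI is a TargetLibraryInfo obtained from the analysis manager and
    // CI is a call to a scalar math routine such as "expf".
    StringRef ScalarName = CI->getCalledFunction()->getName();
    if (TLI.isVectorMathFunctionAvailable(ScalarName)) {
      // Map the scalar name to the experimental vector math intrinsic...
      Intrinsic::ID VecID = TLI.getVectorMathIntrinsic(ScalarName);
      // ...and map the intrinsic to the library entry point that a target
      // pass (e.g. the PPC MASS lowering below) would eventually emit.
      StringRef VecName = TLI.getVectorMathFunction(VecID);
      // With -vector-library=MASSV, "expf" yields "__vsexp" here.
      (void)VecName;
    }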
@@ -353,6 +398,19 @@ bool Masked = false) const { return Impl->getVectorizedFunction(F, VF, Masked); } + bool isVectorMathFunctionAvailable(StringRef F) const { + return Impl->isVectorMathFunctionAvailable(F); + } + bool isVectorMathFunctionAvailable(Intrinsic::ID ID) const { + return Impl->isVectorMathFunctionAvailable(ID); + } + Intrinsic::ID getVectorMathIntrinsic(StringRef F) const { + return Impl->getVectorMathIntrinsic(F); + } + StringRef getVectorMathFunction(Intrinsic::ID ID) const { + return Impl->getVectorMathFunction(ID); + } + bool hasVectorMathFunctions() const { return Impl->hasVectorMathFunctions(); } /// Tests if the function is both available and a candidate for optimized code /// generation. diff --git a/llvm/include/llvm/Analysis/VectorMathFuncs.def b/llvm/include/llvm/Analysis/VectorMathFuncs.def new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/VectorMathFuncs.def @@ -0,0 +1,135 @@ +//===-- VectorMathFuncs.def - Library information ----------*- C++ -*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This .def file creates mapping from standard IEEE math functions to +// their corresponding LLVM vector math intrinsics. +// LLVM vector math intrinsics will be converted to the actual vector +// math functions supported in the specified framework or library. + +#define TLI_DEFINE_VECTOR_FUNC(SCAL, VEC, ID) {SCAL, VEC, ID}, + +#if defined(TLI_DEFINE_MASSV_VECTOR_MATH_FUNCS) +// IBM MASS library's vector math functions + +TLI_DEFINE_VECTOR_FUNC("acosf", "__vsacos", Intrinsic::experimental_vector_acosf) +TLI_DEFINE_VECTOR_FUNC("__acosf_finite", "__vsacos", + Intrinsic::experimental_vector_acosf) +TLI_DEFINE_VECTOR_FUNC("acos", "__vacos", Intrinsic::experimental_vector_acos) +TLI_DEFINE_VECTOR_FUNC("__acos_finite", "__vacos", Intrinsic::experimental_vector_acos) + +TLI_DEFINE_VECTOR_FUNC("acoshf", "__vsacosh", Intrinsic::experimental_vector_acoshf) +TLI_DEFINE_VECTOR_FUNC("__acoshf_finite", "__vsacosh", Intrinsic::experimental_vector_acoshf) +TLI_DEFINE_VECTOR_FUNC("acosh", "__vacosh", Intrinsic::experimental_vector_acosh) +TLI_DEFINE_VECTOR_FUNC("__acosh_finite", "__vacosh", Intrinsic::experimental_vector_acosh) + +TLI_DEFINE_VECTOR_FUNC("asinf", "__vsasin", Intrinsic::experimental_vector_asinf) +TLI_DEFINE_VECTOR_FUNC("__asinf_finite", "__vsasin", Intrinsic::experimental_vector_asinf) +TLI_DEFINE_VECTOR_FUNC("asin", "__vasin", Intrinsic::experimental_vector_asin) +TLI_DEFINE_VECTOR_FUNC("__asin_finite", "__vasin", Intrinsic::experimental_vector_asin) + +TLI_DEFINE_VECTOR_FUNC("asinhf", "__vsasinh", Intrinsic::experimental_vector_asinhf) +TLI_DEFINE_VECTOR_FUNC("asinh", "__vasinh", Intrinsic::experimental_vector_asinh) + +TLI_DEFINE_VECTOR_FUNC("atanf", "__vsatan", Intrinsic::experimental_vector_atanf) +TLI_DEFINE_VECTOR_FUNC("atan", "__vatan", Intrinsic::experimental_vector_atan) + +TLI_DEFINE_VECTOR_FUNC("atan2", "__vatan2", Intrinsic::experimental_vector_atan2) +TLI_DEFINE_VECTOR_FUNC("__atan2_finite", "__vatan2", Intrinsic::experimental_vector_atan2) +TLI_DEFINE_VECTOR_FUNC("atan2f", "__vsatan2", Intrinsic::experimental_vector_atan2f) +TLI_DEFINE_VECTOR_FUNC("__atan2f_finite", "__vsatan2", Intrinsic::experimental_vector_atan2f) + +TLI_DEFINE_VECTOR_FUNC("atanhf", "__vsatanh", 
Intrinsic::experimental_vector_atanhf) +TLI_DEFINE_VECTOR_FUNC("__atanhf_finite", "__vsatanh", Intrinsic::experimental_vector_atanhf) +TLI_DEFINE_VECTOR_FUNC("atanh", "__vatanh", Intrinsic::experimental_vector_atanh) +TLI_DEFINE_VECTOR_FUNC("__atanh_finite", "__vatanh", Intrinsic::experimental_vector_atanh) + +TLI_DEFINE_VECTOR_FUNC("cbrtf", "__vscbrt", Intrinsic::experimental_vector_cbrtf) +TLI_DEFINE_VECTOR_FUNC("cbrt", "__vcbrt", Intrinsic::experimental_vector_cbrt) + +TLI_DEFINE_VECTOR_FUNC("cosf", "__vscos", Intrinsic::experimental_vector_cosf) +TLI_DEFINE_VECTOR_FUNC("llvm.cos.f32", "__vscos", Intrinsic::experimental_vector_cosf) +TLI_DEFINE_VECTOR_FUNC("cos", "__vcos", Intrinsic::experimental_vector_cos) +TLI_DEFINE_VECTOR_FUNC("llvm.cos.f64", "__vcos", Intrinsic::experimental_vector_cos) + +TLI_DEFINE_VECTOR_FUNC("coshf", "__vscosh", Intrinsic::experimental_vector_coshf) +TLI_DEFINE_VECTOR_FUNC("__coshf_finite", "__vscosh", Intrinsic::experimental_vector_coshf) +TLI_DEFINE_VECTOR_FUNC("cosh", "__vcosh", Intrinsic::experimental_vector_cosh) +TLI_DEFINE_VECTOR_FUNC("__cosh_finite", "__vcosh", Intrinsic::experimental_vector_cosh) + +TLI_DEFINE_VECTOR_FUNC("erff", "__vserf", Intrinsic::experimental_vector_erff) +TLI_DEFINE_VECTOR_FUNC("erf", "__verf", Intrinsic::experimental_vector_erf) + +TLI_DEFINE_VECTOR_FUNC("erfcf", "__vserfc", Intrinsic::experimental_vector_erfcf) +TLI_DEFINE_VECTOR_FUNC("erfc", "__verfc", Intrinsic::experimental_vector_erfc) + +TLI_DEFINE_VECTOR_FUNC("expf", "__vsexp", Intrinsic::experimental_vector_expf) +TLI_DEFINE_VECTOR_FUNC("__expf_finite", "__vsexp", Intrinsic::experimental_vector_expf) +TLI_DEFINE_VECTOR_FUNC("llvm.exp.f32", "__vsexp", Intrinsic::experimental_vector_expf) +TLI_DEFINE_VECTOR_FUNC("exp", "__vexp", Intrinsic::experimental_vector_exp) +TLI_DEFINE_VECTOR_FUNC("__exp_finite", "__vexp", Intrinsic::experimental_vector_exp) +TLI_DEFINE_VECTOR_FUNC("llvm.exp.f64", "__vexp", Intrinsic::experimental_vector_exp) + +TLI_DEFINE_VECTOR_FUNC("expm1f", "__vsexpm1", Intrinsic::experimental_vector_expm1f) +TLI_DEFINE_VECTOR_FUNC("expm1", "__vexpm1", Intrinsic::experimental_vector_expm1) + +TLI_DEFINE_VECTOR_FUNC("hypotf", "__vshypot", Intrinsic::experimental_vector_hypotf) +TLI_DEFINE_VECTOR_FUNC("hypot", "__vhypot", Intrinsic::experimental_vector_hypot) + +TLI_DEFINE_VECTOR_FUNC("lgammaf", "__vslgamma", Intrinsic::experimental_vector_lgammaf) +TLI_DEFINE_VECTOR_FUNC("lgamma", "__vlgamma", Intrinsic::experimental_vector_lgamma) + +TLI_DEFINE_VECTOR_FUNC("logf", "__vslog", Intrinsic::experimental_vector_logf) +TLI_DEFINE_VECTOR_FUNC("__logf_finite", "__vslog", Intrinsic::experimental_vector_logf) +TLI_DEFINE_VECTOR_FUNC("llvm.log.f32", "__vslog", Intrinsic::experimental_vector_logf) +TLI_DEFINE_VECTOR_FUNC("log", "__vlog", Intrinsic::experimental_vector_log) +TLI_DEFINE_VECTOR_FUNC("__log_finite", "__vlog", Intrinsic::experimental_vector_log) +TLI_DEFINE_VECTOR_FUNC("llvm.log.f64", "__vlog", Intrinsic::experimental_vector_log) + +TLI_DEFINE_VECTOR_FUNC("log10f", "__vslog10", Intrinsic::experimental_vector_log10f) +TLI_DEFINE_VECTOR_FUNC("__log10f_finite", "__vslog10", Intrinsic::experimental_vector_log10f) +TLI_DEFINE_VECTOR_FUNC("llvm.log10.f32", "__vslog10", Intrinsic::experimental_vector_log10f) +TLI_DEFINE_VECTOR_FUNC("log10", "__vlog10", Intrinsic::experimental_vector_log10) +TLI_DEFINE_VECTOR_FUNC("__log10_finite", "__vlog10", Intrinsic::experimental_vector_log10) +TLI_DEFINE_VECTOR_FUNC("llvm.log10.f64", "__vlog10", 
Intrinsic::experimental_vector_log10) + +TLI_DEFINE_VECTOR_FUNC("log1pf", "__vslog1p", Intrinsic::experimental_vector_log1pf) +TLI_DEFINE_VECTOR_FUNC("log1p", "__vlog1p", Intrinsic::experimental_vector_log1p) + +TLI_DEFINE_VECTOR_FUNC("powf", "__vspow", Intrinsic::experimental_vector_powf) +TLI_DEFINE_VECTOR_FUNC("__powf_finite", "__vspow", Intrinsic::experimental_vector_powf) +TLI_DEFINE_VECTOR_FUNC("llvm.pow.f32", "__vspow", Intrinsic::experimental_vector_powf) +TLI_DEFINE_VECTOR_FUNC("pow", "__vpow", Intrinsic::experimental_vector_pow) +TLI_DEFINE_VECTOR_FUNC("__pow_finite", "__vpow", Intrinsic::experimental_vector_pow) +TLI_DEFINE_VECTOR_FUNC("llvm.pow.f64", "__vpow", Intrinsic::experimental_vector_pow) + +TLI_DEFINE_VECTOR_FUNC("rsqrt", "__vrsqrt", Intrinsic::experimental_vector_rsqrt) + +TLI_DEFINE_VECTOR_FUNC("sinf", "__vssin", Intrinsic::experimental_vector_sinf) +TLI_DEFINE_VECTOR_FUNC("llvm.sin.f32", "__vssin", Intrinsic::experimental_vector_sinf) +TLI_DEFINE_VECTOR_FUNC("sin", "__vsin", Intrinsic::experimental_vector_sin) +TLI_DEFINE_VECTOR_FUNC("llvm.sin.f64", "__vsin", Intrinsic::experimental_vector_sin) + +TLI_DEFINE_VECTOR_FUNC("sinhf", "__vssinh", Intrinsic::experimental_vector_sinhf) +TLI_DEFINE_VECTOR_FUNC("__sinhf_finite", "__vssinh", Intrinsic::experimental_vector_sinhf) +TLI_DEFINE_VECTOR_FUNC("sinh", "__vsinh", Intrinsic::experimental_vector_sinh) +TLI_DEFINE_VECTOR_FUNC("__sinh_finite", "__vsinh", Intrinsic::experimental_vector_sinh) + +TLI_DEFINE_VECTOR_FUNC("sqrt", "__vsqrt", Intrinsic::experimental_vector_sqrt) + +TLI_DEFINE_VECTOR_FUNC("tanf", "__vstan", Intrinsic::experimental_vector_tanf) +TLI_DEFINE_VECTOR_FUNC("tan", "__vtan", Intrinsic::experimental_vector_tan) + +TLI_DEFINE_VECTOR_FUNC("tanhf", "__vstanh", Intrinsic::experimental_vector_tanhf) +TLI_DEFINE_VECTOR_FUNC("tanh", "__vtanh", Intrinsic::experimental_vector_tanh) + +#else +#error "Must choose which vector library functions are to be defined." +#endif + +#undef TLI_DEFINE_MASSV_VECTOR_MATH_FUNCS +#undef TLI_DEFINE_VECTOR_FUNC diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1262,6 +1262,421 @@ } // FIXME: Consider maybe adding intrinsics for sitofp, uitofp. 
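The new .def file is consumed the same way as the existing VecFuncs.def: a
client defines the guard macro and includes the file to materialize an
initializer list. This is exactly what addVectorMathFunctionsFromVecLib does in
TargetLibraryInfo.cpp further down:

    const VectorMathDesc Funcs[] = {
    #define TLI_DEFINE_MASSV_VECTOR_MATH_FUNCS
    #include "llvm/Analysis/VectorMathFuncs.def"
    };
    // After preprocessing, an entry such as
    //   TLI_DEFINE_VECTOR_FUNC("cosf", "__vscos", Intrinsic::experimental_vector_cosf)
    // becomes the aggregate initializer
    //   {"cosf", "__vscos", Intrinsic::experimental_vector_cosf},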
+//===--------------- Floating Point Vector Math Intrinsics ----------------===// +// + +def int_experimental_vector_acosf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_acos + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_acoshf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_acosh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_asinf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_asin + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_asinhf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_asinh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atanf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atan + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atan2f + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atan2 + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atanhf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, 
WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_atanh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_cbrtf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_cbrt + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_cosf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_cos + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_coshf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_cosh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_erff + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_erf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_erfcf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_erfc + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_expf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_exp + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_expm1f + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, 
IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_expm1 + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_hypotf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_hypot + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_lgammaf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_lgamma + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_logf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_log + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_log10f + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_log10 + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_log1pf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_log1p + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_powf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_pow + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty], + [ + IntrArgMemOnly, 
IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoCapture>, NoAlias>, NoAlias>, + NoAlias>, WriteOnly>, ReadOnly>, + ReadOnly> + ]>; +def int_experimental_vector_rsqrt + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_sqrt + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_sinf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_sin + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_sinhf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_sinh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_tanf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_tan + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_tanhf + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; +def int_experimental_vector_tanh + : DefaultAttrsIntrinsic< + [], [llvm_anyint_ty, llvm_anyptr_ty, llvm_anyptr_ty], [ + IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, WriteOnly>, + ReadOnly> + ]>; //===------------------------- Expect Intrinsics --------------------------===// // diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -142,8 +142,9 @@ EnableDebugEntryValues(false), ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), Hotpatch(false), - PPCGenScalarMASSEntries(false), JMCInstrument(false), - EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false), + PPCGenScalarMASSEntries(false), PPCGenVectorMASSEntries(false), + JMCInstrument(false), EnableCFIFixup(false), MisExpect(false), + XCOFFReadOnlyPointers(false), FPDenormalMode(DenormalMode::IEEE, 
       DenormalMode::IEEE) {}

   /// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -346,6 +347,9 @@
   /// Enables scalar MASS conversions
   unsigned PPCGenScalarMASSEntries : 1;

+  /// Enables vector MASS conversions
+  unsigned PPCGenVectorMASSEntries : 1;
+
   /// Enable JustMyCode instrumentation.
   unsigned JMCInstrument : 1;

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -34,6 +34,9 @@

   /// When true, Memcpy is disabled.
   static bool Memcpy;
+
+  /// When true, VectorMath is disabled.
+  static bool VectorMath;
 };

 /// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/TargetParser/Triple.h"
@@ -863,6 +864,7 @@
   }

   TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T);
+  TLI.addVectorMathFunctionsFromVecLib(ClVectorLibrary);
 }

 TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
@@ -888,6 +890,8 @@
   memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
   VectorDescs = TLI.VectorDescs;
   ScalarDescs = TLI.ScalarDescs;
+  VectorMathFuncDescs = TLI.VectorMathFuncDescs;
+  VectorMathIntrinDescs = TLI.VectorMathIntrinDescs;
 }

 TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
@@ -901,6 +905,8 @@
             AvailableArray);
   VectorDescs = TLI.VectorDescs;
   ScalarDescs = TLI.ScalarDescs;
+  VectorMathFuncDescs = TLI.VectorMathFuncDescs;
+  VectorMathIntrinDescs = TLI.VectorMathIntrinDescs;
 }

 TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
@@ -1246,6 +1252,80 @@
   return StringRef();
 }

+void TargetLibraryInfoImpl::addVectorMathFunctions(
+    ArrayRef<VectorMathDesc> Fns) {
+  llvm::append_range(VectorMathFuncDescs, Fns);
+  llvm::sort(VectorMathFuncDescs,
+             [](const VectorMathDesc &LHS, const VectorMathDesc &RHS) {
+               return LHS.ScalarFnName < RHS.ScalarFnName;
+             });
+
+  llvm::append_range(VectorMathIntrinDescs, Fns);
+  llvm::sort(VectorMathIntrinDescs,
+             [](const VectorMathDesc &LHS, const VectorMathDesc &RHS) {
+               return LHS.ID < RHS.ID;
+             });
+}
+
+void TargetLibraryInfoImpl::addVectorMathFunctionsFromVecLib(
+    enum VectorLibrary VecLib) {
+  if (VecLib == MASSV) {
+    const VectorMathDesc Funcs[] = {
+#define TLI_DEFINE_MASSV_VECTOR_MATH_FUNCS
+#include "llvm/Analysis/VectorMathFuncs.def"
+    };
+    addVectorMathFunctions(Funcs);
+  }
+}
+
+bool TargetLibraryInfoImpl::isVectorMathFunctionAvailable(
+    StringRef FuncName) const {
+  FuncName = sanitizeFunctionName(FuncName);
+  if (FuncName.empty())
+    return false;
+
+  std::vector<VectorMathDesc>::const_iterator I =
+      llvm::lower_bound(VectorMathFuncDescs, FuncName,
+                        [](const VectorMathDesc &LHS, StringRef S) {
+                          return LHS.ScalarFnName < S;
+                        });
+  return I != VectorMathFuncDescs.end() &&
+         StringRef(I->ScalarFnName) == FuncName;
+}
+
+bool TargetLibraryInfoImpl::isVectorMathFunctionAvailable(
+    Intrinsic::ID ID) const {
+  std::vector<VectorMathDesc>::const_iterator I = llvm::lower_bound(
+      VectorMathIntrinDescs, ID,
+      [](const VectorMathDesc &LHS, Intrinsic::ID ID) { return LHS.ID < ID; });
+  return I != VectorMathIntrinDescs.end() && I->ID == ID;
+}
+
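A hedged sketch of how the new mappings could be exercised in a unit test (this
test is not part of the patch; it assumes the usual gtest style used under
llvm/unittests, and the expected strings simply restate entries from
VectorMathFuncs.def):

    TargetLibraryInfoImpl TLII;
    TLII.addVectorMathFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
    EXPECT_TRUE(TLII.isVectorMathFunctionAvailable("cosf"));
    EXPECT_EQ(TLII.getVectorMathIntrinsic("cosf"),
              Intrinsic::experimental_vector_cosf);
    EXPECT_EQ(TLII.getVectorMathFunction(Intrinsic::experimental_vector_cosf),
              "__vscos");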
+Intrinsic::ID TargetLibraryInfoImpl::getVectorMathIntrinsic(StringRef F) const {
+  F = sanitizeFunctionName(F);
+  if (F.empty())
+    return Intrinsic::not_intrinsic;
+
+  std::vector<VectorMathDesc>::const_iterator I = llvm::lower_bound(
+      VectorMathFuncDescs, F, [](const VectorMathDesc &LHS, StringRef S) {
+        return LHS.ScalarFnName < S;
+      });
+  if (I != VectorMathFuncDescs.end() && StringRef(I->ScalarFnName) == F)
+    return I->ID;
+
+  return Intrinsic::not_intrinsic;
+}
+
+StringRef TargetLibraryInfoImpl::getVectorMathFunction(Intrinsic::ID ID) const {
+  std::vector<VectorMathDesc>::const_iterator I = llvm::lower_bound(
+      VectorMathIntrinDescs, ID,
+      [](const VectorMathDesc &LHS, Intrinsic::ID ID) { return LHS.ID < ID; });
+  if (I != VectorMathIntrinDescs.end() && I->ID == ID)
+    return I->VectorFnName;
+
+  return StringRef();
+}
+
 TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
                                              FunctionAnalysisManager &) {
   if (!BaselineInfoImpl)
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -57,6 +57,7 @@
   PPCPreEmitPeephole.cpp
   PPCLowerMASSVEntries.cpp
   PPCGenScalarMASSEntries.cpp
+  PPCGenVectorMASSEntries.cpp
   GISel/PPCCallLowering.cpp
   GISel/PPCRegisterBankInfo.cpp
   GISel/PPCLegalizerInfo.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -89,6 +89,10 @@
   void initializePPCGenScalarMASSEntriesPass(PassRegistry &);
   extern char &PPCGenScalarMASSEntriesID;

+  ModulePass *createPPCGenVectorMASSEntriesPass();
+  void initializePPCGenVectorMASSEntriesPass(PassRegistry &);
+  extern char &PPCGenVectorMASSEntriesID;
+
   InstructionSelector *
   createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &,
                                const PPCRegisterBankInfo &);
diff --git a/llvm/lib/Target/PowerPC/PPCGenVectorMASSEntries.cpp b/llvm/lib/Target/PowerPC/PPCGenVectorMASSEntries.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCGenVectorMASSEntries.cpp
@@ -0,0 +1,163 @@
+//===-- PPCGenVectorMASSEntries.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation converts vector math intrinsics into their
+// corresponding MASS (vector) entries for PowerPC targets.
+// The following is an example of such a conversion:
+//     llvm.experimental.vector.tanh ---> __vtanh
+// Such lowering is legal under the fast-math option.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "ppc-gen-vector-mass"
+
+using namespace llvm;
+
+STATISTIC(NumVectorMASS, "Number of vector MASS calls created");
+
+namespace {
+
+class PPCGenVectorMASSEntries : public ModulePass {
+public:
+  static char ID;
+
+  PPCGenVectorMASSEntries() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override;
+
+  StringRef getPassName() const override {
+    return "PPC Generate Vector MASS Entries";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+private:
+  bool createVectorMASSCall(Intrinsic::ID ID, IntrinsicInst &II,
+                            TargetLibraryInfo &TLI) const;
+};
+
+} // namespace
+
+/// Create an alloca instruction in the entry block of the function.
+static AllocaInst *createEntryBlockAlloca(Function &F, Type *Ty) {
+  IRBuilder<> Builder(&F.getEntryBlock(), F.getEntryBlock().begin());
+  return Builder.CreateAlloca(Ty);
+}
+
+/// Lowers vector math intrinsics to vector MASS functions.
+/// e.g.: llvm.experimental.vector.tanh --> __vtanh
+/// Both the function prototype and its call sites are updated during lowering.
+bool PPCGenVectorMASSEntries::createVectorMASSCall(
+    Intrinsic::ID IntrinID, IntrinsicInst &II, TargetLibraryInfo &TLI) const {
+  assert(II.getIntrinsicID() == IntrinID && "Intrinsic ID mismatched.");
+
+  // Create alloca for the length parameter in vector MASS functions.
+  IRBuilder<> Builder(&II);
+  Function &F = *II.getParent()->getParent();
+  Value *Len = II.getArgOperand(0);
+  AllocaInst *AI = createEntryBlockAlloca(F, Len->getType());
+  Builder.CreateStore(Len, AI);
+
+  // Create the vector MASS call.
+  StringRef VectorFuncName = TLI.getVectorMathFunction(IntrinID);
+
+  SmallVector<Value *> CallArgs;
+  for (unsigned I = 1; I < II.arg_size(); ++I)
+    CallArgs.push_back(II.getArgOperand(I));
+  CallArgs.push_back(AI);
+
+  SmallVector<Type *> ArgsTy;
+  for (Value *Arg : CallArgs)
+    ArgsTy.push_back(Arg->getType());
+
+  FunctionType *FTy =
+      FunctionType::get(Builder.getVoidTy(), ArgsTy, /* isVarArg = */ false);
+  Module *M = II.getModule();
+  assert(M && "Expecting a valid Module.");
+
+  Function *Intrin = II.getCalledFunction();
+  const AttributeList &IntrinAttrs = Intrin->getAttributes();
+  AttributeSet FnAttrs = IntrinAttrs.getFnAttrs();
+  AttributeSet RetAttrs = IntrinAttrs.getRetAttrs();
+  SmallVector<AttributeSet> ArgAttrVec;
+  for (unsigned I = 1; I < II.arg_size(); ++I)
+    ArgAttrVec.push_back(IntrinAttrs.getParamAttrs(I));
+  ArgAttrVec.push_back(IntrinAttrs.getParamAttrs(0));
+  assert(ArgAttrVec.size() == CallArgs.size() &&
+         "Each function parameter must have an attribute set.");
+  AttributeList NewCallAttrs =
+      AttributeList::get(F.getContext(), FnAttrs, RetAttrs, ArgAttrVec);
+
+  FunctionCallee VectorFuncCallee =
+      M->getOrInsertFunction(VectorFuncName, FTy, NewCallAttrs);
+  CallInst *NewCall = Builder.CreateCall(VectorFuncCallee, CallArgs);
+  NewCall->copyMetadata(II);
+  II.eraseFromParent();
+  ++NumVectorMASS;
+
+  return true;
+}
+
+bool PPCGenVectorMASSEntries::runOnModule(Module &M) {
+  bool Changed = false;
+
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC || skipModule(M))
+    return false;
+
+  for (Function &Func : M) {
+    Intrinsic::ID IntrinID = Func.getIntrinsicID();
+    if (IntrinID == Intrinsic::not_intrinsic)
+      continue;
+
+    TargetLibraryInfo &TLI =
+        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Func);
+    if (!TLI.isVectorMathFunctionAvailable(IntrinID))
+      continue;
+
+    // The call to createVectorMASSCall() invalidates the iterator over users
+    // upon replacing the users. Precomputing the current list of users allows
+    // us to replace all the call sites.
+    SmallVector<User *> TheUsers;
+    for (auto *User : Func.users())
+      TheUsers.push_back(User);
+
+    for (auto *User : TheUsers)
+      if (auto *II = dyn_cast_or_null<IntrinsicInst>(User))
+        Changed |= createVectorMASSCall(IntrinID, *II, TLI);
+  }
+
+  return Changed;
+}
+
+char PPCGenVectorMASSEntries::ID = 0;
+
+char &llvm::PPCGenVectorMASSEntriesID = PPCGenVectorMASSEntries::ID;
+
+INITIALIZE_PASS_BEGIN(PPCGenVectorMASSEntries, DEBUG_TYPE,
+                      "Generate Vector MASS entries", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(PPCGenVectorMASSEntries, DEBUG_TYPE,
+                    "Generate Vector MASS entries", false, false)
+
+ModulePass *llvm::createPPCGenVectorMASSEntriesPass() {
+  return new PPCGenVectorMASSEntries();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -133,6 +133,7 @@
   initializePPCMIPeepholePass(PR);
   initializePPCLowerMASSVEntriesPass(PR);
   initializePPCGenScalarMASSEntriesPass(PR);
+  initializePPCGenVectorMASSEntriesPass(PR);
   initializePPCExpandAtomicPseudoPass(PR);
   initializeGlobalISel(PR);
   initializePPCCTRLoopsPass(PR);
@@ -463,6 +464,10 @@
     addPass(createPPCGenScalarMASSEntriesPass());
   }

+  // Generate PowerPC target-specific entries for vector math intrinsics
+  // that are available in the IBM MASS (vector) library.
+  addPass(createPPCGenVectorMASSEntriesPass());
+
   // If explicitly requested, add explicit data prefetch intrinsics.
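To make the calling convention concrete: the MASS vector entry points take the
element count by reference as their trailing parameter, which is why
createVectorMASSCall() stores the intrinsic's length operand into an
entry-block alloca and appends that alloca as the last call argument. A sketch
of the assumed shape, using __vexp as the example (the exact MASS prototype is
an assumption here, not something this patch defines):

    // Assumed IBM MASS (vector) entry point for double-precision exp: result
    // array first, input array second, element count passed by pointer.
    extern "C" void __vexp(double Result[], const double Input[], int *Count);
    // The pass rewrites
    //   call void @llvm.experimental.vector.exp.i64.p0.p0(i64 %n, ptr %y, ptr %x)
    // into roughly
    //   %n.addr = alloca i64      ; reuses the intrinsic's length type
    //   store i64 %n, ptr %n.addr
    //   call void @__vexp(ptr %y, ptr %x, ptr %n.addr)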
if (EnablePrefetch.getNumOccurrences() > 0) addPass(createLoopDataPrefetchPass()); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -111,6 +111,8 @@ "Number of uncountable loops recognized as 'shift until bitttest' idiom"); STATISTIC(NumShiftUntilZero, "Number of uncountable loops recognized as 'shift until zero' idiom"); +STATISTIC(NumVectorMath, + "Number of vector math functions formed from scalar math functions"); bool DisableLIRP::All; static cl::opt @@ -135,12 +137,24 @@ cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden); +bool DisableLIRP::VectorMath; +static cl::opt DisableLIRPVectorMath( + "disable-" DEBUG_TYPE "-vector-math", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to vector math functions."), + cl::location(DisableLIRP::VectorMath), cl::init(true), cl::ReallyHidden); + static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt MathLoopTripCountProfitThreshold( + "math-loop-tripcount-profit", cl::init(8), cl::Hidden, + cl::desc("The loop trip count beyond which the math vector array function " + "transformation is profitable")); + namespace { class LoopIdiomRecognize { @@ -177,9 +191,11 @@ StoreListMap StoreRefsForMemset; StoreListMap StoreRefsForMemsetPattern; StoreList StoreRefsForMemcpy; + StoreList StoreRefsForVectorMath; bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + bool HasVectorMath; /// Return code for isLegalStore() enum LegalStoreKind { @@ -188,6 +204,7 @@ MemsetPattern, Memcpy, UnorderedAtomicMemcpy, + VectorMath, DontUse // Dummy retval never to be used. Allows catching errors in retval // handling. }; @@ -201,6 +218,8 @@ void collectStores(BasicBlock *BB); LegalStoreKind isLegalStore(StoreInst *SI); + bool isLegalLoad(LoadInst *LI, const SCEVAddRecExpr *StoreEv); + bool isLegalMathCall(CallInst *CI, const SCEVAddRecExpr *StoreEv); enum class ForMemset { No, Yes }; bool processLoopStores(SmallVectorImpl &SL, const SCEV *BECount, ForMemset For); @@ -227,6 +246,7 @@ const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); + bool processLoopStoreForVectorMath(StoreInst *SI, const SCEV *BECount); bool avoidLIRForMultiBlockLoop(bool IsMemset = false, bool IsLoopMemset = false); @@ -297,7 +317,8 @@ // Disable loop idiom recognition if the function's name is a common idiom. StringRef Name = L->getHeader()->getParent()->getName(); - if (Name == "memset" || Name == "memcpy") + if (Name == "memset" || Name == "memcpy" || + TLI->isVectorMathFunctionAvailable(Name)) return false; // Determine if code size heuristics need to be applied. 
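For reference, the loop shape this new idiom targets is a countable loop that
loads an element, calls a scalar math routine on it, and stores the result, as
in the hedged C++ sketch below (the function and variable names are
illustrative only). With the default math-loop-tripcount-profit threshold of 8,
such a loop is only converted when its trip count is known to exceed that
bound, and the transformation stays off unless
-disable-loop-idiom-vector-math=false is passed, as the new test below does.

    #include <math.h>

    // Illustrative candidate loop: y[i] = exp(x[i]) over 1000 elements, which
    // loop-idiom can replace with one llvm.experimental.vector.exp call.
    void exp_array(const double *x, double *y) {
      for (int i = 0; i < 1000; ++i)
        y[i] = exp(x[i]);
    }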
@@ -307,8 +328,9 @@ HasMemset = TLI->has(LibFunc_memset); HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); HasMemcpy = TLI->has(LibFunc_memcpy); + HasVectorMath = TLI->hasVectorMathFunctions(); - if (HasMemset || HasMemsetPattern || HasMemcpy) + if (HasMemset || HasMemsetPattern || HasMemcpy || HasVectorMath) if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); @@ -404,6 +426,68 @@ return ConstantArray::get(AT, std::vector(ArraySize, C)); } +bool LoopIdiomRecognize::isLegalLoad(LoadInst *LI, + const SCEVAddRecExpr *StoreEv) { + // Only allow non-volatile loads + if (!LI || LI->isVolatile()) + return false; + // Only allow simple or unordered-atomic loads + if (!LI->isUnordered()) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided load. If we have something else, it's a + // random load we can't handle. + const SCEVAddRecExpr *LoadEv = + dyn_cast(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. + assert(StoreEv && "Expected valid store SCEVAddRecExpr."); + if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; + + return true; +} + +bool LoopIdiomRecognize::isLegalMathCall(CallInst *CI, + const SCEVAddRecExpr *StoreEv) { + if (!CI) + return false; + + // The call must be in the same loop as the store. + if (LI->getLoopFor(CI->getParent()) != CurLoop) + return false; + + // The call must be a direct call to a scalar math function. + Function *Func = CI->getCalledFunction(); + if (!Func) + return false; + + StringRef FuncName = Func->getName(); + if (!TLI->isVectorMathFunctionAvailable(FuncName)) + return false; + + // The call instruction must only have one user, which is the store. + // Otherwise, the call to scalar math function has to be kept for the other + // users after it is hoisted to form a vector math function, resulting + // in duplicate math computations. + if (!CI->hasOneUser()) + return false; + + // The call to a scalar math function must be fed with non-volatile + // loads. + for (unsigned I = 0; I < CI->arg_size(); ++I) { + LoadInst *Load = dyn_cast(CI->getArgOperand(I)); + + if (!isLegalLoad(Load, StoreEv)) + return false; + } + + return true; +} + LoopIdiomRecognize::LegalStoreKind LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // Don't touch volatile stores. @@ -473,8 +557,10 @@ return LegalStoreKind::MemsetPattern; } - // Otherwise, see if the store can be turned into a memcpy. - if (HasMemcpy && !DisableLIRP::Memcpy) { + // Otherwise, see if the store can be turned into a memcpy or vector math + // call. + if ((HasMemcpy && !DisableLIRP::Memcpy) || + (HasVectorMath && !DisableLIRP::VectorMath)) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. APInt Stride = getStoreStride(StoreEv); @@ -482,34 +568,31 @@ if (StoreSize != Stride && StoreSize != -Stride) return LegalStoreKind::None; - // The store must be feeding a non-volatile load. - LoadInst *LI = dyn_cast(SI->getValueOperand()); + // See if the store can be turned into a memcpy. - // Only allow non-volatile loads - if (!LI || LI->isVolatile()) - return LegalStoreKind::None; - // Only allow simple or unordered-atomic loads - if (!LI->isUnordered()) - return LegalStoreKind::None; + // For memcpy, the store must be fed with a non-volatile load. 
+ LoadInst *Load = dyn_cast(SI->getValueOperand()); + if (isLegalLoad(Load, StoreEv)) { + // Success. This store can be converted into a memcpy. + UnorderedAtomic = UnorderedAtomic || Load->isAtomic(); - // See if the pointer expression is an AddRec like {base,+,1} on the current - // loop, which indicates a strided load. If we have something else, it's a - // random load we can't handle. - const SCEVAddRecExpr *LoadEv = - dyn_cast(SE->getSCEV(LI->getPointerOperand())); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return LegalStoreKind::None; + if (HasMemcpy && !DisableLIRP::Memcpy) + return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy + : LegalStoreKind::Memcpy; + } - // The store and load must share the same stride. - if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return LegalStoreKind::None; + // See if the store can be turned into a vector math call. - // Success. This store can be converted into a memcpy. - UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); - return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy - : LegalStoreKind::Memcpy; + // The store must be fed with a call to a scalar math function. + CallInst *CI = dyn_cast(StoredVal); + if (isLegalMathCall(CI, StoreEv)) { + // Success. This store can be converted into a vector math call. + if (HasVectorMath && !DisableLIRP::VectorMath) + return LegalStoreKind::VectorMath; + } } - // This store can't be transformed into a memset/memcpy. + + // This store can't be transformed into a memset/memcpy or vector math call. return LegalStoreKind::None; } @@ -517,6 +600,7 @@ StoreRefsForMemset.clear(); StoreRefsForMemsetPattern.clear(); StoreRefsForMemcpy.clear(); + StoreRefsForVectorMath.clear(); for (Instruction &I : *BB) { StoreInst *SI = dyn_cast(&I); if (!SI) @@ -541,6 +625,9 @@ case LegalStoreKind::UnorderedAtomicMemcpy: StoreRefsForMemcpy.push_back(SI); break; + case LegalStoreKind::VectorMath: + StoreRefsForVectorMath.push_back(SI); + break; default: assert(false && "unhandled return value"); break; @@ -578,6 +665,10 @@ for (auto &SI : StoreRefsForMemcpy) MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); + // Optimize the scalar math call to the equivalent vector math call. + for (auto &SI : StoreRefsForVectorMath) + MadeChange |= processLoopStoreForVectorMath(SI, BECount); + MadeChange |= processLoopMemIntrinsic( BB, &LoopIdiomRecognize::processLoopMemCpy, BECount); MadeChange |= processLoopMemIntrinsic( @@ -997,6 +1088,59 @@ SCEV::FlagNUW); } +/// Create and insert a call instruction to the vector math intrinsic. +/// For example: +/// void llvm.experimental.vector.add( Len, Res, In1, +/// In2) +static CallInst * +insertVectorMathIntrinsic(Intrinsic::ID IntrinID, Value *TripCount, + Value *StoreBasePtr, + SmallVectorImpl &LoadInstList, + SmallVectorImpl &LoadBasePtrList, + StoreInst *SI, CallInst *CI, IRBuilder<> &Builder) { + SmallVector CallArgs; + CallArgs.push_back(TripCount); + CallArgs.push_back(StoreBasePtr); + for (Value *LoadBasePtr : LoadBasePtrList) + CallArgs.push_back(LoadBasePtr); + + SmallVector ArgsTy; + for (Value *Arg : CallArgs) + ArgsTy.push_back(Arg->getType()); + + Module *M = SI->getModule(); + Function *Func = Intrinsic::getDeclaration(M, IntrinID, ArgsTy); + CallInst *NewCall = Builder.CreateCall(Func, CallArgs); + NewCall->setDebugLoc(SI->getDebugLoc()); + + // Set the metadata to the new call instruction. 
+ AAMDNodes AATags = SI->getAAMetadata(); + for (LoadInst *LI : LoadInstList) { + AAMDNodes LoadAATags = LI->getAAMetadata(); + AATags = AATags.merge(LoadAATags); + } + if (auto ConstInt = dyn_cast(TripCount)) + AATags = AATags.extendTo(ConstInt->getZExtValue()); + else + AATags = AATags.extendTo(-1); + + if (AATags.TBAA) + NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); + + if (AATags.TBAAStruct) + NewCall->setMetadata(LLVMContext::MD_tbaa_struct, AATags.TBAAStruct); + + if (AATags.Scope) + NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope); + + if (AATags.NoAlias) + NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); + + NewCall->copyMetadata(*CI); + + return NewCall; +} + /// processLoopStridedStore - We see a strided store of some value. If we can /// transform this into a memset or memset_pattern in the loop preheader, do so. bool LoopIdiomRecognize::processLoopStridedStore( @@ -1178,6 +1322,169 @@ StoreEv, LoadEv, BECount); } +// See if this scalar math call can be promoted to the equivalent vector math +// call. +bool LoopIdiomRecognize::processLoopStoreForVectorMath(StoreInst *SI, + const SCEV *BECount) { + // Do not transform this candidate if it is known to be not profitable. That + // is, the loop trip count is less than or equal to the threshold. The loop + // trip count is BECount plus one. + if (const SCEVConstant *BECst = dyn_cast(BECount)) + if (BECst->getAPInt().getZExtValue() < MathLoopTripCountProfitThreshold) + return false; + + Value *StoredVal = SI->getValueOperand(); + CallInst *CI = dyn_cast(StoredVal); + assert(CI && "call instruction is expected"); + + Value *StorePtr = SI->getPointerOperand(); + const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); + unsigned StoreSize = DL->getTypeStoreSize(StoredVal->getType()); + + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + SCEVExpanderCleaner ExpCleaner(Expander); + + bool Changed = false; + const SCEV *StrStart = StoreEv->getStart(); + unsigned StrAS = StorePtr->getType()->getPointerAddressSpace(); + Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS)); + + APInt Stride = getStoreStride(StoreEv); + bool IsNegStride = StoreSize == -Stride; + const SCEV *StoreSizeS = SE->getConstant(IntIdxTy, StoreSize); + + // Handle negative strided loops. + if (IsNegStride) + StrStart = + getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeS, SE); + + // Okay, we have a strided store "p[i]" of a return value from a scalar math + // function, whose input is a loaded value. We can turn this into a call to + // vector math function in the loop preheader now. However, this would be + // unsafe to do if the loop contains any other reads/writes to the memory + // region we're storing to. This includes the load that feeds the stores. + // Check for an alias by generating the base address and checking everything. + Value *StoreBasePtr = Expander.expandCodeFor( + StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); + + // From here on out, conservatively report to the pass manager that we've + // changed the IR, even if we later clean up these added instructions. There + // may be structural differences e.g. in the order of use lists not accounted + // for in just a textual dump of the IR. 
This is written as a variable, even + // though statically all the places this dominates could be replaced with + // 'true', with the hope that anyone trying to be clever / "more precise" with + // the return value will read this comment, and leave them alone. + Changed = true; + + SmallPtrSet IgnoredInsts{SI, CI}; + + if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, + StoreSizeS, *AA, IgnoredInsts)) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore", SI) + << ore::NV("Inst", "load and store") << " in " + << ore::NV("Function", SI->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access store location"); + }); + return Changed; + } + + SmallVector LoadInstList; + SmallVector LoadBasePtrList; + for (unsigned I = 0; I < CI->arg_size(); ++I) { + LoadInst *Load = dyn_cast(CI->getArgOperand(I)); + assert(Load && "load instruction is expected."); + Value *LoadPtr = Load->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = cast(SE->getSCEV(LoadPtr)); + + const SCEV *LdStart = LoadEv->getStart(); + unsigned LdAS = LoadPtr->getType()->getPointerAddressSpace(); + + // Handle negative strided loops. + if (IsNegStride) + LdStart = + getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeS, SE); + + // We have to make sure that the input array is not being mutated by the + // loop. + Value *LoadBasePtr = Expander.expandCodeFor( + LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); + + // Only ignore the call instruction. + IgnoredInsts.clear(); + IgnoredInsts.insert(CI); + if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, + StoreSizeS, *AA, IgnoredInsts)) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", Load) + << ore::NV("Inst", "load and store") << " in " + << ore::NV("Function", SI->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access load location"); + }); + return Changed; + } + + LoadInstList.push_back(Load); + LoadBasePtrList.push_back(LoadBasePtr); + } + + if (avoidLIRForMultiBlockLoop()) + return Changed; + + // Okay, everything is safe, we can transform this! + + const SCEV *TripCountS = + SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop); + + Value *TripCount = + Expander.expandCodeFor(TripCountS, IntIdxTy, Preheader->getTerminator()); + + StringRef ScalarFuncName = CI->getCalledFunction()->getName(); + assert(TLI->isVectorMathFunctionAvailable(ScalarFuncName) && + "The equivalent vector math function must be available."); + Intrinsic::ID IntrinID = TLI->getVectorMathIntrinsic(ScalarFuncName); + CallInst *NewCall = + insertVectorMathIntrinsic(IntrinID, TripCount, StoreBasePtr, LoadInstList, + LoadBasePtrList, SI, CI, Builder); + + if (MSSAU) { + MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast(NewMemAcc), true); + } + + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "processLoopStoreForVectorMath", + NewCall->getDebugLoc(), Preheader) + << "Formed a call to " + << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() from " + << ore::NV("Inst", "load and store") << " instruction in " + << ore::NV("Function", SI->getFunction()) << " function" + << ore::setExtraArgs() + << ore::NV("FromBlock", SI->getParent()->getName()) + << ore::NV("ToBlock", Preheader->getName()); + }); + + // Okay, a new call to vector math function has been formed. 
+ // Zap the original store and anything that feeds into it. + if (MSSAU) { + MSSAU->removeMemoryAccess(SI, true); + MSSAU->removeMemoryAccess(CI, true); + } + deleteDeadInstruction(SI); + deleteDeadInstruction(CI); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + ++NumVectorMath; + ExpCleaner.markResultUsed(); + + return true; +} + namespace { class MemmoveVerifier { public: diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-vector-mass.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-vector-mass.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-vector-mass.ll @@ -0,0 +1,15 @@ +; RUN: llc -vector-library=MASSV -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -vector-library=MASSV -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s + +declare void @llvm.experimental.vector.exp.i64.p0.p0(i64, ptr noalias nocapture writeonly, ptr noalias nocapture readonly) #0 + +define dso_local void @exp_f64(ptr noalias nocapture noundef readonly %x, ptr noalias nocapture noundef writeonly %y) { +; CHECK-LABEL: exp_f64 +; CHECK: __vexp +; CHECK: blr +entry: + tail call void @llvm.experimental.vector.exp.i64.p0.p0(i64 100, ptr %y, ptr %x) + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/LoopIdiom/math.ll b/llvm/test/Transforms/LoopIdiom/math.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/math.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-idiom -vector-library=MASSV -disable-loop-idiom-vector-math=false < %s -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" + +declare double @exp(double) #0 + +define void @exp_f64(ptr noalias noundef %x, ptr noalias noundef %y) { +; CHECK-LABEL: @exp_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.experimental.vector.exp.i64.p0.p0(i64 1000, ptr [[Y:%.*]], ptr [[X:%.*]]) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_EXIT:%.*]] +; CHECK: for.exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %i = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %idxprom = zext i32 %i to i64 + %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 + %call = tail call double @exp(double noundef %0) + %arrayidx2 = getelementptr inbounds double, ptr %y, i64 %idxprom + store double %call, ptr %arrayidx2, align 8 + %inc = add nuw nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, 1000 + br i1 %cmp, label %for.body, label %for.exit + +for.exit: + ret void +} + +attributes #0 = { nounwind willreturn memory(write) }