Diff 539520

clang/include/clang/Basic/CodeGenOptions.h

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
	public:			public:
	enum InliningMethod {			enum InliningMethod {
	NormalInlining, // Use the standard function inlining pass.			NormalInlining, // Use the standard function inlining pass.
	OnlyHintInlining, // Inline only (implicitly) hinted functions.			OnlyHintInlining, // Inline only (implicitly) hinted functions.
	OnlyAlwaysInlining // Only run the always inlining pass.			OnlyAlwaysInlining // Only run the always inlining pass.
	};			};

	enum VectorLibrary {			enum VectorLibrary {
	NoLibrary, // Don't use any vector library.			NoLibrary, // Don't use any vector library.
	Accelerate, // Use the Accelerate framework.			Accelerate, // Use the Accelerate framework.
	LIBMVEC, // GLIBC vector math library.			LIBMVEC, // GLIBC vector math library.
	MASSV, // IBM MASS vector library.			MASSV, // IBM MASS vector library.
	SVML, // Intel short vector math library.			SVML, // Intel short vector math library.
	SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.			SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.
	Darwin_libsystem_m // Use Darwin's libsystem_m vector functions.			Darwin_libsystem_m, // Use Darwin's libsystem_m vector functions.
				ArmPL // Arm Performance Libraries.
				[Inline comment, marked done] paulwalker-arm: This should be "Arm Performance Libraries".
	};			};

	enum ObjCDispatchMethodKind {			enum ObjCDispatchMethodKind {
	Legacy = 0,			Legacy = 0,
	NonLegacy = 1,			NonLegacy = 1,
	Mixed = 2			Mixed = 2
	};			};

	▲ Show 20 Lines • Show All 463 Lines • Show Last 20 Lines

clang/include/clang/Driver/Options.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 2,615 Lines • ▼ Show 20 Lines
	def fno_inline_functions : Flag<["-"], "fno-inline-functions">, Group<f_clang_Group>, Flags<[CC1Option]>;			def fno_inline_functions : Flag<["-"], "fno-inline-functions">, Group<f_clang_Group>, Flags<[CC1Option]>;
	def fno_inline : Flag<["-"], "fno-inline">, Group<f_clang_Group>, Flags<[CC1Option]>;			def fno_inline : Flag<["-"], "fno-inline">, Group<f_clang_Group>, Flags<[CC1Option]>;
	def fno_global_isel : Flag<["-"], "fno-global-isel">, Group<f_clang_Group>,			def fno_global_isel : Flag<["-"], "fno-global-isel">, Group<f_clang_Group>,
	HelpText<"Disables the global instruction selector">;			HelpText<"Disables the global instruction selector">;
	def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_Group>,			def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_Group>,
	Alias<fno_global_isel>;			Alias<fno_global_isel>;
	def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,			def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
	HelpText<"Use the given vector functions library">,			HelpText<"Use the given vector functions library">,
	Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,none">,			Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">,
	NormalizedValuesScope<"CodeGenOptions">,			NormalizedValuesScope<"CodeGenOptions">,
	NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",			NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",
	"Darwin_libsystem_m", "NoLibrary"]>,			"Darwin_libsystem_m", "ArmPL", "NoLibrary"]>,
	MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">;			MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">;
	def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,			def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
	Alias<flax_vector_conversions_EQ>, AliasArgs<["none"]>;			Alias<flax_vector_conversions_EQ>, AliasArgs<["none"]>;
	def fno_implicit_module_maps : Flag <["-"], "fno-implicit-module-maps">, Group<f_Group>,			def fno_implicit_module_maps : Flag <["-"], "fno-implicit-module-maps">, Group<f_Group>,
	Flags<[NoXarchOption]>;			Flags<[NoXarchOption]>;
	def fno_module_maps : Flag <["-"], "fno-module-maps">, Alias<fno_implicit_module_maps>;			def fno_module_maps : Flag <["-"], "fno-module-maps">, Alias<fno_implicit_module_maps>;
	def fno_modules_strict_decluse : Flag <["-"], "fno-strict-modules-decluse">, Group<f_Group>,			def fno_modules_strict_decluse : Flag <["-"], "fno-strict-modules-decluse">, Group<f_Group>,
	Flags<[NoXarchOption]>;			Flags<[NoXarchOption]>;
	▲ Show 20 Lines • Show All 4,798 Lines • Show Last 20 Lines

clang/lib/CodeGen/BackendUtil.cpp

Show First 20 Lines • Show All 278 Lines • ▼ Show 20 Lines	static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
case CodeGenOptions::SLEEF:		case CodeGenOptions::SLEEF:
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,		TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
TargetTriple);		TargetTriple);
break;		break;
case CodeGenOptions::Darwin_libsystem_m:		case CodeGenOptions::Darwin_libsystem_m:
TLII->addVectorizableFunctionsFromVecLib(		TLII->addVectorizableFunctionsFromVecLib(
TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);		TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
break;		break;
		case CodeGenOptions::ArmPL:
		TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
		TargetTriple);
		break;
default:		default:
break;		break;
}		}
return TLII;		return TLII;
}		}

static std::optional<llvm::CodeModel::Model>		static std::optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {		getCodeModel(const CodeGenOptions &CodeGenOpts) {
▲ Show 20 Lines • Show All 1,046 Lines • Show Last 20 Lines

clang/lib/Driver/ToolChains/Clang.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,328 Lines • ▼ Show 20 Lines	if (Name == "SVML") {
Triple.getArch() != llvm::Triple::x86_64)		Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)		D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();		<< Name << Triple.getArchName();
} else if (Name == "LIBMVEC-X86") {		} else if (Name == "LIBMVEC-X86") {
if (Triple.getArch() != llvm::Triple::x86 &&		if (Triple.getArch() != llvm::Triple::x86 &&
Triple.getArch() != llvm::Triple::x86_64)		Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)		D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();		<< Name << Triple.getArchName();
} else if (Name == "SLEEF") {		} else if (Name == "SLEEF" || Name == "ArmPL") {
if (Triple.getArch() != llvm::Triple::aarch64 &&		if (Triple.getArch() != llvm::Triple::aarch64 &&
Triple.getArch() != llvm::Triple::aarch64_be)		Triple.getArch() != llvm::Triple::aarch64_be)
D.Diag(diag::err_drv_unsupported_opt_for_target)		D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();		<< Name << Triple.getArchName();
}		}
A->render(Args, CmdArgs);		A->render(Args, CmdArgs);
}		}

▲ Show 20 Lines • Show All 3,327 Lines • Show Last 20 Lines

clang/test/Driver/autocomplete.c

	Show First 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
	// FFPALL-NEXT: fast-honor-pragmas			// FFPALL-NEXT: fast-honor-pragmas
	// FFPALL-NEXT: off			// FFPALL-NEXT: off
	// FFPALL-NEXT: on			// FFPALL-NEXT: on
	// RUN: %clang --autocomplete=-flto= \| FileCheck %s -check-prefix=FLTOALL			// RUN: %clang --autocomplete=-flto= \| FileCheck %s -check-prefix=FLTOALL
	// FLTOALL: full			// FLTOALL: full
	// FLTOALL-NEXT: thin			// FLTOALL-NEXT: thin
	// RUN: %clang --autocomplete=-fveclib= \| FileCheck %s -check-prefix=FVECLIBALL			// RUN: %clang --autocomplete=-fveclib= \| FileCheck %s -check-prefix=FVECLIBALL
	// FVECLIBALL: Accelerate			// FVECLIBALL: Accelerate
				// FVECLIBALL-NEXT: ArmPL
	// FVECLIBALL-NEXT: Darwin_libsystem_m			// FVECLIBALL-NEXT: Darwin_libsystem_m
	// FVECLIBALL-NEXT: libmvec			// FVECLIBALL-NEXT: libmvec
	// FVECLIBALL-NEXT: MASSV			// FVECLIBALL-NEXT: MASSV
	// FVECLIBALL-NEXT: none			// FVECLIBALL-NEXT: none
	// FVECLIBALL-NEXT: SLEEF			// FVECLIBALL-NEXT: SLEEF
	// FVECLIBALL-NEXT: SVML			// FVECLIBALL-NEXT: SVML
	// RUN: %clang --autocomplete=-fshow-overloads= \| FileCheck %s -check-prefix=FSOVERALL			// RUN: %clang --autocomplete=-fshow-overloads= \| FileCheck %s -check-prefix=FSOVERALL
	// FSOVERALL: all			// FSOVERALL: all
	▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

clang/test/Driver/fveclib.c

	// RUN: %clang -### -c -fveclib=none %s 2>&1 \| FileCheck -check-prefix CHECK-NOLIB %s			// RUN: %clang -### -c -fveclib=none %s 2>&1 \| FileCheck -check-prefix CHECK-NOLIB %s
	// RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 \| FileCheck -check-prefix CHECK-ACCELERATE %s			// RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 \| FileCheck -check-prefix CHECK-ACCELERATE %s
	// RUN: %clang -### -c -fveclib=libmvec %s 2>&1 \| FileCheck -check-prefix CHECK-libmvec %s			// RUN: %clang -### -c -fveclib=libmvec %s 2>&1 \| FileCheck -check-prefix CHECK-libmvec %s
	// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 \| FileCheck -check-prefix CHECK-MASSV %s			// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 \| FileCheck -check-prefix CHECK-MASSV %s
	// RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 \| FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s			// RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 \| FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s
	// RUN: %clang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 \| FileCheck -check-prefix CHECK-SLEEF %s			// RUN: %clang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 \| FileCheck -check-prefix CHECK-SLEEF %s
				// RUN: %clang -### -c --target=aarch64-none-none -fveclib=ArmPL %s 2>&1 \| FileCheck -check-prefix CHECK-ARMPL %s
	// RUN: not %clang -c -fveclib=something %s 2>&1 \| FileCheck -check-prefix CHECK-INVALID %s			// RUN: not %clang -c -fveclib=something %s 2>&1 \| FileCheck -check-prefix CHECK-INVALID %s

	// CHECK-NOLIB: "-fveclib=none"			// CHECK-NOLIB: "-fveclib=none"
	// CHECK-ACCELERATE: "-fveclib=Accelerate"			// CHECK-ACCELERATE: "-fveclib=Accelerate"
	// CHECK-libmvec: "-fveclib=libmvec"			// CHECK-libmvec: "-fveclib=libmvec"
	// CHECK-MASSV: "-fveclib=MASSV"			// CHECK-MASSV: "-fveclib=MASSV"
	// CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"			// CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"
	// CHECK-SLEEF: "-fveclib=SLEEF"			// CHECK-SLEEF: "-fveclib=SLEEF"
				// CHECK-ARMPL: "-fveclib=ArmPL"

	// CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'			// CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'

	// RUN: not %clang --target=x86-none-none -c -fveclib=SLEEF %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s			// RUN: not %clang --target=x86-none-none -c -fveclib=SLEEF %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s
				// RUN: not %clang --target=x86-none-none -c -fveclib=ArmPL %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s
	// RUN: not %clang --target=aarch64-none-none -c -fveclib=LIBMVEC-X86 %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s			// RUN: not %clang --target=aarch64-none-none -c -fveclib=LIBMVEC-X86 %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s
	// RUN: not %clang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s			// RUN: not %clang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 \| FileCheck -check-prefix CHECK-ERROR %s
	// CHECK-ERROR: unsupported option {{.*}} for target			// CHECK-ERROR: unsupported option {{.*}} for target

	// RUN: %clang -fveclib=Accelerate %s -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK %s			// RUN: %clang -fveclib=Accelerate %s -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK %s
	// CHECK-LINK: "-framework" "Accelerate"			// CHECK-LINK: "-framework" "Accelerate"

	// RUN: %clang -fveclib=Accelerate %s -nostdlib -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK-NOSTDLIB %s			// RUN: %clang -fveclib=Accelerate %s -nostdlib -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK-NOSTDLIB %s
	// CHECK-LINK-NOSTDLIB-NOT: "-framework" "Accelerate"			// CHECK-LINK-NOSTDLIB-NOT: "-framework" "Accelerate"

	// RUN: %clang -fveclib=Accelerate %s -nodefaultlibs -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK-NODEFAULTLIBS %s			// RUN: %clang -fveclib=Accelerate %s -nodefaultlibs -target arm64-apple-ios8.0.0 -### 2>&1 \| FileCheck --check-prefix=CHECK-LINK-NODEFAULTLIBS %s
	// CHECK-LINK-NODEFAULTLIBS-NOT: "-framework" "Accelerate"			// CHECK-LINK-NODEFAULTLIBS-NOT: "-framework" "Accelerate"

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines	public:
/// vectorizable functions.		/// vectorizable functions.
enum VectorLibrary {		enum VectorLibrary {
NoLibrary, // Don't use any vector library.		NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.		Accelerate, // Use Accelerate framework.
DarwinLibSystemM, // Use Darwin's libsystem_m.		DarwinLibSystemM, // Use Darwin's libsystem_m.
LIBMVEC_X86, // GLIBC Vector Math library.		LIBMVEC_X86, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.		MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.		SVML, // Intel short vector math library.
SLEEFGNUABI // SLEEF - SIMD Library for Evaluating Elementary Functions.		SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
		ArmPL // Arm Performance Libraries.
		[Inline comment, marked done] paulwalker-arm: This should be "Arm Performance Libraries".
};		};

TargetLibraryInfoImpl();		TargetLibraryInfoImpl();
explicit TargetLibraryInfoImpl(const Triple &T);		explicit TargetLibraryInfoImpl(const Triple &T);

// Provide value semantics.		// Provide value semantics.
TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI);		TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI);
TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI);		TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI);
▲ Show 20 Lines • Show All 484 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/VecFuncs.def

	Show First 20 Lines • Show All 676 Lines • ▼ Show 20 Lines
	TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED)			TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED)

	TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED)			TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED)
	TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED)			TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED)

	TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED)			TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED)
	TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED)			TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED)

				#elif defined(TLI_DEFINE_ARMPL_VECFUNCS)

				TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("acosf", "armpl_vacosq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("acos", "armpl_svacos_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("acosf", "armpl_svacos_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("acosh", "armpl_vacoshq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("acoshf", "armpl_vacoshq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("acosh", "armpl_svacosh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("acoshf", "armpl_svacosh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("asin", "armpl_vasinq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("asinf", "armpl_vasinq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("asin", "armpl_svasin_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("asinf", "armpl_svasin_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("asinh", "armpl_vasinhq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("asinhf", "armpl_vasinhq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("asinh", "armpl_svasinh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("asinhf", "armpl_svasinh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("atan", "armpl_vatanq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("atanf", "armpl_vatanq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("atan", "armpl_svatan_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("atanf", "armpl_svatan_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("atan2", "armpl_vatan2q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("atanhf", "armpl_svatanh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("cbrt", "armpl_vcbrtq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("cbrtf", "armpl_vcbrtq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("cbrt", "armpl_svcbrt_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("cbrtf", "armpl_svcbrt_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("copysign", "armpl_vcopysignq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("copysignf", "armpl_vcopysignq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("copysign", "armpl_svcopysign_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("copysignf", "armpl_svcopysign_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("cos", "armpl_vcosq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("cosf", "armpl_vcosq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("cos", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("cosf", "armpl_svcos_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_vcosq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_vcosq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
				[Inline comment, marked done] paulwalker-arm: Up to you but I think it is cleaner to have cos and llvm.cos within separate blocks.
				TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_svcos_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("cosh", "armpl_vcoshq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("coshf", "armpl_vcoshq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("cosh", "armpl_svcosh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("coshf", "armpl_svcosh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("erf", "armpl_verfq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("erff", "armpl_verfq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("erf", "armpl_sverf_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("erff", "armpl_sverf_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("erfc", "armpl_verfcq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("erfcf", "armpl_verfcq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("erfc", "armpl_sverfc_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("erfcf", "armpl_sverfc_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("exp", "armpl_vexpq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("expf", "armpl_vexpq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("exp", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("expf", "armpl_svexp_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_vexpq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("expm1f", "armpl_svexpm1_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("fdim", "armpl_vfdimq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("fdimf", "armpl_vfdimq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("fdim", "armpl_svfdim_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("fdimf", "armpl_svfdim_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("fma", "armpl_vfmaq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("fmaf", "armpl_vfmaq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("fma", "armpl_svfma_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("fmaf", "armpl_svfma_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("fmin", "armpl_vfminq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("fminf", "armpl_vfminq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("fmin", "armpl_svfmin_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("fminf", "armpl_svfmin_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("fmod", "armpl_vfmodq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("fmodf", "armpl_vfmodq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("fmod", "armpl_svfmod_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("fmodf", "armpl_svfmod_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("hypot", "armpl_vhypotq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("hypotf", "armpl_vhypotq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("hypot", "armpl_svhypot_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("hypotf", "armpl_svhypot_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("lgamma", "armpl_vlgammaq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("lgammaf", "armpl_vlgammaq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("lgamma", "armpl_svlgamma_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("lgammaf", "armpl_svlgamma_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("log", "armpl_vlogq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("logf", "armpl_vlogq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("log", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("logf", "armpl_svlog_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_vlogq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("log1pf", "armpl_svlog1p_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("log2", "armpl_vlog2q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("log2f", "armpl_vlog2q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("log2", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("log2f", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_vlog2q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("nextafterf", "armpl_svnextafter_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("pow", "armpl_vpowq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("powf", "armpl_vpowq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("pow", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("powf", "armpl_svpow_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_vpowq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_vpowq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_svpow_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("sin", "armpl_vsinq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("sinf", "armpl_vsinq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("sin", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("sinf", "armpl_svsin_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_vsinq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("sinhf", "armpl_svsinh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("sinpi", "armpl_vsinpiq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("sinpif", "armpl_vsinpiq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("sinpi", "armpl_svsinpi_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("sinpif", "armpl_svsinpi_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("sqrt", "armpl_vsqrtq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("sqrtf", "armpl_vsqrtq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("sqrt", "armpl_svsqrt_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("sqrtf", "armpl_svsqrt_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("tan", "armpl_vtanq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("tan", "armpl_svtan_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("tanf", "armpl_svtan_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("tanh", "armpl_vtanhq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("tanhf", "armpl_svtanh_f32_x", SCALABLE(4), MASKED)

				TLI_DEFINE_VECFUNC("tgamma", "armpl_vtgammaq_f64", FIXED(2), NOMASK)
				TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK)
				TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED)
				TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED)

	#else			#else
	#error "Must choose which vector library functions are to be defined."			#error "Must choose which vector library functions are to be defined."
	#endif			#endif

				[Inline comment, marked done] paulwalker-arm: I guess this is ok but please be aware that `sinpi` is very new and even my reasonably up to date Fedora install doesn't have it.
	#undef MASKED			#undef MASKED
	#undef NOMASK			#undef NOMASK
	#undef SCALABLE			#undef SCALABLE
	#undef FIXED			#undef FIXED

	#undef TLI_DEFINE_VECFUNC			#undef TLI_DEFINE_VECFUNC
	#undef TLI_DEFINE_ACCELERATE_VECFUNCS			#undef TLI_DEFINE_ACCELERATE_VECFUNCS
	#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS			#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS
	#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS			#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
	#undef TLI_DEFINE_MASSV_VECFUNCS			#undef TLI_DEFINE_MASSV_VECFUNCS
	#undef TLI_DEFINE_SVML_VECFUNCS			#undef TLI_DEFINE_SVML_VECFUNCS
	#undef TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS			#undef TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
	#undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS			#undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
	#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS			#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
	#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES			#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
				#undef TLI_DEFINE_ARMPL_VECFUNCS

llvm/lib/Analysis/TargetLibraryInfo.cpp

Show All 27 Lines	cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
"Darwin_libsystem_m", "Darwin libsystem_m"),		"Darwin_libsystem_m", "Darwin libsystem_m"),
clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86",		clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86",
"GLIBC Vector Math library"),		"GLIBC Vector Math library"),
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",		clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
"IBM MASS vector library"),		"IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",		clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library"),		"Intel SVML library"),
clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",		clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
"SIMD Library for Evaluating Elementary Functions")));		"SIMD Library for Evaluating Elementary Functions"),
		clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL",
		"Arm Performance Libraries")));
		[Inline comment, marked done] paulwalker-arm: This should be "Arm Performance Libraries".

StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =		StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
{		{
#define TLI_DEFINE_STRING		#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"		#include "llvm/Analysis/TargetLibraryInfo.def"
};		};

// Recognized types of library function arguments and return types.		// Recognized types of library function arguments and return types.
▲ Show 20 Lines • Show All 1,165 Lines • ▼ Show 20 Lines	#include "llvm/Analysis/VecFuncs.def"
case llvm::Triple::aarch64_be:		case llvm::Triple::aarch64_be:
addVectorizableFunctions(VecFuncs_VF2);		addVectorizableFunctions(VecFuncs_VF2);
addVectorizableFunctions(VecFuncs_VF4);		addVectorizableFunctions(VecFuncs_VF4);
addVectorizableFunctions(VecFuncs_VFScalable);		addVectorizableFunctions(VecFuncs_VFScalable);
break;		break;
}		}
break;		break;
}		}
		case ArmPL: {
		const VecDesc VecFuncs[] = {
		#define TLI_DEFINE_ARMPL_VECFUNCS
		#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
		#include "llvm/Analysis/VecFuncs.def"
		};

		switch (TargetTriple.getArch()) {
		default:
		break;
		case llvm::Triple::aarch64:
		case llvm::Triple::aarch64_be:
		addVectorizableFunctions(VecFuncs);
		break;
		}
		break;
		}
case NoLibrary:		case NoLibrary:
break;		break;
}		}
}		}

bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {		bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
funcName = sanitizeFunctionName(funcName);		funcName = sanitizeFunctionName(funcName);
if (funcName.empty())		if (funcName.empty())
▲ Show 20 Lines • Show All 100 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
				; RUN: opt -S -vector-library=ArmPL -replace-with-veclib < %s | FileCheck %s

				target triple = "aarch64-unknown-linux-gnu"
				paulwalker-arm: It doesn't make sense to split the testing like this because the scalable vector tests are essentially bogus when SVE is not available and the fixed length vector tests should produce the same result when SVE is enabled. I think there should either be a single RUN line, or two test files that separate the fixed and scalable tests. Personally I'd opt for the former given there's nothing in the TLI interface that uses target features when making decisions.

				;
				; The replace-with-veclib pass does not work with scalable types, thus
				; the mappings aren't utilised. Tests will need to be regenerated when the
				; pass is improved.
				;

				declare <2 x double> @llvm.cos.v2f64(<2 x double>)
				declare <4 x float> @llvm.cos.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_cos_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_cos_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vcosq_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_cos_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_cos_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vcosq_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.sin.v2f64(<2 x double>)
				declare <4 x float> @llvm.sin.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_sin_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_sin_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_sin_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_sin_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.exp.v2f64(<2 x double>)
				declare <4 x float> @llvm.exp.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_exp_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_exp_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexpq_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_exp_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_exp_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexpq_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
				declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_exp2_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_exp2_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}


				declare <2 x double> @llvm.log.v2f64(<2 x double>)
				declare <4 x float> @llvm.log.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_log_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_log_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlogq_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_log_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_log_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlogq_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.log2.v2f64(<2 x double>)
				declare <4 x float> @llvm.log2.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_log2_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_log2_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_log2_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_log2_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.log10.v2f64(<2 x double>)
				declare <4 x float> @llvm.log10.v4f32(<4 x float>)
				declare <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)

				define <2 x double> @llvm_log10_f64(<2 x double> %in) {
				; CHECK-LABEL: define <2 x double> @llvm_log10_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_log10_f32(<4 x float> %in) {
				; CHECK-LABEL: define <4 x float> @llvm_log10_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
				ret <vscale x 4 x float> %1
				}

				declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
				declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
				declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
				declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

				;
				; There is a bug in the replace-with-veclib pass: for intrinsics which take
				; more than one argument but have just one overloaded type, it incorrectly
				; reconstructs the scalar name. For pow specifically, it searches for
				; llvm.pow.f64.f64 and llvm.pow.f32.f32.
				;

				define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
				; CHECK-LABEL: define <2 x double> @llvm_pow_f64
				; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
				; CHECK-NEXT: ret <2 x double> [[TMP1]]
				;
				%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
				ret <2 x double> %1
				}

				define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
				; CHECK-LABEL: define <4 x float> @llvm_pow_f32
				; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
				; CHECK-NEXT: ret <4 x float> [[TMP1]]
				;
				%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
				ret <4 x float> %1
				}

				define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
				; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
				; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
				ret <vscale x 2 x double> %1
				}

				define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
				; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
				; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
				; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)
				ret <vscale x 4 x float> %1
				}

				attributes #0 = { "target-features"="+sve" }

llvm/test/Transforms/LoopVectorize/AArch64/armpl-calls.ll

This file was added.

				; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON
				; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE

				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-unknown-linux-gnu"


				; Tests are checking if LV can vectorize loops with function calls
				; using mappings from TLI for scalable and fixed width vectorization.

				declare double @acos(double)
				declare float @acosf(float)

				define void @acos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @acos_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vacosq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @acos(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @acos_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @acos_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vacosq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @acosf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @acosh(double)
				declare float @acoshf(float)

				define void @acosh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @acosh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vacoshq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svacosh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @acosh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @acosh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @acosh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vacoshq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svacosh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @acoshf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @asin(double)
				declare float @asinf(float)

				define void @asin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @asin_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vasinq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @asin(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @asin_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @asin_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vasinq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @asinf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @asinh(double)
				declare float @asinhf(float)

				define void @asinh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @asinh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vasinhq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svasinh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @asinh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @asinh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @asinh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vasinhq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svasinh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @asinhf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @atan(double)
				declare float @atanf(float)

				define void @atan_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atan_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vatanq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @atan(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @atan_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atan_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vatanq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @atanf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @atanh(double)
				declare float @atanhf(float)

				define void @atanh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atanh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vatanhq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svatanh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @atanh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				define void @atanh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atanh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vatanhq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svatanh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 8
				%call = tail call float @atanhf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @cbrt(double)
				declare float @cbrtf(float)

				define void @cbrt_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cbrt_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcbrtq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcbrt_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @cbrt(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @cbrtf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @cbrt_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cbrt_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcbrtq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcbrt_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @cbrtf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @cos(double)
				declare float @cosf(float)

				; Scalar loop calling @cos on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cos_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @cos(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @cosf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @cos_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cos_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @cosf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @cosh(double)
				declare float @coshf(float)

				; Scalar loop calling @cosh on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @cosh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cosh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcoshq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @cosh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @coshf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @cosh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cosh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcoshq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @coshf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @erf(double)
				declare float @erff(float)

				; Scalar loop calling @erf on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @erf_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @erf_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_verfq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_sverf_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @erf(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @erff on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @erf_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @erf_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_verfq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_sverf_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @erff(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @erfc(double)
				declare float @erfcf(float)

				; Scalar loop calling @erfc on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @erfc_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @erfc_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_verfcq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_sverfc_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @erfc(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @erfcf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @erfc_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @erfc_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_verfcq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_sverfc_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @erfcf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @exp(double)
				declare float @expf(float)

				; Scalar loop calling @exp on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @exp(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @expf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @expf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @exp2(double)
				declare float @exp2f(float)

				; Scalar loop calling @exp2 on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp2_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @exp2(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @exp2f on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp2_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @exp2f(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @exp10(double)
				declare float @exp10f(float)

				; Scalar loop calling @exp10 on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp10_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @exp10(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @exp10f on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp10_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @exp10f(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @expm1(double)
				declare float @expm1f(float)

				; Scalar loop calling @expm1 on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @expm1_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @expm1_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexpm1q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexpm1_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @expm1(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @expm1f on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @expm1_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @expm1_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexpm1q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexpm1_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @expm1f(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @lgamma(double)
				declare float @lgammaf(float)

				; Scalar loop calling @lgamma on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @lgamma_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @lgamma_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlgammaq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlgamma_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @lgamma(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @lgammaf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @lgamma_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @lgamma_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlgammaq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlgamma_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @lgammaf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @log(double)
				declare float @logf(float)

				; Scalar loop calling @log on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @log(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @logf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @logf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @log1p(double)
				declare float @log1pf(float)

				; Scalar loop calling @log1p on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log1p_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log1p_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog1pq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog1p_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @log1p(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @log1pf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log1p_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log1p_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog1pq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog1p_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @log1pf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @log2(double)
				declare float @log2f(float)

				; Scalar loop calling @log2 on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log2_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @log2(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @log2f on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log2_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @log2f(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @log10(double)
				declare float @log10f(float)

				; Scalar loop calling @log10 on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log10_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @log10(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @log10f on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @log10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log10_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @log10f(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @sin(double)
				declare float @sinf(float)

				; Scalar loop calling @sin on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sin_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @sin(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @sinf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sin_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sin_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @sinf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @sinh(double)
				declare float @sinhf(float)

				; Scalar loop calling @sinh on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sinh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sinh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @sinh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @sinhf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sinh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sinh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @sinhf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @sinpi(double)
				declare float @sinpif(float)

				; Scalar loop calling @sinpi on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sinpi_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sinpi_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vsinpiq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svsinpi_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @sinpi(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @sinpif on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sinpi_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sinpi_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vsinpiq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svsinpi_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @sinpif(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @sqrt(double)
				declare float @sqrtf(float)

				; Scalar loop calling @sqrt on 1000 doubles. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sqrt_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sqrt_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vsqrtq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svsqrt_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @sqrt(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop calling @sqrtf on 1000 floats. The NEON and SVE prefixes
				; expect the call to be replaced by the matching ArmPL vector routine.
				define void @sqrt_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sqrt_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vsqrtq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svsqrt_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @sqrtf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @tan(double)
				declare float @tanf(float)

				; Loop calling tan() on each of 1000 doubles read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <2 x double> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @tan_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tan_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @tan(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling tanf() on each of 1000 floats read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <4 x float> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @tan_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tan_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @tanf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @tanh(double)
				declare float @tanhf(float)

				; Loop calling tanh() on each of 1000 doubles read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <2 x double> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @tanh_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tanh_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @tanh(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling tanhf() on each of 1000 floats read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <4 x float> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @tanh_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tanh_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @tanhf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @tgamma(double)
				declare float @tgammaf(float)

				; Loop calling tgamma() on each of 1000 doubles read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <2 x double> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @tgamma_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tgamma_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vtgammaq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svtgamma_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @tgamma(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling tgammaf() on each of 1000 floats read from %in.ptr, storing the
				; results to %out.ptr. The checks expect the call to be replaced by the ArmPL
				; vector routine: <4 x float> for the NEON prefix, and a scalable variant with
				; a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @tgamma_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @tgamma_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vtgammaq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svtgamma_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @tgammaf(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @atan2(double, double)
				declare float @atan2f(float, float)

				; Loop calling atan2(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @atan2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atan2_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @atan2(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling atan2f(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @atan2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @atan2_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @atan2f(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @copysign(double, double)
				declare float @copysignf(float, float)

				; Loop calling copysign(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @copysign_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @copysign_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcopysignq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcopysign_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @copysign(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling copysignf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @copysign_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @copysign_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcopysignq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcopysign_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @copysignf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @fdim(double, double)
				declare float @fdimf(float, float)

				; Loop calling fdim(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fdim_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fdim_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vfdimq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svfdim_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @fdim(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling fdimf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fdim_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fdim_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vfdimq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svfdim_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @fdimf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @fmin(double, double)
				declare float @fminf(float, float)

				; Loop calling fmin(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fmin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fmin_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vfminq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svfmin_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @fmin(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling fminf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fmin_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fmin_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vfminq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svfmin_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @fminf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @fmod(double, double)
				declare float @fmodf(float, float)

				; Loop calling fmod(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fmod_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fmod_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @fmod(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling fmodf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @fmod_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fmod_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @fmodf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @hypot(double, double)
				declare float @hypotf(float, float)

				; Loop calling hypot(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @hypot_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @hypot_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vhypotq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svhypot_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @hypot(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling hypotf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @hypot_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @hypot_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vhypotq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svhypot_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @hypotf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @nextafter(double, double)
				declare float @nextafterf(float, float)

				; Loop calling nextafter(x, x) on each of 1000 doubles read from %in.ptr,
				; storing the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @nextafter_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @nextafter_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vnextafterq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svnextafter_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @nextafter(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling nextafterf(x, x) on each of 1000 floats read from %in.ptr,
				; storing the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @nextafter_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @nextafter_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vnextafterq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svnextafter_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @nextafterf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @pow(double, double)
				declare float @powf(float, float)

				; Loop calling pow(x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @pow_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @pow_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @pow(double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling powf(x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. The scalar call passes the same
				; value twice, so the second vector operand must repeat the first.
				define void @pow_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @pow_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @powf(float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @fma(double, double, double)
				declare float @fmaf(float, float, float)

				; Loop calling fma(x, x, x) on each of 1000 doubles read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <2 x double> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 2 x i1> operand for the SVE prefix. The scalar call passes the same
				; value three times, so every vector operand must repeat the first.
				define void @fma_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fma_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vfmaq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]], <2 x double> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svfma_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @fma(double %in, double %in, double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling fmaf(x, x, x) on each of 1000 floats read from %in.ptr, storing
				; the results to %out.ptr. The checks expect the ArmPL vector routine:
				; <4 x float> for the NEON prefix, and a scalable variant with a trailing
				; <vscale x 4 x i1> operand for the SVE prefix. fmaf takes three operands, so
				; the checks list three vector operands (all the same loaded value), matching
				; the f64 variant.
				define void @fma_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @fma_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vfmaq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]], <4 x float> [[TMP4]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svfma_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @fmaf(float %in, float %in, float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				paulwalker-armUnsubmitted Done Reply Inline Actions This is not the interface ArmPL exposes for the vector variants of `ilogb`. I think this is likely a bug in ArmPL whereby the function's return type incorrectly mirrors the operand type. It's possible this doesn't matter for SVE but for NEON the code generation for the f64 case will certainly be wrong. I think it's safest to remove the mappings until we're sure ArmPL is ready. paulwalker-arm: This is not the interface ArmPL exposes for the vector variants of `ilogb`. I think this is…
				paulwalker-armUnsubmitted Not Done Reply Inline Actions This is not the interface ArmPL implements for vector `ldexp` functions. ArmPL requires the integer operand to be a scalar. I think the mappings should be removed until either ArmPL matches the interface currently expected or LoopVectorize gains the smarts to support scalar operands. paulwalker-arm: This is not the interface ArmPL implements for vector `ldexp` functions. ArmPL requires the…
				mgabkaAuthorUnsubmitted Done Reply Inline Actions Good catch. I must admit I wasn't checking the ArmPL header files; I was hoping that the interface matched the scalar definitions, i.e. `float ldexpf(float arg, int exp);` and `double ldexp(double arg, int exp);`. I will remove the mappings together with the tests. mgabka: Good catch. I must admit I wasn't checking the ArmPL header files; I was hoping that the…

llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll

This file was added.

				; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON
				; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE

				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-unknown-linux-gnu"


				; These tests check that the loop vectorizer (LV) can vectorize loops that call
				; LLVM math intrinsics, using the TLI mappings for both fixed-width and scalable vectorization.

				declare double @llvm.cos.f64(double)
				declare float @llvm.cos.f32(float)

				; Loop calling the llvm.cos.f64 intrinsic on each of 1000 doubles read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <2 x double> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cos_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @llvm.cos.f64(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling the llvm.cos.f32 intrinsic on each of 1000 floats read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <4 x float> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @cos_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @cos_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @llvm.cos.f32(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @llvm.exp.f64(double)
				declare float @llvm.exp.f32(float)

				; Loop calling the llvm.exp.f64 intrinsic on each of 1000 doubles read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <2 x double> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @llvm.exp.f64(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling the llvm.exp.f32 intrinsic on each of 1000 floats read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <4 x float> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @exp_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @llvm.exp.f32(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @llvm.exp2.f64(double)
				declare float @llvm.exp2.f32(float)

				; Loop calling the llvm.exp2.f64 intrinsic on each of 1000 doubles read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <2 x double> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp2_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @llvm.exp2.f64(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Loop calling the llvm.exp2.f32 intrinsic on each of 1000 floats read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <4 x float> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 4 x i1> operand for the SVE prefix.
				define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @exp2_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				; align 4: consecutive floats are 4 bytes apart, so 8-byte alignment cannot hold.
				%in = load float, ptr %in.gep, align 4
				%call = tail call float @llvm.exp2.f32(float %in)
				%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				store float %call, ptr %out.gep, align 4
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				declare double @llvm.log.f64(double)
				declare float @llvm.log.f32(float)

				; Loop calling the llvm.log.f64 intrinsic on each of 1000 doubles read from
				; %in.ptr, storing the results to %out.ptr. The checks expect the call to be
				; replaced by the ArmPL vector routine: <2 x double> for the NEON prefix, and a
				; scalable variant with a trailing <vscale x 2 x i1> operand for the SVE prefix.
				define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				br label %for.body

				for.body:
				%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				%in = load double, ptr %in.gep, align 8
				%call = tail call double @llvm.log.f64(double %in)
				%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				store double %call, ptr %out.gep, align 8
				%iv.next = add nuw nsw i64 %iv, 1
				%exitcond = icmp eq i64 %iv.next, 1000
				br i1 %exitcond, label %for.end, label %for.body

				for.end:
				ret void
				}

				; Scalar loop over 1000 floats: each element is loaded from %in.ptr, passed
				; to llvm.log.f32, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vlogq_f32
				; on <4 x float>, or armpl_svlog_f32_x on <vscale x 4 x float> with a
				; trailing <vscale x 4 x i1> operand (presumably the predicate).
				define void @log_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				  ; Elements are float-strided, so only 4-byte alignment can hold for every
				  ; index; a larger stated alignment would be undefined behaviour.
				  %in = load float, ptr %in.gep, align 4
				  %call = tail call float @llvm.log.f32(float %in)
				  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				  store float %call, ptr %out.gep, align 4
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar log2 intrinsics called by the @log2_f64 and @log2_f32 loops below.
				declare double @llvm.log2.f64(double)
				declare float @llvm.log2.f32(float)

				; Scalar loop over 1000 doubles: each element is loaded from %in.ptr, passed
				; to llvm.log2.f64, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vlog2q_f64
				; on <2 x double>, or armpl_svlog2_f64_x on <vscale x 2 x double> with a
				; trailing <vscale x 2 x i1> operand (presumably the predicate).
				define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log2_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				  %in = load double, ptr %in.gep, align 8
				  %call = tail call double @llvm.log2.f64(double %in)
				  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				  store double %call, ptr %out.gep, align 8
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar loop over 1000 floats: each element is loaded from %in.ptr, passed
				; to llvm.log2.f32, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vlog2q_f32
				; on <4 x float>, or armpl_svlog2_f32_x on <vscale x 4 x float> with a
				; trailing <vscale x 4 x i1> operand (presumably the predicate).
				define void @log2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log2_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				  ; Elements are float-strided, so only 4-byte alignment can hold for every
				  ; index; a larger stated alignment would be undefined behaviour.
				  %in = load float, ptr %in.gep, align 4
				  %call = tail call float @llvm.log2.f32(float %in)
				  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				  store float %call, ptr %out.gep, align 4
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar log10 intrinsics called by the @log10_f64 and @log10_f32 loops below.
				declare double @llvm.log10.f64(double)
				declare float @llvm.log10.f32(float)

				; Scalar loop over 1000 doubles: each element is loaded from %in.ptr, passed
				; to llvm.log10.f64, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vlog10q_f64
				; on <2 x double>, or armpl_svlog10_f64_x on <vscale x 2 x double> with a
				; trailing <vscale x 2 x i1> operand (presumably the predicate).
				define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log10_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				  %in = load double, ptr %in.gep, align 8
				  %call = tail call double @llvm.log10.f64(double %in)
				  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				  store double %call, ptr %out.gep, align 8
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar loop over 1000 floats: each element is loaded from %in.ptr, passed
				; to llvm.log10.f32, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vlog10q_f32
				; on <4 x float>, or armpl_svlog10_f32_x on <vscale x 4 x float> with a
				; trailing <vscale x 4 x i1> operand (presumably the predicate).
				define void @log10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @log10_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				  ; Elements are float-strided, so only 4-byte alignment can hold for every
				  ; index; a larger stated alignment would be undefined behaviour.
				  %in = load float, ptr %in.gep, align 4
				  %call = tail call float @llvm.log10.f32(float %in)
				  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				  store float %call, ptr %out.gep, align 4
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar sine intrinsics called by the @sin_f64 and @sin_f32 loops below.
				declare double @llvm.sin.f64(double)
				declare float @llvm.sin.f32(float)

				; Scalar loop over 1000 doubles: each element is loaded from %in.ptr, passed
				; to llvm.sin.f64, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vsinq_f64
				; on <2 x double>, or armpl_svsin_f64_x on <vscale x 2 x double> with a
				; trailing <vscale x 2 x i1> operand (presumably the predicate).
				define void @sin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sin_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				  %in = load double, ptr %in.gep, align 8
				  %call = tail call double @llvm.sin.f64(double %in)
				  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				  store double %call, ptr %out.gep, align 8
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar loop over 1000 floats: each element is loaded from %in.ptr, passed
				; to llvm.sin.f32, and the result is stored to %out.ptr. The check lines
				; expect the call to be replaced by an ArmPL vector routine: armpl_vsinq_f32
				; on <4 x float>, or armpl_svsin_f32_x on <vscale x 4 x float> with a
				; trailing <vscale x 4 x i1> operand (presumably the predicate).
				define void @sin_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @sin_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				  ; Elements are float-strided, so only 4-byte alignment can hold for every
				  ; index; a larger stated alignment would be undefined behaviour.
				  %in = load float, ptr %in.gep, align 4
				  %call = tail call float @llvm.sin.f32(float %in)
				  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				  store float %call, ptr %out.gep, align 4
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar two-operand pow intrinsics called by the @pow_f64 and @pow_f32 loops below.
				declare double @llvm.pow.f64(double, double)
				declare float @llvm.pow.f32(float, float)

				; Scalar loop over 1000 doubles: each element is loaded from %in.ptr and passed
				; as BOTH operands of llvm.pow.f64; the result is stored to %out.ptr. The check
				; lines expect the call to be replaced by an ArmPL vector routine:
				; armpl_vpowq_f64 on two <2 x double> operands, or armpl_svpow_f64_x on two
				; <vscale x 2 x double> operands with a trailing <vscale x 2 x i1> operand
				; (presumably the predicate).
				define void @pow_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @pow_f64(
				; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
				  %in = load double, ptr %in.gep, align 8
				  %call = tail call double @llvm.pow.f64(double %in, double %in)
				  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
				  store double %call, ptr %out.gep, align 8
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

				; Scalar loop over 1000 floats: each element is loaded from %in.ptr and passed
				; as BOTH operands of llvm.pow.f32; the result is stored to %out.ptr. The check
				; lines expect the call to be replaced by an ArmPL vector routine:
				; armpl_vpowq_f32 on two <4 x float> operands, or armpl_svpow_f32_x on two
				; <vscale x 4 x float> operands with a trailing <vscale x 4 x i1> operand
				; (presumably the predicate).
				define void @pow_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
				; CHECK-LABEL: @pow_f32(
				; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]])
				; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
				; CHECK: ret void
				;
				entry:
				  br label %for.body

				for.body:
				  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
				  ; Elements are float-strided, so only 4-byte alignment can hold for every
				  ; index; a larger stated alignment would be undefined behaviour.
				  %in = load float, ptr %in.gep, align 4
				  %call = tail call float @llvm.pow.f32(float %in, float %in)
				  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
				  store float %call, ptr %out.gep, align 4
				  %iv.next = add nuw nsw i64 %iv, 1
				  %exitcond = icmp eq i64 %iv.next, 1000
				  br i1 %exitcond, label %for.end, label %for.body

				for.end:
				  ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[TLI][AArch64] Add mappings to vectorized functions from ArmPL
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 539520

clang/include/clang/Basic/CodeGenOptions.h

clang/include/clang/Driver/Options.td

clang/lib/CodeGen/BackendUtil.cpp

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/Driver/autocomplete.c

clang/test/Driver/fveclib.c

llvm/include/llvm/Analysis/TargetLibraryInfo.h

llvm/include/llvm/Analysis/VecFuncs.def

llvm/lib/Analysis/TargetLibraryInfo.cpp

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

llvm/test/Transforms/LoopVectorize/AArch64/armpl-calls.ll

llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll

This is an archive of the discontinued LLVM Phabricator instance.

[TLI][AArch64] Add mappings to vectorized functions from ArmPL
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 539520

clang/include/clang/Basic/CodeGenOptions.h

clang/include/clang/Driver/Options.td

clang/lib/CodeGen/BackendUtil.cpp

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/Driver/autocomplete.c

clang/test/Driver/fveclib.c

llvm/include/llvm/Analysis/TargetLibraryInfo.h

llvm/include/llvm/Analysis/VecFuncs.def

llvm/lib/Analysis/TargetLibraryInfo.cpp

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

llvm/test/Transforms/LoopVectorize/AArch64/armpl-calls.ll

llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll

[TLI][AArch64] Add mappings to vectorized functions from ArmPL
Closed, Public