Index: include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.h +++ include/llvm/Analysis/TargetLibraryInfo.h @@ -71,6 +71,18 @@ std::vector<VecDesc> ScalarDescs; public: + /// \brief List of known vector-functions libraries. + /// + /// A vector-functions library defines which functions are vectorizable + /// and with which factor. The library can be specified either by the + /// frontend or by a command-line option, and is then used by + /// addVectorizableFunctionsFromVecLib to fill in the tables of + /// vectorizable functions. + enum VectorLibrary { + NoLibrary, // Don't use any vector library. + Accelerate // Use Accelerate framework. + }; + TargetLibraryInfoImpl(); explicit TargetLibraryInfoImpl(const Triple &T); @@ -117,6 +129,10 @@ /// queryable via getVectorizedFunction and getScalarizedFunction. void addVectorizableFunctions(ArrayRef<VecDesc> Fns); + /// Calls addVectorizableFunctions with a known preset of functions for the + /// given vector library. + void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib); + /// isFunctionVectorizable - Return true if the function F has a /// vector equivalent with vectorization factor VF. 
bool isFunctionVectorizable(StringRef F, unsigned VF) const { Index: lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- lib/Analysis/TargetLibraryInfo.cpp +++ lib/Analysis/TargetLibraryInfo.cpp @@ -13,8 +13,18 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( + "vector-library", cl::Hidden, cl::desc("Vector functions library"), + cl::init(TargetLibraryInfoImpl::NoLibrary), + cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none", + "No vector functions library"), + clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", + "Accelerate framework"), + clEnumValEnd)); + const char* TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING @@ -346,6 +356,8 @@ TLI.setUnavailable(LibFunc::statvfs64); TLI.setUnavailable(LibFunc::tmpfile64); } + + TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); } TargetLibraryInfoImpl::TargetLibraryInfoImpl() { @@ -453,6 +465,28 @@ std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); } +void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( + enum VectorLibrary VecLib) { + switch (VecLib) { + case Accelerate: { + const VecDesc VecFuncs[] = { + {"expf", "vexpf", 4}, + {"llvm.exp.f32", "vexpf", 4}, + {"logf", "vlogf", 4}, + {"llvm.log.f32", "vlogf", 4}, + {"sqrtf", "vsqrtf", 4}, + {"llvm.sqrt.f32", "vsqrtf", 4}, + {"fabsf", "vfabsf", 4}, + {"llvm.fabs.f32", "vfabsf", 4}, + }; + addVectorizableFunctions(VecFuncs); + break; + } + case NoLibrary: + break; + } +} + bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) Index: test/Transforms/LoopVectorize/veclib-calls.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/veclib-calls.ll @@ 
-0,0 +1,135 @@
+; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
+
+; Check that, with -vector-library=Accelerate, loop vectorization turns the
+; scalar math calls below into calls operating on <4 x float>.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @sqrt_f32(
+;CHECK: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @sqrtf(float) nounwind readnone
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @sqrtf(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+;CHECK-LABEL: @exp_f32(
+;CHECK: vexpf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @expf(float) nounwind readnone
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @expf(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+;CHECK-LABEL: @log_f32(
+;CHECK: vlogf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @logf(float) nounwind readnone
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @logf(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; NOTE(review): "fabs" would also match a call to the @llvm.fabs.v4f32
+; intrinsic, so this check alone does not show that vfabsf was used - confirm.
+;CHECK-LABEL: @fabs_f32(
+;CHECK: fabs{{.*}}<4 x float>
+;CHECK: ret void
+declare float @fabsf(float) nounwind readnone
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @fabsf(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Test that we don't vectorize arbitrary functions.
+;CHECK-LABEL: @foo_f32(
+;CHECK-NOT: foo{{.*}}<4 x float>
+;CHECK: ret void
+declare float @foo(float) nounwind readnone
+define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @foo(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}