Index: lib/CodeGen/BackendUtil.cpp =================================================================== --- lib/CodeGen/BackendUtil.cpp +++ lib/CodeGen/BackendUtil.cpp @@ -306,6 +306,10 @@ std::unique_ptr TLII( createTLII(TargetTriple, CodeGenOpts)); + if (LangOpts.OpenMP) { + TLII->addOpenMPVectorFunctions(TheModule); + } + switch (Inlining) { case CodeGenOptions::NoInlining: break; Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -17,6 +17,7 @@ #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1663,6 +1664,15 @@ const CXXMethodDecl *MD = dyn_cast(Fn); if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual())) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + + // TODO: this should also work when using -fopenmp-simd. NOTE(review): a temporary CGOpenMPRuntime is constructed below on each call; confirm this is intended. + if (getLangOpts().OpenMP && Fn->hasAttr() && + !Fn->hasBody()) { + auto MangledNames = + CGOpenMPRuntime(*this).listAvailableVectorSignatures(Fn); + for (auto &MangledName : MangledNames) + FuncAttrs.addAttribute(MangledName); + } } // 'const', 'pure' and 'noalias' attributed functions are also nounwind. Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1070,6 +1070,11 @@ virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn); + /// Provides all the names of the vector variants associated with a + /// function \param FD marked with "declare simd". + virtual std::vector + listAvailableVectorSignatures(const FunctionDecl *FD); + /// Emit initialization for doacross loop nesting support. /// \param D Loop-based construct used in doacross nesting construct. 
virtual void emitDoacrossInit(CodeGenFunction &CGF, Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -6515,11 +6516,12 @@ return C.getTypeSize(CDT); } -static void -emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, - const llvm::APSInt &VLENVal, +static std::vector +emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::APSInt VLENVal, ArrayRef ParamAttrs, - OMPDeclareSimdDeclAttr::BranchStateTy State) { + OMPDeclareSimdDeclAttr::BranchStateTy State, + const StringRef MangledName) { + std::vector OutVec; struct ISADataTy { char ISA; unsigned VecRegSize; @@ -6581,14 +6583,144 @@ if (!!ParamAttr.Alignment) Out << 'a' << ParamAttr.Alignment; } - Out << '_' << Fn->getName(); - Fn->addFnAttr(Out.str()); + Out << '_' << MangledName; + OutVec.push_back(Out.str()); + } + } + return OutVec; +} + +static llvm::VectorType *getAArch64MaskTy(const char ISA, + const unsigned LaneSizeInBits, + const unsigned VLEN, + llvm::LLVMContext &C) { + + switch (LaneSizeInBits) { + case 64: + return llvm::VectorType::get(llvm::Type::getInt64Ty(C), VLEN); + case 32: + return llvm::VectorType::get(llvm::Type::getInt32Ty(C), VLEN); + case 16: + return llvm::VectorType::get(llvm::Type::getInt16Ty(C), VLEN); + case 8: + return llvm::VectorType::get(llvm::Type::getInt8Ty(C), VLEN); + default: + llvm_unreachable("Type is not supported"); + } + return nullptr; +} + +static void emitAArch64DeclareSimdFunction( + CodeGenModule &CGM, const FunctionDecl *FD, const llvm::APSInt &UserVLEN, + ArrayRef ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, const StringRef MangledName) { + 
struct ISADataTy { + char ISA; + unsigned VecRegSize; + }; + // TODO: make this depend on the size of ... (unfinished note); the register sizes below are hard-coded for now. + const ISADataTy ISAData[] = { + {'Q', 128}, // NEON 128 + {'D', 64}, // NEON 64 + }; + std::vector Masked; + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + Masked = {'M', 'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + Masked = {'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + Masked = {'M'}; + break; + } + + for (auto Mask : Masked) { + for (auto Data : ISAData) { + std::string Buffer; + llvm::raw_string_ostream Out(Buffer); + Out << "_ZVG" << Data.ISA << Mask; // NOTE(review): the x86 variants use the prefix _ZGV; confirm that _ZVG is intended here. + std::vector Args; + auto VLEN = UserVLEN; + + // Compute VLEN if the user hasn't provided one in the pragma. + if (!UserVLEN) { + auto CDTSize = evaluateCDTSize(FD, ParamAttrs); + VLEN = llvm::APSInt::getUnsigned(Data.VecRegSize / CDTSize); + } + // A single lane is not a vector, so skip this variant. NOTE(review): VLEN == 0 (CDT wider than the register) is not skipped, and the masked path below divides by VLEN; confirm this cannot happen. + if (VLEN == 1) + continue; + + // Add an extra mask parameter if the function is masked. + if (Mask == 'M') { + auto MaskTy = + getAArch64MaskTy(Data.ISA, Data.VecRegSize / VLEN.getExtValue(), + VLEN.getExtValue(), CGM.getLLVMContext()); + Args.push_back(MaskTy); + } + + Out << VLEN; + unsigned Pos = 0; + for (auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case LinearWithVarStride: + Out << 's' << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + if (!!ParamAttr.StrideOrArg) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + + llvm::Type *Ty = CGM.getTypes().ConvertType( + FD->getParamDecl(Pos)->getOriginalType()); + llvm::Type *VecTy = llvm::VectorType::get(Ty, VLEN.getExtValue()); + switch (ParamAttr.Kind) { + default: + Args.push_back(VecTy); + break; + case Uniform: + Args.push_back(Ty); + break; + } + + ++Pos; + } + Out << '_' << MangledName; + std::string GlobalName = + llvm::TargetLibraryInfoImpl::mangle(Out.str(), MangledName); 
+ llvm::Type *RetTy = CGM.getTypes().ConvertType(FD->getReturnType()); + llvm::Type *VecRetTy = llvm::VectorType::get(RetTy, VLEN.getExtValue()); + llvm::FunctionType *FTy = llvm::FunctionType::get(VecRetTy, Args, false); + CGM.getModule().getOrInsertGlobal(GlobalName, FTy); } } } void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { + + auto FunList = listAvailableVectorSignatures(FD); + for (auto &VectorName : FunList) { + std::string name = VectorName; + Fn->addFnAttr(name); + } +} + +std::vector +CGOpenMPRuntime::listAvailableVectorSignatures(const FunctionDecl *FD) { ASTContext &C = CGM.getContext(); FD = FD->getCanonicalDecl(); // Map params to their positions in function decl. @@ -6600,6 +6732,8 @@ ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); ++ParamPos; } + std::vector OutVec; + std::string MangledName = CGM.getMangledName(GlobalDecl(FD)); for (auto *Attr : FD->specific_attrs()) { llvm::SmallVector ParamAttrs(ParamPositions.size()); // Mark uniform parameters. 
@@ -6672,9 +6806,18 @@ VLENVal = VLEN->EvaluateKnownConstInt(C); OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) - emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + CGM.getTriple().getArch() == llvm::Triple::x86_64) { + const auto Out = emitX86DeclareSimdFunction(FD, VLENVal, ParamAttrs, + State, MangledName); + for (auto &VecName : Out) + OutVec.push_back(VecName); + } + if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + emitAArch64DeclareSimdFunction(CGM, FD, VLENVal, ParamAttrs, State, + MangledName); + } } + return OutVec; } namespace { Index: test/OpenMP/declare_simd_no_definition.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c %s -O3 -o - -S -mllvm -force-vector-width=2 | FileCheck %s --check-prefix=AARCH64-ASM-TWO +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c %s -O3 -o - -S -mllvm -force-vector-width=4 | FileCheck %s --check-prefix=AARCH64-ASM-FOUR +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +#pragma omp declare simd notinbranch +#pragma omp declare simd uniform(y) +double xpow(double x, double y); + +void bbb(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = xpow(y[i], N); + } +} + +// CHECK-LABEL: define void @aaa +// CHECK-DAG: %call = call double @f(double %{{[0-9]+}}) #[[attr:[0-9]+]] +// CHECK: attributes 
#[[attr]] +// CHECK-DAG: _ZGVbM2v_f +// CHECK-DAG: _ZGVbN2v_f +// CHECK-DAG: _ZGVcM4v_f +// CHECK-DAG: _ZGVcN4v_f +// CHECK-DAG: _ZGVdM4v_f +// CHECK-DAG: _ZGVdN4v_f +// CHECK-DAG: _ZGVeM8v_f +// CHECK-DAG: _ZGVeN8v_f + +// AARCH64-DAG: @vec_prefix__ZVGQM2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__ZVGQN2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__ZVGQM2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>, double) +// AARCH64-DAG: @vec_prefix__ZVGQN2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, double) +// AARCH64-DAG: @vec_prefix__ZVGQN2vv_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, <2 x double>) + +// AARCH64-ASM-TWO-DAG: _ZVGQN2v_f + +// No code is generated for a 4 lanes vector of doubles as it is not supported in NEON +// AARCH64-ASM-FOUR-NOT: _ZVG Index: test/OpenMP/declare_simd_no_definition.cpp =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c++ %s -O3 -o - -S -mllvm -force-vector-width=2 | FileCheck %s --check-prefix=AARCH64-ASM-TWO +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c++ %s -O3 -o - -S -mllvm -force-vector-width=4 | FileCheck %s --check-prefix=AARCH64-ASM-FOUR +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +#pragma omp declare simd +float f(float x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = 
f(y[i]); + } +} + +void aaa(float *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +// CHECK-LABEL: define void @_Z3aaaPdS_i +// CHECK-DAG: %call = call double @_Z1fd(double %{{[0-9]+}}) #[[attrD:[0-9]+]] + +// CHECK-LABEL: define void @_Z3aaaPfS_i +// CHECK-DAG: %call = call float @_Z1ff(float %{{[0-9]+}}) #[[attrF:[0-9]+]] + +// CHECK-DAG: attributes #[[attrD]] +// CHECK-DAG: _ZGVbM2v__Z1fd +// CHECK-DAG: _ZGVbN2v__Z1fd +// CHECK-DAG: _ZGVcM4v__Z1fd +// CHECK-DAG: _ZGVcN4v__Z1fd +// CHECK-DAG: _ZGVdM4v__Z1fd +// CHECK-DAG: _ZGVdN4v__Z1fd +// CHECK-DAG: _ZGVeM8v__Z1fd +// CHECK-DAG: _ZGVeN8v__Z1fd + +// CHECK-DAG: attributes #[[attrF]] +// CHECK-DAG: _ZGVbM4v__Z1ff +// CHECK-DAG: _ZGVbN4v__Z1ff +// CHECK-DAG: _ZGVcM8v__Z1ff +// CHECK-DAG: _ZGVcN8v__Z1ff +// CHECK-DAG: _ZGVdM8v__Z1ff +// CHECK-DAG: _ZGVdN8v__Z1ff +// CHECK-DAG: _ZGVeM16v__Z1ff +// CHECK-DAG: _ZGVeN16v__Z1ff + +// AARCH64-DAG: @vec_prefix__ZVGQM2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__ZVGQN2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__ZVGDM2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x i32>, <2 x float>) +// AARCH64-DAG: @vec_prefix__ZVGQM4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x i32>, <4 x float>) +// AARCH64-DAG: @vec_prefix__ZVGDN2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x float>) +// AARCH64-DAG: @vec_prefix__ZVGQN4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x float>) + +// AARCH64-ASM-TWO-DAG: _ZVGQN2v__Z1fd +// AARCH64-ASM-TWO-DAG: _ZVGDN2v__Z1ff + +// AARCH64-ASM-FOUR-DAG: _ZVGQN4v__Z1ff