Index: lib/CodeGen/BackendUtil.cpp =================================================================== --- lib/CodeGen/BackendUtil.cpp +++ lib/CodeGen/BackendUtil.cpp @@ -320,6 +320,10 @@ CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize); } + if (LangOpts.OpenMP) { + TLII->addOpenMPVectorFunctions(TheModule); + } + PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; PMBuilder.BBVectorize = CodeGenOpts.VectorizeBB; Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -17,6 +17,7 @@ #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1661,6 +1662,15 @@ const CXXMethodDecl *MD = dyn_cast(Fn); if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual())) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + + // TODO: this should work also when using -fopenmp-simd + if (getLangOpts().OpenMP && Fn->hasAttr() && + !Fn->hasBody()) { + auto MangledNames = + CGOpenMPRuntime(*this).listAvailableVectorSignatures(Fn); + for (auto &MangledName : MangledNames) + FuncAttrs.addAttribute(MangledName); + } } // 'const', 'pure' and 'noalias' attributed functions are also nounwind. Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1107,6 +1107,11 @@ virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn); + /// Provides all the names of the vector variants associated with a + /// function \param FD marked with "declare simd". + virtual std::vector + listAvailableVectorSignatures(const FunctionDecl *FD); + /// Emit initialization for doacross loop nesting support. /// \param D Loop-based construct used in doacross nesting construct. 
virtual void emitDoacrossInit(CodeGenFunction &CGF, Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -6551,11 +6552,12 @@ return C.getTypeSize(CDT); } -static void -emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, - const llvm::APSInt &VLENVal, +static std::vector +emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::APSInt VLENVal, ArrayRef ParamAttrs, - OMPDeclareSimdDeclAttr::BranchStateTy State) { + OMPDeclareSimdDeclAttr::BranchStateTy State, + const StringRef MangledName) { + std::vector OutVec; struct ISADataTy { char ISA; unsigned VecRegSize; @@ -6617,14 +6619,146 @@ if (!!ParamAttr.Alignment) Out << 'a' << ParamAttr.Alignment; } - Out << '_' << Fn->getName(); - Fn->addFnAttr(Out.str()); + Out << '_' << MangledName; + OutVec.push_back(Out.str()); + } + } + return OutVec; +} + +static llvm::VectorType *getAArch64MaskTy(const char ISA, + const unsigned LaneSizeInBits, + const unsigned VLEN, + llvm::LLVMContext &C) { + + switch (LaneSizeInBits) { + case 64: + return llvm::VectorType::get(llvm::Type::getInt64Ty(C), VLEN); + case 32: + return llvm::VectorType::get(llvm::Type::getInt32Ty(C), VLEN); + case 16: + return llvm::VectorType::get(llvm::Type::getInt16Ty(C), VLEN); + case 8: + return llvm::VectorType::get(llvm::Type::getInt8Ty(C), VLEN); + default: + llvm_unreachable("Type is not supported"); + } + return nullptr; +} + +static std::vector emitAArch64DeclareSimdFunction( + CodeGenModule &CGM, const FunctionDecl *FD, const llvm::APSInt &UserVLEN, + ArrayRef ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, const StringRef MangledName) { 
+ std::vector OutputList; + struct ISADataTy { + char ISA; + unsigned VecRegSize; + }; + const ISADataTy ISAData[] = { + {'n', 128}, // Advanced SIMD 128-bit registers + {'n', 64}, // Advanced SIMD 64-bit registers + }; + std::vector Masked; + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + Masked = {'M', 'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + Masked = {'N'}; + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + Masked = {'M'}; + break; + } + + for (auto Mask : Masked) { + for (auto Data : ISAData) { + std::string Buffer; + llvm::raw_string_ostream Out(Buffer); + Out << "_ZVG" << Data.ISA << Mask; + std::vector Args; + auto VLEN = UserVLEN; + + // Compute VLEN if the user hasn't provided one in the pragma. + if (!UserVLEN) { + auto CDTSize = evaluateCDTSize(FD, ParamAttrs); + VLEN = llvm::APSInt::getUnsigned(Data.VecRegSize / CDTSize); + } + // A VLEN of 1 is not a vector, so skip this variant. + if (VLEN == 1) + continue; + + // Add an extra parameter if the function is masked. + if (Mask == 'M') { + auto MaskTy = + getAArch64MaskTy(Data.ISA, Data.VecRegSize / VLEN.getExtValue(), + VLEN.getExtValue(), CGM.getLLVMContext()); + Args.push_back(MaskTy); + } + + Out << VLEN; + unsigned Pos = 0; + for (auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case LinearWithVarStride: + Out << 's' << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + if (!!ParamAttr.StrideOrArg) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + + llvm::Type *Ty = CGM.getTypes().ConvertType( + FD->getParamDecl(Pos)->getOriginalType()); + llvm::Type *VecTy = llvm::VectorType::get(Ty, VLEN.getExtValue()); + switch (ParamAttr.Kind) { + default: + Args.push_back(VecTy); + break; + case Uniform: + Args.push_back(Ty); + break; + } + + ++Pos; + } + Out << '_' << MangledName; + OutputList.push_back(Out.str()); + std::string 
GlobalName = + llvm::TargetLibraryInfoImpl::mangle(Out.str(), MangledName); + llvm::Type *RetTy = CGM.getTypes().ConvertType(FD->getReturnType()); + llvm::Type *VecRetTy = llvm::VectorType::get(RetTy, VLEN.getExtValue()); + llvm::FunctionType *FTy = llvm::FunctionType::get(VecRetTy, Args, false); + CGM.getModule().getOrInsertGlobal(GlobalName, FTy); } } + return OutputList; } void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { + + auto FunList = listAvailableVectorSignatures(FD); + for (auto &VectorName : FunList) { + std::string name = VectorName; + Fn->addFnAttr(name); + } +} + +std::vector +CGOpenMPRuntime::listAvailableVectorSignatures(const FunctionDecl *FD) { ASTContext &C = CGM.getContext(); FD = FD->getCanonicalDecl(); // Map params to their positions in function decl. @@ -6636,6 +6770,8 @@ ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); ++ParamPos; } + std::vector OutVec; + std::string MangledName = CGM.getMangledName(GlobalDecl(FD)); for (auto *Attr : FD->specific_attrs()) { llvm::SmallVector ParamAttrs(ParamPositions.size()); // Mark uniform parameters. 
@@ -6707,10 +6843,21 @@ if (const Expr *VLEN = Attr->getSimdlen()) VLENVal = VLEN->EvaluateKnownConstInt(C); OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) - emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + std::vector Out; + switch (CGM.getTriple().getArch()) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + Out = emitX86DeclareSimdFunction(FD, VLENVal, ParamAttrs, State, + MangledName); + break; + case llvm::Triple::aarch64: + Out = emitAArch64DeclareSimdFunction(CGM, FD, VLENVal, ParamAttrs, State, + MangledName); + break; + } + OutVec.insert(std::end(OutVec), std::begin(Out), std::end(Out)); } + return OutVec; } namespace { Index: test/OpenMP/declare_simd_no_definition.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.c @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c %s -O3 -o - -S -mllvm -force-vector-width=2 | FileCheck %s --check-prefix=AARCH64-ASM-TWO +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c %s -O3 -o - -S -mllvm -force-vector-width=4 | FileCheck %s --check-prefix=AARCH64-ASM-FOUR +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +#pragma omp declare simd notinbranch +#pragma omp declare simd uniform(y) +double xpow(double x, double y); + +void bbb(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = xpow(y[i], N); + } +} + +// CHECK-LABEL: define void @aaa +// 
CHECK-DAG: %call = call double @f(double %{{[0-9]+}}) #[[attr:[0-9]+]] +// CHECK: attributes #[[attr]] +// CHECK-DAG: _ZGVbM2v_f +// CHECK-DAG: _ZGVbN2v_f +// CHECK-DAG: _ZGVcM4v_f +// CHECK-DAG: _ZGVcN4v_f +// CHECK-DAG: _ZGVdM4v_f +// CHECK-DAG: _ZGVdN4v_f +// CHECK-DAG: _ZGVeM8v_f +// CHECK-DAG: _ZGVeN8v_f + +// AARCH64-DAG: @vec_prefix__ZVGnM2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__ZVGnN2v_f_vec_midfix_f_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__ZVGnM2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>, double) +// AARCH64-DAG: @vec_prefix__ZVGnN2vu_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, double) +// AARCH64-DAG: @vec_prefix__ZVGnN2vv_xpow_vec_midfix_xpow_vec_postfix = external global <2 x double> (<2 x double>, <2 x double>) + +// AARCH64-LABEL: define void @aaa +// AARCH64-DAG: %call = call double @f(double %{{[0-9]+}}) #[[attrAAA:[0-9]+]] +// AARCH64-LABEL: define void @bbb +// AARCH64-DAG: %call = call double @xpow(double %{{[0-9]+}}, double %{{[0-9a-z]+}}) #[[attrBBB:[0-9]+]] + +// AARCH64: attributes #[[attrAAA]] +// AARCH64-DAG: _ZVGnM2v_f +// AARCH64-DAG: _ZVGnN2v_f + +// AARCH64: attributes #[[attrBBB]] +// AARCH64-DAG: _ZVGnM2vu_xpow +// AARCH64-DAG: _ZVGnN2vu_xpow +// AARCH64-DAG: _ZVGnN2vv_xpow + +// AARCH64-ASM-TWO-DAG: _ZVGnN2v_f + +// No code is generated for a 4 lanes vector of doubles as it is not supported in NEON +// AARCH64-ASM-FOUR-NOT: _ZVG Index: test/OpenMP/declare_simd_no_definition.cpp =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_no_definition.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -fopenmp -x c++ 
-emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=AARCH64 +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c++ %s -O3 -o - -S -mllvm -force-vector-width=2 | FileCheck %s --check-prefix=AARCH64-ASM-TWO +// RUN: %clang -target aarch64-linux-gnu -fopenmp -x c++ %s -O3 -o - -S -mllvm -force-vector-width=4 | FileCheck %s --check-prefix=AARCH64-ASM-FOUR +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +#pragma omp declare simd +float f(float x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +void aaa(float *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +// CHECK-LABEL: define void @_Z3aaaPdS_i +// CHECK-DAG: %call = call double @_Z1fd(double %{{[0-9]+}}) #[[attrD:[0-9]+]] + +// CHECK-LABEL: define void @_Z3aaaPfS_i +// CHECK-DAG: %call = call float @_Z1ff(float %{{[0-9]+}}) #[[attrF:[0-9]+]] + +// CHECK-DAG: attributes #[[attrD]] +// CHECK-DAG: _ZGVbM2v__Z1fd +// CHECK-DAG: _ZGVbN2v__Z1fd +// CHECK-DAG: _ZGVcM4v__Z1fd +// CHECK-DAG: _ZGVcN4v__Z1fd +// CHECK-DAG: _ZGVdM4v__Z1fd +// CHECK-DAG: _ZGVdN4v__Z1fd +// CHECK-DAG: _ZGVeM8v__Z1fd +// CHECK-DAG: _ZGVeN8v__Z1fd + +// CHECK-DAG: attributes #[[attrF]] +// CHECK-DAG: _ZGVbM4v__Z1ff +// CHECK-DAG: _ZGVbN4v__Z1ff +// CHECK-DAG: _ZGVcM8v__Z1ff +// CHECK-DAG: _ZGVcN8v__Z1ff +// CHECK-DAG: _ZGVdM8v__Z1ff +// CHECK-DAG: _ZGVdN8v__Z1ff +// CHECK-DAG: _ZGVeM16v__Z1ff +// CHECK-DAG: _ZGVeN16v__Z1ff + +// AARCH64-DAG: @vec_prefix__ZVGnM2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x i64>, <2 x double>) +// AARCH64-DAG: @vec_prefix__ZVGnN2v__Z1fd_vec_midfix__Z1fd_vec_postfix = external global <2 x double> (<2 x double>) + +// AARCH64-DAG: @vec_prefix__ZVGnM2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x i32>, <2 x float>) +// AARCH64-DAG: @vec_prefix__ZVGnM4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x 
i32>, <4 x float>) +// AARCH64-DAG: @vec_prefix__ZVGnN2v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <2 x float> (<2 x float>) +// AARCH64-DAG: @vec_prefix__ZVGnN4v__Z1ff_vec_midfix__Z1ff_vec_postfix = external global <4 x float> (<4 x float>) + +// AARCH64-LABEL: define void @_Z3aaaPdS_i +// AARCH64-DAG: %call = call double @_Z1fd(double %{{[0-9]+}}) #[[attrD:[0-9]+]] + +// AARCH64-LABEL: define void @_Z3aaaPfS_i +// AARCH64-DAG: %call = call float @_Z1ff(float %{{[0-9]+}}) #[[attrF:[0-9]+]] + +// AARCH64-DAG: attributes #[[attrD]] +// AARCH64-DAG: _ZVGnM2v__Z1fd +// AARCH64-DAG: _ZVGnN2v__Z1fd + +// AARCH64-DAG: attributes #[[attrF]] +// AARCH64-DAG: _ZVGnM2v__Z1ff +// AARCH64-DAG: _ZVGnM4v__Z1ff +// AARCH64-DAG: _ZVGnN2v__Z1ff +// AARCH64-DAG: _ZVGnN4v__Z1ff + +// AARCH64-ASM-TWO-DAG: _ZVGnN2v__Z1fd +// AARCH64-ASM-TWO-DAG: _ZVGnN2v__Z1ff + +// AARCH64-ASM-FOUR-DAG: _ZVGnN4v__Z1ff