Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -9648,6 +9648,289 @@ } } +// These are the functions that are needed to mangle the name of the +// vector functions generated by the compiler, according to the rules +// defined in the "Vector Function ABI specifications for AArch64", +// available at +// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. + +/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. +/// +/// TODO: Need to implement the behavior for reference marked with a +/// var or no linear modifiers (1.b in the section). For this, we +/// need to extend ParamKindTy to support the linear modifiers. +static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { + QT = QT.getCanonicalType(); + + if (QT->isVoidType()) + return false; + + if (Kind == ParamKindTy::Uniform) + return false; + + if (Kind == ParamKindTy::Linear) + return false; + + // TODO: Handle linear references with modifiers + + if (Kind == ParamKindTy::LinearWithVarStride) + return false; + + return true; +} + +/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. +static bool getAArch64PBV(QualType QT) { + QT = QT.getCanonicalType(); + + if (QT->isFloatingType()) + return true; + + if (QT->isIntegerType()) + return true; + + if (QT->isPointerType()) + return true; + + return false; +} + +/// Computes the lane size (LS) of a return type or of an input parameter, +/// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 
+static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { + if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { + QualType PTy = QT->getPointeeType().getCanonicalType(); + if (getAArch64PBV(PTy)) + return C.getTypeSize(PTy); + } + if (getAArch64PBV(QT)) + return C.getTypeSize(QT); + + return C.getTypeSize(C.getUIntPtrType()); +} + +// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the +// signature of the scalar function, as defined in 3.2.2 of the +// AAVFABI. +static std::tuple +getNDSWDS(const FunctionDecl *FD, ArrayRef ParamAttrs) { + QualType RetType = FD->getReturnType().getCanonicalType(); + + ASTContext &C = FD->getASTContext(); + + bool OutputBecomesInput = false; + + llvm::SmallVector Sizes; + if (!RetType->isVoidType()) { + Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); + if (!getAArch64PBV(RetType) && getAArch64MTV(RetType, {})) + OutputBecomesInput = true; + } + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); + Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); + } + + assert(!Sizes.empty() && "Unable to determine NDS and WDS."); + + return {*std::min_element(std::begin(Sizes), std::end(Sizes)), + *std::max_element(std::begin(Sizes), std::end(Sizes)), + OutputBecomesInput}; +} + +/// Mangle the parameter part of the vector function name according to +/// their OpenMP classification. The mangling function is defined in +/// section 3.5 of the AAVFABI. +static std::string mangleVectorParameters(ArrayRef ParamAttrs) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case LinearWithVarStride: + Out << "ls" << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + // Don't print the step value if it is not present or if it is + // equal to 1. 
+ if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + + return Out.str(); +} + +// Function used to add the attribute. The parameter `VLEN` is +// templated to allow the use of "x" when targeting scalable functions +// for SVE. +template +static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, + char ISA, StringRef ParSeq, + StringRef MangledName, bool OutputBecomesInput, + llvm::Function *Fn) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << ISA << LMask << VLEN; + if (OutputBecomesInput) + Out << "v"; + Out << ParSeq << "_" << MangledName; + Fn->addFnAttr(Out.str()); +} + +// Helper function to generate the Advanced SIMD names depending on +// the value of the NDS when simdlen is not present. +static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, + StringRef Prefix, char ISA, + StringRef ParSeq, StringRef MangledName, + bool OutputBecomesInput, + llvm::Function *Fn) { + switch (NDS) { + case 8: + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 16: + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 32: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 64: + case 128: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + default: + llvm_unreachable("Scalar type is too wide."); + } +} + +/// Emit 
vector function attributes for AArch64, as defined in the AAVFABI. +static void emitAArch64DeclareSimdFunction( + CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, + ArrayRef ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, + char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { + + // Get basic data for building the vector signature. + const auto Data = getNDSWDS(FD, ParamAttrs); + const unsigned NDS = std::get<0>(Data); + const unsigned WDS = std::get<1>(Data); + const bool OutputBecomesInput = std::get<2>(Data); + + // Check the values provided via `simdlen` by the user. + // 1. A `simdlen(1)` doesn't produce vector signatures, + if (UserVLEN == 1) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, + "The clause simdlen(1) has no effect when targeting aarch64."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 2. Section 3.3.1, item 1: user input must be a power of 2 for + // Advanced SIMD output. + if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The value specified in simdlen must be a " + "power of 2 when targeting Advanced SIMD."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 3. Section 3.4.1. SVE fixed length must obey the architectural + // limits. + if (ISA == 's' && UserVLEN != 0) { + if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " + "lanes in the architectural constraints " + "for SVE (min is 128-bit, max is " + "2048-bit, by steps of 128-bit)"); + CGM.getDiags().Report(SLoc, DiagID) << WDS; + return; + } + } + + // Sort out parameter sequence. + const std::string ParSeq = mangleVectorParameters(ParamAttrs); + StringRef Prefix = "_ZGV"; + // Generate simdlen from user input (if any). 
+ if (UserVLEN) { + if (ISA == 's') { + // SVE generates only a masked function. + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else if (ISA == 'n') { + // Advanced SIMD generates one or two functions, depending on + // the `[not]inbranch` clause. + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } else { + // If no user simdlen is provided, follow the AAVFABI rules for + // generating the vector length. + if (ISA == 's') { + // SVE, section 3.4.1, item 1. + addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else if (ISA == 'n') { + // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or + // two vector names depending on the use of the clause + // `[not]inbranch`. 
+ switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } +} + void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); @@ -9734,12 +10017,26 @@ ++MI; } llvm::APSInt VLENVal; - if (const Expr *VLEN = Attr->getSimdlen()) - VLENVal = VLEN->EvaluateKnownConstInt(C); + SourceLocation ExprLoc; + const Expr *VLENExpr = Attr->getSimdlen(); + if (VLENExpr) { + VLENVal = VLENExpr->EvaluateKnownConstInt(C); + ExprLoc = VLENExpr->getExprLoc(); + } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) + CGM.getTriple().getArch() == llvm::Triple::x86_64) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + unsigned VLEN = VLENVal.getExtValue(); + StringRef MangledName = Fn->getName(); + if (CGM.getTarget().hasFeature("sve")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 's', 128, Fn, ExprLoc); + if (CGM.getTarget().hasFeature("neon")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 'n', 128, Fn, ExprLoc); + } } FD = FD->getPreviousDecl(); } Index: test/OpenMP/Inputs/declare-simd-fix.h =================================================================== --- /dev/null +++ test/OpenMP/Inputs/declare-simd-fix.h @@ -0,0 +1,3 
@@ +#pragma omp declare simd +float foo(float a, float b, int c); +float bar(float a, float b, int c); Index: test/OpenMP/declare_simd_aarch64.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64.c @@ -0,0 +1,190 @@ +// -fopenmp and -fopenmp-simd behavior are expected to be the same. + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64 +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=AARCH64 + +#pragma omp declare simd +#pragma omp declare simd simdlen(2) +#pragma omp declare simd simdlen(6) +#pragma omp declare simd simdlen(8) +double foo(float x); + +// AARCH64: "_ZGVnM2v_foo" "_ZGVnM4v_foo" "_ZGVnM8v_foo" "_ZGVnN2v_foo" "_ZGVnN4v_foo" "_ZGVnN8v_foo" +// AARCH64-NOT: _ZGVnN6v_foo + +void foo_loop(double *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = foo(y[i]); + } +} + +// make sure that the following two functions by default get generated +// with 4 and 2 lanes, as described in the vector ABI +#pragma omp declare simd notinbranch +float bar(double x); +#pragma omp declare simd notinbranch +double baz(float x); + +// AARCH64: "_ZGVnN2v_baz" "_ZGVnN4v_baz" +// AARCH64-NOT: baz +// AARCH64: "_ZGVnN2v_bar" "_ZGVnN4v_bar" +// AARCH64-NOT: bar + +void baz_bar_loop(double *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = baz(y[i]); + y[i] = bar(x[i]); + } +} + + /***************************/ + /* 32-bit integer tests */ + /***************************/ + +#pragma omp declare simd +#pragma omp declare simd simdlen(2) +#pragma omp declare simd simdlen(6) +#pragma omp declare simd simdlen(8) +long foo_int(int x); + +// AARCH64: "_ZGVnN2v_foo_int" "_ZGVnN4v_foo_int" "_ZGVnN8v_foo_int" +// No non power of two +// AARCH64-NOT: _ZGVnN6v_foo_int + +void foo_int_loop(long *x, 
int *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = foo_int(y[i]); + } +} + +#pragma omp declare simd +char simple_8bit(char); +// AARCH64: "_ZGVnM16v_simple_8bit" "_ZGVnM8v_simple_8bit" "_ZGVnN16v_simple_8bit" "_ZGVnN8v_simple_8bit" +#pragma omp declare simd +short simple_16bit(short); +// AARCH64: "_ZGVnM4v_simple_16bit" "_ZGVnM8v_simple_16bit" "_ZGVnN4v_simple_16bit" "_ZGVnN8v_simple_16bit" +#pragma omp declare simd +int simple_32bit(int); +// AARCH64: "_ZGVnM2v_simple_32bit" "_ZGVnM4v_simple_32bit" "_ZGVnN2v_simple_32bit" "_ZGVnN4v_simple_32bit" +#pragma omp declare simd +long simple_64bit(long); +// AARCH64: "_ZGVnM2v_simple_64bit" "_ZGVnN2v_simple_64bit" + +#pragma omp declare simd +#pragma omp declare simd simdlen(32) +char a01(int x); +// AARCH64: "_ZGVnN16v_a01" "_ZGVnN32v_a01" "_ZGVnN8v_a01" +// AARCH64-NOT: a01 + +#pragma omp declare simd +#pragma omp declare simd simdlen(2) +long a02(short x); +// AARCH64: "_ZGVnN2v_a02" "_ZGVnN4v_a02" "_ZGVnN8v_a02" + +// AARCH64-NOT: a02 +/************/ +/* pointers */ +/************/ + +#pragma omp declare simd +int b01(int *x); +// AARCH64: "_ZGVnN4v_b01" +// AARCH64-NOT: b01 + +#pragma omp declare simd +char b02(char *); +// AARCH64: "_ZGVnN16v_b02" "_ZGVnN8v_b02" +// AARCH64-NOT: b02 + +#pragma omp declare simd +double *b03(double *); +// AARCH64: "_ZGVnN2v_b03" +// AARCH64-NOT: b03 + +/***********/ +/* masking */ +/***********/ + +#pragma omp declare simd inbranch +int c01(double *x, short y); +// AARCH64: "_ZGVnM8vv_c01" +// AARCH64-NOT: c01 + +#pragma omp declare simd inbranch uniform(x) +double c02(double *x, char y); +// AARCH64: "_ZGVnM16uv_c02" "_ZGVnM8uv_c02" +// AARCH64-NOT: c02 + +/*************************/ +/* sincos-like signature */ +/*************************/ +#pragma omp declare simd linear(sin) linear(cos) +void sincos(double in, double *sin, double *cos); +// AARCH64: "_ZGVnN2vll_sincos" +// AARCH64-NOT: sincos + +#pragma omp declare simd linear(sin : 1) linear(cos : 2) +void SinCos(double 
in, double *sin, double *cos); +// AARCH64: "_ZGVnN2vll2_SinCos" +// AARCH64-NOT: SinCos + +// Selection of tests based on the examples provided in chapter 5 of +// the Vector Function ABI specifications for AArch64, at +// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. + +// Listing 2, p. 18 +#pragma omp declare simd inbranch uniform(x) linear(val(i) : 4) +int foo2(int *x, int i); +// AARCH64: "_ZGVnM2ul4_foo2" "_ZGVnM4ul4_foo2" +// AARCH64-NOT: foo2 + +// Listing 3, p. 18 +#pragma omp declare simd inbranch uniform(x, c) linear(i \ + : c) +int foo3(int *x, int i, unsigned char c); +// AARCH64: "_ZGVnM16uls2u_foo3" "_ZGVnM8uls2u_foo3" +// AARCH64-NOT: foo3 + +// Listing 6, p. 19 +#pragma omp declare simd linear(x) aligned(x : 16) simdlen(4) +int foo4(int *x, float y); +// AARCH64: "_ZGVnM4la16v_foo4" "_ZGVnN4la16v_foo4" +// AARCH64-NOT: foo4 + +static int *I; +static char *C; +static short *S; +static long *L; +static float *F; +static double *D; +void do_something() { + simple_8bit(*C); + simple_16bit(*S); + simple_32bit(*I); + simple_64bit(*L); + *C = a01(*I); + *L = a02(*S); + *I = b01(I); + *C = b02(C); + D = b03(D); + *I = c01(D, *S); + *D = c02(D, *S); + sincos(*D, D, D); + SinCos(*D, D, D); + foo2(I, *I); + foo3(I, *I, *C); + foo4(I, *F); +} + +typedef struct S { + char R, G, B; +} STy; +#pragma omp declare simd notinbranch +STy DoRGB(STy x); +// AARCH64: "_ZGVnN2v_DoRGB" + +static STy *RGBData; + +void do_rgb_stuff() { + DoRGB(*RGBData); +} Index: test/OpenMP/declare_simd_aarch64.cpp =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64.cpp @@ -0,0 +1,37 @@ +// -fopenmp and -fopenmp-simd behavior are expected to be the same. 
+ +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=ADVSIMD +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +sve -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=SVE + +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=ADVSIMD +// RUN: %clang_cc1 -verify -triple aarch64-linux-gnu -target-feature +sve -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls -verify| FileCheck %s --check-prefix=SVE + +// expected-no-diagnostics + +#pragma omp declare simd +double f(double x); + +#pragma omp declare simd +float f(float x); + +void aaa(double *x, double *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +void aaa(float *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = f(y[i]); + } +} + +// ADVSIMD: "_ZGVnN2v__Z1fd" +// ADVSIMD-NOT: _Z1fd +// ADVSIMD: "_ZGVnN4v__Z1ff" +// ADVSIMD-NOT: _Z1ff + +// SVE: "_ZGVsMxv__Z1fd" +// SVE-NOT: _Z1fd +// SVE: "_ZGVsMxv__Z1ff" +// SVE-NOT: _Z1ff Index: test/OpenMP/declare_simd_aarch64_complex.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64_complex.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -std=c11 -emit-llvm %s -o - -femit-all-decls | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp -x c -std=c11 -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix=SVE + +#pragma omp declare simd +#pragma omp declare simd simdlen(4) notinbranch +double _Complex double_complex(double _Complex); +// CHECK: "_ZGVnM2v_double_complex" "_ZGVnN2v_double_complex" "_ZGVnN4v_double_complex" +// CHECK-NOT: double_complex +// SVE: 
"_ZGVsM4v_double_complex" "_ZGVsMxv_double_complex" +// SVE-NOT: double_complex + +#pragma omp declare simd +#pragma omp declare simd simdlen(8) notinbranch +float _Complex float_complex(float _Complex); +// CHECK: "_ZGVnM2v_float_complex" "_ZGVnN2v_float_complex" "_ZGVnN8v_float_complex" +// CHECK-NOT: float_complex +// SVE: "_ZGVsM8v_float_complex" "_ZGVsMxv_float_complex" +// SVE-NOT: float_complex + +static double _Complex *DC; +static float _Complex *DF; +void call_the_complex_functions() { + double_complex(*DC); + float_complex(*DF); +} Index: test/OpenMP/declare_simd_aarch64_fix.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64_fix.c @@ -0,0 +1,37 @@ +// This test is making sure that no crash happens. + +// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \ +// RUN: -fopenmp -O3 -march=armv8-a -c %s | FileCheck %s + +// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \ +// RUN: -fopenmp-simd -O3 -march=armv8-a -c %s | FileCheck %s + +// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \ +// RUN: -fopenmp -O3 -march=armv8-a+sve -c %s | FileCheck %s + +// RUN: %clang -o - -fno-fast-math -S -target aarch64-linux-gnu \ +// RUN: -fopenmp-simd -O3 -march=armv8-a+sve -c %s | FileCheck %s + +// loop in the user code, in user_code.c +#include "Inputs/declare-simd-fix.h" + +// CHECK-LABEL: do_something: +void do_something(int *a, double *b, unsigned N) { + for (unsigned i = 0; i < N; ++i) { + a[i] = foo(b[0], b[0], 1); + } +} + +// CHECK-LABEL: do_something_else: +void do_something_else(int *a, double *b, unsigned N) { + for (unsigned i = 0; i < N; ++i) { + a[i] = foo(1.1, 1.2, 1); + } +} + +// CHECK-LABEL: do_something_more: +void do_something_more(int *a, double *b, unsigned N) { + for (unsigned i = 0; i < N; ++i) { + a[i] = foo(b[i], b[i], a[1]); + } +} Index: test/OpenMP/declare_simd_aarch64_sve.c 
=================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64_sve.c @@ -0,0 +1,43 @@ +// -fopenmp and -fopenmp-simd behavior are expected to be the same + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve \ +// RUN: -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s + +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve \ +// RUN: -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s + +#pragma omp declare simd +#pragma omp declare simd notinbranch +#pragma omp declare simd simdlen(2) +#pragma omp declare simd simdlen(4) +#pragma omp declare simd simdlen(5) // not a multiple of 128-bits +#pragma omp declare simd simdlen(6) +#pragma omp declare simd simdlen(8) +#pragma omp declare simd simdlen(32) +#pragma omp declare simd simdlen(34) // requires more than 2048 bits +double foo(float x); + +// CHECK-DAG: "_ZGVsM2v_foo" "_ZGVsM32v_foo" "_ZGVsM4v_foo" "_ZGVsM6v_foo" "_ZGVsM8v_foo" "_ZGVsMxv_foo" +// CHECK-NOT: _ZGVsN +// CHECK-NOT: _ZGVsM5v_foo +// CHECK-NOT: _ZGVsM34v_foo +// CHECK-NOT: foo + +void foo_loop(double *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = foo(y[i]); + } +} + + // test integers + +#pragma omp declare simd notinbranch +char a01(int x); +// CHECK-DAG: _ZGVsMxv_a01 +// CHECK-NOT: a01 + +static int *in; +static char *out; +void do_something() { + *out = a01(*in); +} Index: test/OpenMP/declare_simd_aarch64_warning_advsimd.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64_warning_advsimd.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp %s -S -o %t -verify +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd %s -S -o %t -verify + +#pragma omp declare simd simdlen(6) +double foo(float x); +// expected-warning@-2{{The value specified in simdlen must be a power of 2 when targeting 
Advanced SIMD.}} +#pragma omp declare simd simdlen(1) +float bar(double x); +// expected-warning@-2{{The clause simdlen(1) has no effect when targeting aarch64.}} + +void foo_loop(double *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = foo(y[i]); + y[i] = bar(x[i]); + } +} Index: test/OpenMP/declare_simd_aarch64_warning_sve.c =================================================================== --- /dev/null +++ test/OpenMP/declare_simd_aarch64_warning_sve.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp %s -S -o %t -verify +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp-simd %s -S -o %t -verify + +#pragma omp declare simd simdlen(66) +double foo(float x); +//expected-warning@-2{{The clause simdlen must fit the 64-bit lanes in the architectural constraints for SVE (min is 128-bit, max is 2048-bit, by steps of 128-bit)}} + +void foo_loop(double *x, float *y, int N) { + for (int i = 0; i < N; ++i) { + x[i] = foo(y[i]); + } +}