Index: docs/ClangCommandLineReference.rst
===================================================================
--- docs/ClangCommandLineReference.rst
+++ docs/ClangCommandLineReference.rst
@@ -1451,6 +1451,8 @@
 .. option:: -fopenmp, -fno-openmp
 
+.. option:: -fopenmp-simd, -fno-openmp-simd
+
 .. option:: -fopenmp-dump-offload-linker-script
 
 .. option:: -fopenmp-use-tls
 
Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1988,6 +1988,11 @@
 Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with
 `-fno-openmp`.
 
+Use `-fopenmp-simd` to enable OpenMP simd features only, without linking
+the runtime library; for combined constructs
+(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses
+will be ignored. This can be disabled with `-fno-openmp-simd`.
+
 Controlling implementation limits
 ---------------------------------
 
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -187,6 +187,7 @@
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPSimd        , 1, 0, "OpenMP support for simd and declare simd directives only")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1261,6 +1261,8 @@
 def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
 def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
 def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>, Flags<[NoArgumentUnused]>;
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -794,7 +794,8 @@
   }
 
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-    if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+    if ((CGM.getLangOpts().OpenMP || CGM.getLangOpts().OpenMPSimd)
+        && FD->hasAttr<OMPDeclareSimdDeclAttr>())
       CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
 
   // Add no-jump-tables value.
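A rough illustration of the behaviour the UsersManual.rst hunk above describes; the file and function names here are invented for the example and are not part of the patch. Under `-fopenmp-simd`, only the simd portion of a combined construct is honoured, no outlined parallel region or __kmpc_* calls are emitted, and no OpenMP runtime library is needed at link time.

/* saxpy.c -- illustrative sketch only, not part of this patch.
 * Build (no libomp required):  clang -O2 -fopenmp-simd -c saxpy.c */
void saxpy(int n, float a, const float *x, float *y) {
  /* With -fopenmp-simd the 'parallel for' part and its num_threads clause
   * are dropped without a diagnostic; only the simd directive and its
   * simdlen clause survive, so the loop just gets llvm.loop vectorization
   * hints. */
#pragma omp parallel for simd num_threads(4) simdlen(8)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}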
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -119,7 +119,7 @@
     createObjCRuntime();
   if (LangOpts.OpenCL)
     createOpenCLRuntime();
-  if (LangOpts.OpenMP)
+  if (LangOpts.OpenMP || LangOpts.OpenMPSimd)
     createOpenMPRuntime();
   if (LangOpts.CUDA)
     createCUDARuntime();
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -3203,7 +3203,9 @@
       // semantic analysis, etc.
       break;
     }
-  }
+  } else if (Args.hasFlag(options::OPT_fopenmp_simd,
+                          options::OPT_fno_openmp_simd, /*Default=*/false))
+    CmdArgs.push_back("-fopenmp-simd");
 
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
   Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2199,6 +2199,7 @@
       Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
   Opts.OpenMPIsDevice =
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+  Opts.OpenMPSimd = !Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_simd);
 
   if (Opts.OpenMP) {
     int Version =
Index: lib/Parse/ParseOpenMP.cpp
===================================================================
--- lib/Parse/ParseOpenMP.cpp
+++ lib/Parse/ParseOpenMP.cpp
@@ -149,6 +149,33 @@
       DKind = F[i][2];
     }
   }
+
+  // If we're only interested in the simd pragmas, convert any combined
+  // construct with a simd directive to just 'simd' or 'declare simd',
+  // and any other to 'unknown'.
+  if (P.getLangOpts().OpenMPSimd) {
+    switch (DKind) {
+    default:
+      DKind = OMPD_unknown;
+      break;
+    case OMPD_declare_simd:
+      break;
+    case OMPD_simd:
+    case OMPD_parallel_for_simd:
+    case OMPD_for_simd:
+    case OMPD_taskloop_simd:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_distribute_simd:
+    case OMPD_target_simd:
+    case OMPD_teams_distribute_simd:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams_distribute_simd:
+      DKind = OMPD_simd;
+      break;
+    }
+  }
+
   return DKind < OMPD_unknown ? static_cast<OpenMPDirectiveKind>(DKind)
                               : OMPD_unknown;
 }
@@ -1015,7 +1042,10 @@
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
   case OMPD_unknown:
-    Diag(Tok, diag::err_omp_unknown_directive);
+    // Don't report unknown directives if we're only looking at simd,
+    // as the filter function will have switched the kind.
+    if (!getLangOpts().OpenMPSimd)
+      Diag(Tok, diag::err_omp_unknown_directive);
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
   }
@@ -1105,6 +1135,18 @@
                                      OpenMPClauseKind CKind, bool FirstClause) {
   OMPClause *Clause = nullptr;
   bool ErrorFound = false;
+
+  // If we're only interpreting 'simd' directives, filter out clauses that
+  // don't apply without an error.
+  if (DKind == OMPD_simd && getLangOpts().OpenMPSimd &&
+      !isAllowedClauseForDirective(DKind, CKind)) {
+
+    if (PP.LookAhead(/*N=*/0).is(tok::l_paren))
+      SkipUntil(tok::r_paren);
+
+    return nullptr;
+  }
+
   // Check if clause is allowed for the given directive.
   if (CKind != OMPC_unknown && !isAllowedClauseForDirective(DKind, CKind)) {
     Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -213,7 +213,7 @@
     PP.AddPragmaHandler("OPENCL", FPContractHandler.get());
   }
 
-  if (getLangOpts().OpenMP)
+  if (getLangOpts().OpenMP || getLangOpts().OpenMPSimd)
     OpenMPHandler.reset(new PragmaOpenMPHandler());
   else
     OpenMPHandler.reset(new PragmaNoOpenMPHandler());
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -13961,7 +13961,8 @@
   // Capture global variables if it is required to use private copy of this
   // variable.
   bool IsGlobal = !Var->hasLocalStorage();
-  if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var)))
+  if (IsGlobal && !((LangOpts.OpenMP || LangOpts.OpenMPSimd) &&
+                    IsOpenMPCapturedDecl(Var)))
     return true;
 
   // Walk up the stack to determine whether we can capture the variable,
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -983,7 +983,7 @@
 }
 
 VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
   // If we are attempting to capture a global variable in a directive with
@@ -1029,7 +1029,7 @@
 }
 
 bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
-  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
   return DSAStack->hasExplicitDSA(
       D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
 }
Index: test/OpenMP/declare_simd_codegen.cpp
===================================================================
--- test/OpenMP/declare_simd_codegen.cpp
+++ test/OpenMP/declare_simd_codegen.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
 // expected-no-diagnostics
Index: test/OpenMP/linking.c
===================================================================
--- test/OpenMP/linking.c
+++ test/OpenMP/linking.c
@@ -89,3 +89,14 @@
 // CHECK-MSVC-ILINK-64-SAME: -libpath:{{.+}}/../lib
 // CHECK-MSVC-ILINK-64-SAME: -defaultlib:libiomp5md.lib
 //
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target aarch64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-AA64 %s
+// CHECK-SIMD-ONLY-AA64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-AA64-NOT: "-lpthread"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -fopenmp-simd -target x86_64-unknown_linux \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMD-ONLY-X64 %s
+// CHECK-SIMD-ONLY-X64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-X64-NOT: "-lpthread"
Index: test/OpenMP/simd_only.c
===================================================================
--- /dev/null
+++ test/OpenMP/simd_only.c
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-LABEL: @simd_plain
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: store float %{{.*}}, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_plain(float *a, float *b, float *c, int N) {
+  #pragma omp simd
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+// CHECK-LABEL: @simd_safelen_clause
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-LABEL: omp.inner.for.inc:
+// CHECK: br label %omp.inner.for.cond, !llvm.loop
+// CHECK: ret void
+void simd_safelen_clause(float *a, float *b, float *c, int N) {
+  #pragma omp simd safelen(4)
+  for (int i = 0; i < N; i += 2)
+    a[i] = b[i] * c[i];
+}
+
+extern long long initial_val();
+
+// CHECK-LABEL: @simd_simdlen_and_linear_clause
+// CHECK: omp.inner.for.body:
+// CHECK: !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_simdlen_and_linear_clause(float *a, float *b, float *c, int N) {
+  long long lv = initial_val();
+  #pragma omp simd simdlen(2) linear(lv: 4)
+  for (int i = 0; i < N; ++i) {
+    a[lv] = b[lv] * c[lv];
+    lv += 4;
+  }
+}
+
+extern float gfloat;
+
+// CHECK-LABEL: @simd_aligned_and_private_clause
+// CHECK-LABEL: entry:
+// CHECK: %gfloat = alloca float, align 4
+// CHECK: store float 1.000000e+00, float* @gfloat, align 4
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK-NOT: @gfloat
+// CHECK: load{{.*}}!llvm.mem.parallel_loop_access
+// CHECK: store float {{.*}}, float* %gfloat, align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %{{[0-9]+}}, 2.000000e+00
+// CHECK: store float %[[FADD]], float* {{.*}}, align 4, !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_aligned_and_private_clause(float *a, float *b, float *c, int N) {
+  gfloat = 1.0f;
+  #pragma omp simd aligned(a:4) private(gfloat)
+  for (int i = 0; i < N; i += 2) {
+    gfloat = b[i] * c[i];
+    a[i] = gfloat + 2.0f;
+  }
+}
+
+// CHECK-LABEL: @simd_lastprivate_and_reduction_clause
+// CHECK-LABEL: entry:
+// CHECK: %[[SUMVAR:sum[0-9]+]] = alloca float, align 4
+// CHECK: store float 0.000000e+00, float* %[[SUMVAR]], align 4
+// CHECK-LABEL: omp.inner.for.body
+// CHECK: %[[LOAD:[0-9]+]] = load float, float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %[[LOAD]], %mul{{.*}}
+// CHECK: store float %[[FADD]], float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: store i32{{.*}}, i32* %[[IDXVAR:idx[0-9]+]]
+// CHECK-LABEL: omp.inner.for.end:
+// CHECK-DAG: %[[TMP1:[0-9]+]] = load i32, i32* %[[IDXVAR]], align 4
+// CHECK-DAG: store i32 %[[TMP1]], i32* %idx, align 4
+// CHECK-DAG: %[[INITVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK-DAG: %[[TMP2:[0-9]+]] = load float, float* %[[SUMVAR]], align 4
+// CHECK-DAG: %[[SUMMED:add[0-9]+]] = fadd float %[[INITVAL]], %[[TMP2]]
+// CHECK-DAG: store float %[[SUMMED]], float* %sum, align 4
+// CHECK-LABEL: simd.if.end:
+// CHECK: %[[OUTVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK: %[[OUTADDR:[0-9]+]] = load float*, float** %a.addr, align 8
+// CHECK: store float %[[OUTVAL]], float* %[[OUTADDR]], align 4
+// CHECK: %[[RETIDX:[0-9]+]] = load i32, i32* %idx, align 4
+// CHECK: ret i32 %[[RETIDX]]
+int simd_lastprivate_and_reduction_clause(float *a, float *b, float *c, int N) {
+  float sum = 0.0f;
+  int idx;
+  #pragma omp simd lastprivate(idx) reduction(+:sum)
+  for (int i = 0; i < N; ++i) {
+    sum += b[i] * c[i];
+    idx = i * 2;
+  }
+
+  *a = sum;
+  return idx;
+}
+
+// CHECK-LABEL: @simd_collapse_clause
+// CHECK: omp.inner.for.body:
+// CHECK-NOT: for.body:
+// CHECK: ret void
+void simd_collapse_clause(float **a, float **b, float **c, int N, int M) {
+  #pragma omp simd collapse(2)
+  for (int i = 0; i < N; ++i)
+    for (int j = 0; j < N; ++j)
+      a[i][j] = b[i][j] * c[i][j];
+}
+
+// Negative tests; no simd directive, so should be normal code.
+
+// CHECK-LABEL: @parallel_for
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK-NOT: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for(float *a, float *b, float *c, int N) {
+  #pragma omp parallel for
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+extern void long_running_func(int);
+
+// CHECK-LABEL: @taskloop
+// CHECK-NOT: call i8* @__kmpc_omp_task_alloc
+// CHECK-NOT: call void @__kmpc_taskloop
+// CHECK: ret void
+void taskloop(int N) {
+  #pragma omp taskloop
+  for (int i = 0; i < N; ++i)
+    long_running_func(i);
+}
+
+// Combined constructs; simd part should work, rest should be ignored.
+
+// CHECK-LABEL: @parallel_for_simd
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for_simd(float *a, float *b, float *c, int N) {
+#pragma omp parallel for simd num_threads(2) simdlen(4)
+  for (int i = 0; i < N; ++i)
+    a[i] = b[i] * c[i];
+}
+
+// Make sure there are no declarations for libomp runtime functions
+// CHECK-NOT: declare void @__kmpc
+
+// CHECK-LABEL: !llvm.ident = !{!0}
+
+// simd_safelen_clause width md
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 4}
+// simd_simdlen_clause width md
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 2}