Index: docs/ClangCommandLineReference.rst =================================================================== --- docs/ClangCommandLineReference.rst +++ docs/ClangCommandLineReference.rst @@ -1451,6 +1451,8 @@ .. option:: -fopenmp, -fno-openmp +.. option:: -fopenmp-simd, -fno-openmp-simd + .. option:: -fopenmp-dump-offload-linker-script .. option:: -fopenmp-use-tls Index: docs/UsersManual.rst =================================================================== --- docs/UsersManual.rst +++ docs/UsersManual.rst @@ -1988,6 +1988,11 @@ Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with `-fno-openmp`. +Use `-fopenmp-simd` to enable OpenMP simd features only, without linking +the runtime library; for combined constructs +(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses +will be ignored. This can be disabled with `-fno-openmp-simd`. + Controlling implementation limits --------------------------------- Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -187,6 +187,7 @@ LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") LANGOPT(CUDA , 1, 0, "CUDA") LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)") +LANGOPT(OpenMPSimd , 1, 0, "OpenMP support for simd and declare simd directives only") LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls") LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device") LANGOPT(RenderScript , 1, 0, "RenderScript") Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1252,6 +1252,8 @@ def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group; def fopenmp : Flag<["-"], "fopenmp">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fno_openmp : 
Flag<["-"], "fno-openmp">, Group, Flags<[NoArgumentUnused]>; +def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>; +def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group, Flags<[NoArgumentUnused]>; def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group; def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group, Flags<[NoArgumentUnused]>; Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -119,7 +119,7 @@ createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); - if (LangOpts.OpenMP) + if (LangOpts.OpenMP || LangOpts.OpenMPSimd) createOpenMPRuntime(); if (LangOpts.CUDA) createCUDARuntime(); Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -3232,7 +3232,9 @@ // semantic analysis, etc. 
break; } - } + } else if (Args.hasFlag(options::OPT_fopenmp_simd, + options::OPT_fno_openmp_simd, /*Default=*/false)) + CmdArgs.push_back("-fopenmp-simd"); const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs(); Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType); Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -2208,6 +2208,7 @@ Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls); Opts.OpenMPIsDevice = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device); + Opts.OpenMPSimd = !Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_simd); if (Opts.OpenMP) { int Version = Index: lib/Parse/ParsePragma.cpp =================================================================== --- lib/Parse/ParsePragma.cpp +++ lib/Parse/ParsePragma.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/ASTContext.h" +#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" @@ -98,6 +99,12 @@ Token &FirstToken) override; }; +struct PragmaOpenMPSIMDHandler : public PragmaHandler { + PragmaOpenMPSIMDHandler() : PragmaHandler("omp") { } + void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &FirstToken) override; +}; + /// PragmaCommentHandler - "\#pragma comment ...". struct PragmaCommentHandler : public PragmaHandler { PragmaCommentHandler(Sema &Actions) @@ -215,6 +222,8 @@ } if (getLangOpts().OpenMP) OpenMPHandler.reset(new PragmaOpenMPHandler()); + else if (getLangOpts().OpenMPSimd) + OpenMPHandler.reset(new PragmaOpenMPSIMDHandler()); else OpenMPHandler.reset(new PragmaNoOpenMPHandler()); PP.AddPragmaHandler(OpenMPHandler.get()); @@ -1548,6 +1557,111 @@ /*DisableMacroExpansion=*/false); } +/// \brief Handle '#pragma omp ...' 
when only OpenMP simd is enabled
+/// by forwarding just the simd parts of the directive to the parser.
+void
+PragmaOpenMPSIMDHandler::HandlePragma(Preprocessor &PP,
+                                      PragmaIntroducerKind Introducer,
+                                      Token &FirstTok) {
+  SmallVector<Token, 16> Pragma;
+  Token Tok;
+  Tok.startToken();
+  Tok.setKind(tok::annot_pragma_openmp);
+  Tok.setLocation(FirstTok.getLocation());
+  Pragma.push_back(Tok);
+  PP.Lex(Tok);
+  auto Text = PP.getSpelling(Tok);
+
+  bool IsSimd = false;
+  bool IsDeclare = false;
+  // Check for a declare before everything else, since that isn't recognized
+  // as a directive on its own.
+  if (Text == "declare") {
+    Pragma.push_back(Tok);
+    IsDeclare = true;
+    PP.Lex(Tok);
+    Text = PP.getSpelling(Tok);
+  }
+
+  // Discard directives that aren't related to simd; only 'simd' is kept.
+  while (Tok.isNot(tok::eod) && getOpenMPDirectiveKind(Text) != OMPD_unknown) {
+    if (Text == "simd") {
+      Pragma.push_back(Tok);
+      IsSimd = true;
+    }
+
+    PP.Lex(Tok);
+    Text = PP.getSpelling(Tok);
+  }
+
+  // No simd directive: discard the whole pragma and warn about it once (the
+  // warning is set to ignored right after it has been emitted).
+  if (!IsSimd) {
+    if (!PP.getDiagnostics().isIgnored(diag::warn_pragma_omp_ignored,
+                                       FirstTok.getLocation())) {
+      PP.Diag(FirstTok, diag::warn_pragma_omp_ignored);
+      PP.getDiagnostics().setSeverity(diag::warn_pragma_omp_ignored,
+                                      diag::Severity::Ignored,
+                                      SourceLocation());
+    }
+
+    if (Tok.isNot(tok::eod))
+      PP.DiscardUntilEndOfDirective();
+    return;
+  }
+
+  auto DirectiveKind = IsDeclare ? OMPD_declare_simd : OMPD_simd;
+
+  // Read through any clauses, only save those which apply to simd or
+  // declare simd directives
+  while (Tok.isNot(tok::eod)) {
+    Text = PP.getSpelling(Tok);
+    bool Allowed = isAllowedClauseForDirective(DirectiveKind,
+                                               getOpenMPClauseKind(Text));
+
+    if (Allowed)
+      Pragma.push_back(Tok);
+
+    PP.Lex(Tok);
+
+    // For clauses with arguments, we need to process everything up to a
+    // matching parenthesis. Either add to the current pragma or discard
+    // if it's not supported for the current directive kind.
+    if (Tok.is(tok::l_paren)) {
+      // Track the nesting depth so that a ')' belonging to a nested
+      // expression, e.g. 'safelen((4))', does not end the clause early.
+      unsigned ParenDepth = 0;
+      do {
+        if (Tok.is(tok::l_paren))
+          ++ParenDepth;
+        else if (Tok.is(tok::r_paren))
+          --ParenDepth;
+
+        // Keep the argument tokens, including the parentheses, unless we are
+        // throwing this clause away.
+        if (Allowed)
+          Pragma.push_back(Tok);
+
+        PP.Lex(Tok);
+        // Make sure we bail out of processing clauses if we hit the end of the
+        // directive before finishing the arguments. Let the full openmp parser
+        // deal with reporting errors for malformed pragmas.
+      } while (ParenDepth != 0 && Tok.isNot(tok::eod));
+    }
+  }
+
+  SourceLocation EodLoc = Tok.getLocation();
+  Tok.startToken();
+  Tok.setKind(tok::annot_pragma_openmp_end);
+  Tok.setLocation(EodLoc);
+  Pragma.push_back(Tok);
+
+  auto Toks = llvm::make_unique<Token[]>(Pragma.size());
+  std::copy(Pragma.begin(), Pragma.end(), Toks.get());
+  PP.EnterTokenStream(std::move(Toks), Pragma.size(),
+                      /*DisableMacroExpansion=*/false);
+}
+
 /// \brief Handle '#pragma pointers_to_members'
 // The grammar for this pragma is as follows:
 //
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -13953,7 +13953,8 @@
   // Capture global variables if it is required to use private copy of this
   // variable.
bool IsGlobal = !Var->hasLocalStorage(); - if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var))) + if (IsGlobal && !((LangOpts.OpenMP || LangOpts.OpenMPSimd) && + IsOpenMPCapturedDecl(Var))) return true; // Walk up the stack to determine whether we can capture the variable, Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -983,7 +983,7 @@ } VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); + assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed"); D = getCanonicalDecl(D); // If we are attempting to capture a global variable in a directive with @@ -1029,7 +1029,7 @@ } bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); + assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed"); return DSAStack->hasExplicitDSA( D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level); } Index: test/OpenMP/linking.c =================================================================== --- test/OpenMP/linking.c +++ test/OpenMP/linking.c @@ -89,3 +89,14 @@ // CHECK-MSVC-ILINK-64-SAME: -libpath:{{.+}}/../lib // CHECK-MSVC-ILINK-64-SAME: -defaultlib:libiomp5md.lib // +// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: -fopenmp-simd -target aarch64-linux-gnu \ +// RUN: | FileCheck --check-prefix=CHECK-SIMD-ONLY-AA64 %s +// CHECK-SIMD-ONLY-AA64-NOT: "-l[[DEFAULT_OPENMP_LIB]]" +// CHECK-SIMD-ONLY-AA64-NOT: "-lpthread" +// +// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: -fopenmp-simd -target x86_64-unknown_linux \ +// RUN: | FileCheck --check-prefix=CHECK-SIMD-ONLY-X64 %s +// CHECK-SIMD-ONLY-X64-NOT: "-l[[DEFAULT_OPENMP_LIB]]" +// CHECK-SIMD-ONLY-X64-NOT: "-lpthread" Index: test/OpenMP/simd_only.c =================================================================== 
--- /dev/null +++ test/OpenMP/simd_only.c @@ -0,0 +1,157 @@ +// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK-LABEL: @simd_plain +// CHECK-LABEL: omp.inner.for.body: +// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access +// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access +// CHECK: store float %{{.*}}, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access +// CHECK: ret void +void simd_plain(float *a, float *b, float *c, int N) { + #pragma omp simd + for (int i = 0; i < N; i += 2) + a[i] = b[i] * c[i]; +} + +// CHECK-LABEL: @simd_safelen_clause +// CHECK-NOT: !llvm.mem.parallel_loop_access +// CHECK-LABEL: omp.inner.for.inc: +// CHECK: br label %omp.inner.for.cond, !llvm.loop +// CHECK: ret void +void simd_safelen_clause(float *a, float *b, float *c, int N) { + #pragma omp simd safelen(4) + for (int i = 0; i < N; i += 2) + a[i] = b[i] * c[i]; +} + +extern long long initial_val(); + +// CHECK-LABEL: @simd_simdlen_and_linear_clause +// CHECK: omp.inner.for.body: +// CHECK: !llvm.mem.parallel_loop_access +// CHECK: ret void +void simd_simdlen_and_linear_clause(float *a, float *b, float *c, int N) { + long long lv = initial_val(); + #pragma omp simd simdlen(2) linear(lv: 4) + for (int i = 0; i < N; ++i) { + a[lv] = b[lv] * c[lv]; + lv += 4; + } +} + +extern float gfloat; + +// CHECK-LABEL: @simd_aligned_and_private_clause +// CHECK-LABEL: entry: +// CHECK: %gfloat = alloca float, align 4 +// CHECK: store float 1.000000e+00, float* @gfloat, align 4 +// CHECK-LABEL: omp.inner.for.body: +// CHECK-NOT: @gfloat +// CHECK: load{{.*}}!llvm.mem.parallel_loop_access +// CHECK: store float {{.*}}, float* %gfloat, align 4, !llvm.mem.parallel_loop_access +// CHECK: %[[FADD:add[0-9]+]] = fadd float %{{[0-9]+}}, 2.000000e+00 
+// CHECK: store float %[[FADD]], float* {{.*}}, align 4, !llvm.mem.parallel_loop_access +// CHECK: ret void +void simd_aligned_and_private_clause(float *a, float *b, float *c, int N) { + gfloat = 1.0f; + #pragma omp simd aligned(a:4) private(gfloat) + for (int i = 0; i < N; i += 2) { + gfloat = b[i] * c[i]; + a[i] = gfloat + 2.0f; + } +} + +// CHECK-LABEL: @simd_lastprivate_and_reduction_clause +// CHECK-LABEL: entry: +// CHECK: %[[SUMVAR:sum[0-9]+]] = alloca float, align 4 +// CHECK: store float 0.000000e+00, float* %[[SUMVAR]], align 4 +// CHECK-LABEL: omp.inner.for.body +// CHECK: %[[LOAD:[0-9]+]] = load float, float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access +// CHECK: %[[FADD:add[0-9]+]] = fadd float %[[LOAD]], %mul{{.*}} +// CHECK: store float %[[FADD]], float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access +// CHECK: store i32{{.*}}, i32* %[[IDXVAR:idx[0-9]+]] +// CHECK-LABEL: omp.inner.for.end: +// CHECK-DAG: %[[TMP1:[0-9]+]] = load i32, i32* %[[IDXVAR]], align 4 +// CHECK-DAG: store i32 %[[TMP1]], i32* %idx, align 4 +// CHECK-DAG: %[[INITVAL:[0-9]+]] = load float, float* %sum, align 4 +// CHECK-DAG: %[[TMP2:[0-9]+]] = load float, float* %[[SUMVAR]], align 4 +// CHECK-DAG: %[[SUMMED:add[0-9]+]] = fadd float %[[INITVAL]], %[[TMP2]] +// CHECK-DAG: store float %[[SUMMED]], float* %sum, align 4 +// CHECK-LABEL: simd.if.end: +// CHECK: %[[OUTVAL:[0-9]+]] = load float, float* %sum, align 4 +// CHECK: %[[OUTADDR:[0-9]+]] = load float*, float** %a.addr, align 8 +// CHECK: store float %[[OUTVAL]], float* %[[OUTADDR]], align 4 +// CHECK: %[[RETIDX:[0-9]+]] = load i32, i32* %idx, align 4 +// CHECK: ret i32 %[[RETIDX]] +int simd_lastprivate_and_reduction_clause(float *a, float *b, float *c, int N) { + float sum = 0.0f; + int idx; + #pragma omp simd lastprivate(idx) reduction(+:sum) + for (int i = 0; i < N; ++i) { + sum += b[i] * c[i]; + idx = i * 2; + } + + *a = sum; + return idx; +} + +// CHECK-LABEL: @simd_collapse_clause +// CHECK: 
omp.inner.for.body: +// CHECK-NOT: for.body: +// CHECK: ret void +void simd_collapse_clause(float **a, float **b, float **c, int N, int M) { + #pragma omp simd collapse(2) + for (int i = 0; i < N; ++i) + for (int j = 0; j < N; ++j) + a[i][j] = b[i][j] * c[i][j]; +} + +// Negative tests; no simd directive, so should be normal code. + +// CHECK-LABEL: @parallel_for +// CHECK-NOT: call void {{.*}} @__kmpc_fork_call +// CHECK-NOT: @.omp_outlined. +// CHECK-NOT: omp.inner.for.body: +// CHECK: ret void +void parallel_for(float *a, float *b, float *c, int N) { + #pragma omp parallel for + for (int i = 0; i < N; ++i) + a[i] = b[i] * c[i]; +} + +extern void long_running_func(int); + +// CHECK-LABEL: @taskloop +// CHECK-NOT: call i8* @__kmpc_omp_task_alloc +// CHECK-NOT: call void @__kmpc_taskloop +// CHECK: ret void +void taskloop(int N) { + #pragma omp taskloop + for (int i = 0; i < N; ++i) + long_running_func(i); +} + +// Combined constructs; simd part should work, rest should be ignored. + +// CHECK-LABEL: @parallel_for_simd +// CHECK-NOT: call void {{.*}} @__kmpc_fork_call +// CHECK-NOT: @.omp_outlined. +// CHECK: omp.inner.for.body: +// CHECK: ret void +void parallel_for_simd(float *a, float *b, float *c, int N) { +#pragma omp parallel for simd num_threads(2) simdlen(4) + for (int i = 0; i < N; ++i) + a[i] = b[i] * c[i]; +} + +// Make sure there's no declarations for libomp runtime functions +// CHECK-NOT: declare void @__kmpc + +// CHECK-LABEL: !llvm.ident = !{!0} + +// simd_safelen_clause width md +// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 4} +// simd_simdlen_clause width md +// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 2}