diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -213,6 +213,7 @@ LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls") LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device") LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode") +LANGOPT(OpenMPIRBuilder , 1, 0, "Use the experimental OpenMP-IR-Builder codegen path.") LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all constructs when offloading to CUDA devices") LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1656,6 +1656,8 @@ Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>, HelpText<"Emit OpenMP code only for SIMD-based constructs.">; +def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, + HelpText<"Use the experimental OpenMP-IR-Builder codegen path.">; def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -3481,6 +3482,29 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { + // Check if we should use the OMPBuilder + auto *OMPRegionInfo = + dyn_cast_or_null(CGF.CapturedStmtInfo); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + if (OMPBuilder) { + // TODO: Move cancelation point handling into the IRBuilder. + if (EmitChecks && !ForceSimpleCall && OMPRegionInfo && + OMPRegionInfo->hasCancel() && CGF.Builder.GetInsertBlock()) { + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock( + ".cancel.exit", CGF.Builder.GetInsertBlock()->getParent()); + OMPBuilder->setCancellationBlock(ExitBB); + CGF.Builder.SetInsertPoint(ExitBB); + CodeGenFunction::JumpDest CancelDestination = + CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); + CGF.EmitBranchThroughCleanup(CancelDestination); + } + auto IP = OMPBuilder->CreateBarrier(CGF.Builder, Kind, ForceSimpleCall, + EmitChecks); + CGF.Builder.restoreIP(IP); + return; + } + if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); @@ -3490,8 +3514,7 @@ // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - if (auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -5,6 +5,7 @@ Core Coroutines Coverage + FrontendOpenMP IPO IRReader AggressiveInstCombine diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -44,6 +44,7 @@ class DataLayout; class FunctionType; class LLVMContext; +class OpenMPIRBuilder; class IndexedInstrProfReader; } @@ -321,6 +322,7 @@ std::unique_ptr ObjCRuntime; std::unique_ptr OpenCLRuntime; std::unique_ptr OpenMPRuntime; + std::unique_ptr OMPBuilder; std::unique_ptr CUDARuntime; std::unique_ptr DebugInfo; std::unique_ptr ObjCData; @@ -587,6 +589,9 @@ return *OpenMPRuntime; } + /// Return a pointer to the configured OpenMPIRBuilder, if any. + llvm::OpenMPIRBuilder *getOpenMPIRBuilder() { return OMPBuilder.get(); } + /// Return a reference to the configured CUDA runtime. CGCUDARuntime &getCUDARuntime() { assert(CUDARuntime != nullptr); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -218,6 +219,14 @@ OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } + + // The OpenMP-IR-Builder should eventually replace the above runtime codegens + // but we are not there yet so they both reside in CGModule for now and the + // OpenMP-IR-Builder is opt-in only. + if (LangOpts.OpenMPIRBuilder) { + OMPBuilder.reset(new llvm::OpenMPIRBuilder(TheModule)); + OMPBuilder->initialize(); + } } void CodeGenModule::createCUDARuntime() { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4955,6 +4955,7 @@ CmdArgs.push_back("-fnoopenmp-use-tls"); Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd, options::OPT_fno_openmp_simd); + Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3014,6 +3014,8 @@ Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls); Opts.OpenMPIsDevice = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device); + Opts.OpenMPIRBuilder = + Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder); bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); diff --git a/clang/test/Driver/fopenmp.c b/clang/test/Driver/fopenmp.c --- a/clang/test/Driver/fopenmp.c +++ b/clang/test/Driver/fopenmp.c @@ -124,6 +124,12 @@ // CHECK-LD-STATIC-IOMP5-NO-BDYNAMIC: "-{{B?}}static" {{.*}} "-liomp5" // CHECK-LD-STATIC-IOMP5-NO-BDYNAMIC-NOT: "-Bdynamic" // +// RUN: %clang -target x86_64-linux-gnu -fopenmp -fopenmp-enable-irbuilder -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMPIRBUILDER +// +// CHECK-CC1-OPENMPIRBUILDER: "-cc1" +// CHECK-CC1-OPENMPIRBUILDER-SAME: "-fopenmp" +// CHECK-CC1-OPENMPIRBUILDER-SAME: "-fopenmp-enable-irbuilder" +// // We'd like to check that the default is sane, but until we have the ability // to *always* semantically analyze OpenMP without always generating runtime // calls (in the event of an unsupported runtime), we don't have a good way to diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -1,6 +1,14 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CLANGCG // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CLANGCG + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -mllvm -openmp-ir-builder-optimistic-attributes -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER_OPT +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -mllvm -openmp-ir-builder-optimistic-attributes -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER_OPT // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s @@ -34,6 +42,13 @@ return tmain(argc) + tmain(argv[0][0]) + a; } +// CLANGCG: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +// CLANGCG-NOT: # +// IRBUILDER: ; Function Attrs: nounwind +// IRBUILDER-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) # +// IRBUILDER_OPT: ; Function Attrs: nofree nosync nounwind readonly +// IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) # + // CHECK: define {{.+}} [[TMAIN_INT]]( // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]]) // CHECK: call void @__kmpc_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]]) diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -2,6 +2,10 @@ // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s