diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -2037,6 +2037,10 @@
 
 Emit OpenMP code only for SIMD-based constructs.
 
+.. option:: -fopenmp-target-simd, -fno-openmp-target-simd
+
+Emit OpenMP target offloading code that supports SIMD execution.
+
 .. option:: -fopenmp-version=
 
 .. option:: -fopenmp-extensions, -fno-openmp-extensions
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -233,6 +233,7 @@
 LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
 LANGOPT(OpenMPExtensions , 1, 1, "Enable all Clang extensions for OpenMP directives and clauses")
 LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.")
+LANGOPT(OpenMPTargetSimd , 1, 0, "Use OpenMP target offloading SIMD support.")
 LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
 LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2406,9 +2406,12 @@
   Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>,
   HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
+def fopenmp_target_simd : Flag<["-"], "fopenmp-target-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>,
+  HelpText<"Emit OpenMP target offloading code that supports SIMD execution.">;
 def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
   HelpText<"Use the experimental OpenMP-IR-Builder codegen path.">;
 def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_target_simd : Flag<["-"], "fno-openmp-target-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -49,11 +49,11 @@
 
   /// Helper for target directive initialization.
   void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                      bool IsSPMD);
+                      bool IsSPMD, bool IsSIMD);
 
   /// Helper for target directive finalization.
   void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                        bool IsSPMD);
+                        bool IsSPMD, bool IsSIMD);
 
   /// Helper for generic variables globalization prolog.
   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1029,7 +1029,8 @@
     void Enter(CodeGenFunction &CGF) override {
       auto &RT =
           static_cast(CGF.CGM.getOpenMPRuntime());
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
+      RT.emitKernelInit(CGF, EST, /* IsSPMD */ false,
+                        CGF.CGM.getLangOpts().OpenMPTargetSimd);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
@@ -1037,7 +1038,8 @@
       auto &RT =
           static_cast(CGF.CGM.getOpenMPRuntime());
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
+      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false,
+                          CGF.CGM.getLangOpts().OpenMPTargetSimd);
     }
   } Action(EST);
   CodeGen.setAction(Action);
@@ -1048,22 +1050,24 @@
 }
 
 void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
-                                        EntryFunctionState &EST, bool IsSPMD) {
+                                        EntryFunctionState &EST, bool IsSPMD,
+                                        bool IsSIMD) {
   CGBuilderTy &Bld = CGF.Builder;
-  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
+  Bld.restoreIP(
+      OMPBuilder.createTargetInit(Bld, IsSPMD, IsSIMD, requiresFullRuntime()));
   IsInTargetMasterThreadRegion = IsSPMD;
   if (!IsSPMD)
     emitGenericVarsProlog(CGF, EST.Loc);
 }
 
 void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
-                                          EntryFunctionState &EST,
-                                          bool IsSPMD) {
+                                          EntryFunctionState &EST, bool IsSPMD,
+                                          bool IsSIMD) {
   if (!IsSPMD)
     emitGenericVarsEpilog(CGF);
 
   CGBuilderTy &Bld = CGF.Builder;
-  OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+  OMPBuilder.createTargetDeinit(Bld, IsSPMD, IsSIMD, requiresFullRuntime());
 }
 
 void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1088,13 +1092,15 @@
         CGOpenMPRuntimeGPU::EntryFunctionState &EST)
         : RT(RT), EST(EST) {}
     void Enter(CodeGenFunction &CGF) override {
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
+      RT.emitKernelInit(CGF, EST, /* IsSPMD */ true,
+                        CGF.CGM.getLangOpts().OpenMPTargetSimd);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
     void Exit(CodeGenFunction &CGF) override {
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
+      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true,
+                          CGF.CGM.getLangOpts().OpenMPTargetSimd);
     }
   } Action(*this, EST);
   CodeGen.setAction(Action);
@@ -1111,13 +1117,19 @@
 // 'generic', the runtime reserves one warp for the master, otherwise, all
 // warps participate in parallel work.
 static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
-                                     bool Mode) {
+                                     bool IsSPMD, bool IsSIMD) {
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+
   auto *GVMode = new llvm::GlobalVariable(
       CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
       llvm::GlobalValue::WeakAnyLinkage,
-      llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
-                                              : OMP_TGT_EXEC_MODE_GENERIC),
-      Twine(Name, "_exec_mode"));
+      llvm::ConstantInt::get(CGM.Int8Ty, Mode), Twine(Name, "_exec_mode"));
   CGM.addCompilerUsedGlobal(GVMode);
 }
 
@@ -1152,15 +1164,16 @@
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
-  if (Mode)
+  bool IsSPMD = supportsSPMDExecutionMode(CGM.getContext(), D);
+  if (IsSPMD)
     emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
   else
     emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                       CodeGen);
 
-  setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
+  setPropertyExecutionMode(CGM, OutlinedFn->getName(), IsSPMD,
+                           CGM.getLangOpts().OpenMPTargetSimd);
 }
 
 namespace {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6694,6 +6694,10 @@
       CmdArgs.push_back("-fopenmp-host-ir-file-path");
       CmdArgs.push_back(Args.MakeArgString(OpenMPDeviceInput->getFilename()));
     }
+    if (Args.hasFlag(options::OPT_fopenmp_target_simd,
+                     options::OPT_fno_openmp_target_simd,
+                     /*Default=*/false))
+      CmdArgs.push_back("-fopenmp-target-simd");
   }
 
   if (Triple.isAMDGPU()) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3863,6 +3863,10 @@
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder);
   bool IsTargetSpecified =
       Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
+  Opts.OpenMPTargetSimd =
+      IsTargetSpecified &&
+      Args.hasFlag(options::OPT_fopenmp_target_simd,
+                   options::OPT_fno_openmp_target_simd, /*Default=*/false);
   Opts.OpenMPTargetNewRuntime =
       Opts.OpenMPIsDevice &&
       Args.hasArg(options::OPT_fopenmp_target_new_runtime);
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -133,7 +133,8 @@
   OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
   OMP_TGT_EXEC_MODE_GENERIC_SPMD =
       OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD)
+  OMP_TGT_EXEC_MODE_SIMD = 1 << 2,
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_SIMD)
 };
 
 } // end namespace omp
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1005,14 +1005,18 @@
   /// \param Loc The insert and source location description.
   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
+  /// \param IsSIMD Flag to indicate if the SIMD execution mode bit is set.
   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+                                 bool IsSIMD, bool RequiresFullRuntime);
 
   /// Create a runtime call for kmpc_target_deinit
   ///
   /// \param Loc The insert and source location description.
   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
+  /// \param IsSIMD Flag to indicate if the SIMD execution mode bit is set.
   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
+                          bool IsSIMD, bool RequiresFullRuntime);
 
   ///}
 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2755,15 +2755,21 @@
 
 OpenMPIRBuilder::InsertPointTy
 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
-                                  bool RequiresFullRuntime) {
+                                  bool IsSIMD, bool RequiresFullRuntime) {
   if (!updateToLocation(Loc))
     return Loc.IP;
 
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
   Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
-      IntegerType::getInt8Ty(Int8->getContext()),
-      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+  ConstantInt *ModeCI =
+      ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), Mode);
   ConstantInt *UseGenericStateMachine =
       ConstantInt::getBool(Int32->getContext(), !IsSPMD);
   ConstantInt *RequiresFullRuntimeVal =
@@ -2773,7 +2779,7 @@
       omp::RuntimeFunction::OMPRTL___kmpc_target_init);
 
   CallInst *ThreadKind = Builder.CreateCall(
-      Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+      Fn, {Ident, ModeCI, UseGenericStateMachine, RequiresFullRuntimeVal});
 
   Value *ExecUserCode = Builder.CreateICmpEQ(
       ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@@ -2807,23 +2813,29 @@
 }
 
 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
-                                         bool IsSPMD,
+                                         bool IsSPMD, bool IsSIMD,
                                          bool RequiresFullRuntime) {
   if (!updateToLocation(Loc))
     return;
 
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
   Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
-      IntegerType::getInt8Ty(Int8->getContext()),
-      IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+  int8_t Mode = 0;
+  if (IsSPMD)
+    Mode |= OMP_TGT_EXEC_MODE_SPMD;
+  else
+    Mode |= OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSIMD)
+    Mode |= OMP_TGT_EXEC_MODE_SIMD;
+  ConstantInt *ModeCI =
+      ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), Mode);
   ConstantInt *RequiresFullRuntimeVal =
       ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
 
   Function *Fn = getOrCreateRuntimeFunctionPtr(
       omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
 
-  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
+  Builder.CreateCall(Fn, {Ident, ModeCI, RequiresFullRuntimeVal});
 }
 
 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef Parts,