Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -42,8 +42,6 @@ ExecutionMode getExecutionMode() const; - bool requiresFullRuntime() const { return RequiresFullRuntime; } - /// Get barrier to synchronize all threads in a block. void syncCTAThreads(CodeGenFunction &CGF); @@ -161,12 +159,6 @@ /// Constant for NVPTX for better optimization. bool isDefaultLocationConstant() const override { return true; } - /// Returns additional flags that can be stored in reserved_2 field of the - /// default location. - /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + - /// Full/Lightweight runtime mode. Used for better optimization. - unsigned getDefaultLocationReserved2Flags() const override; - public: explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; @@ -386,17 +378,9 @@ /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; - /// Check if the full runtime is required (default - yes). - bool RequiresFullRuntime = true; - - /// true if we're emitting the code for the target region and next parallel - /// region is L0 for sure. - bool IsInTargetMasterThreadRegion = false; /// true if currently emitting code for target/teams/distribute region, false /// - otherwise. bool IsInTTDRegion = false; - /// true if we're definitely in the parallel region. - bool IsInParallelRegion = false; /// Map between an outlined function and its wrapper. llvm::DenseMap WrapperFunctionsMap; @@ -421,12 +405,10 @@ using EscapedParamsTy = llvm::SmallPtrSet; struct FunctionData { DeclToAddrMapTy LocalVarData; - llvm::Optional SecondaryLocalVarData = llvm::None; EscapedParamsTy EscapedParameters; llvm::SmallVector EscapedVariableLengthDecls; llvm::SmallVector, 4> EscapedVariableLengthDeclsAddrs; - llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr MappedParams; }; /// Maps the function to the list of the globalized variables with their @@ -438,9 +420,6 @@ /// reductions. /// All the records are gathered into a union `union.type` is created. llvm::SmallVector TeamsReductions; - /// Shared pointer for the global memory in the global memory buffer used for - /// the given kernel. - llvm::GlobalVariable *KernelStaticGlobalized = nullptr; /// Pair of the Non-SPMD team and all reductions variables in this team /// region. std::pair> Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -73,29 +73,15 @@ CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode = CGOpenMPRuntimeGPU::EM_Unknown; CGOpenMPRuntimeGPU::ExecutionMode &ExecMode; - bool SavedRuntimeMode = false; - bool *RuntimeMode = nullptr; public: - /// Constructor for Non-SPMD mode. - ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode) + ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode, CGOpenMPRuntimeGPU::ExecutionMode NewExecMode) : ExecMode(ExecMode) { SavedExecMode = ExecMode; - ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD; - } - /// Constructor for SPMD mode. - ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode, - bool &RuntimeMode, bool FullRuntimeMode) - : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { - SavedExecMode = ExecMode; - SavedRuntimeMode = RuntimeMode; - ExecMode = CGOpenMPRuntimeGPU::EM_SPMD; - RuntimeMode = FullRuntimeMode; + ExecMode = NewExecMode; } ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; - if (RuntimeMode) - *RuntimeMode = SavedRuntimeMode; } }; @@ -109,9 +95,6 @@ /// Global memory alignment for performance. GlobalMemoryAlignment = 128, - - /// Maximal size of the shared memory buffer. - SharedMemorySize = 128, }; static const ValueDecl *getPrivateItem(const Expr *RefExpr) { @@ -1012,7 +995,7 @@ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, CGOpenMPRuntimeGPU::EM_NonSPMD); EntryFunctionState EST; WrapperFunctionsMap.clear(); @@ -1047,8 +1030,7 @@ void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime())); - IsInTargetMasterThreadRegion = IsSPMD; + Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD)); if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } @@ -1060,7 +1042,7 @@ emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime()); + OMPBuilder.createTargetDeinit(Bld, IsSPMD); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -1070,9 +1052,7 @@ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { ExecutionRuntimeModesRAII ModeRAII( - CurrentExecutionMode, RequiresFullRuntime, - CGM.getLangOpts().OpenMPCUDAForceFullRuntime || - !supportsLightweightRuntime(CGM.getContext(), D)); + CurrentExecutionMode, CGOpenMPRuntimeGPU::EM_SPMD); EntryFunctionState EST; // Emit target region as a standalone region. @@ -1181,21 +1161,6 @@ (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; } // anonymous namespace -unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const { - switch (getExecutionMode()) { - case EM_SPMD: - if (requiresFullRuntime()) - return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); - return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; - case EM_NonSPMD: - assert(requiresFullRuntime() && "Expected full runtime."); - return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); - case EM_Unknown: - return UndefinedMode; - } - llvm_unreachable("Unknown flags are requested."); -} - CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -1242,33 +1207,13 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. - class NVPTXPrePostActionTy : public PrePostActionTy { - bool &IsInParallelRegion; - bool PrevIsInParallelRegion; - - public: - NVPTXPrePostActionTy(bool &IsInParallelRegion) - : IsInParallelRegion(IsInParallelRegion) {} - void Enter(CodeGenFunction &CGF) override { - PrevIsInParallelRegion = IsInParallelRegion; - IsInParallelRegion = true; - } - void Exit(CodeGenFunction &CGF) override { - IsInParallelRegion = PrevIsInParallelRegion; - } - } Action(IsInParallelRegion); - CodeGen.setAction(Action); bool PrevIsInTTDRegion = IsInTTDRegion; IsInTTDRegion = false; - bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; - IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; - if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD && - !IsInParallelRegion) { + if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) { llvm::Function *WrapperFun = createParallelDataSharingWrapper(OutlinedFun, D); WrapperFunctionsMap[OutlinedFun] = WrapperFun; @@ -3660,16 +3605,6 @@ assert(VD->isCanonicalDecl() && "Expected canonical declaration"); Data.insert(std::make_pair(VD, MappedVarData())); } - if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { - CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); - VarChecker.Visit(Body); - I->getSecond().SecondaryLocalVarData.emplace(); - DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData; - for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { - assert(VD->isCanonicalDecl() && "Expected canonical declaration"); - Data.insert(std::make_pair(VD, MappedVarData())); - } - } if (!NeedToDelayGlobalization) { emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); struct GlobalizationScope final : EHScopeStack::Cleanup { Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1228,17 +1228,13 @@ /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime); + InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD); /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime); + void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); ///} Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -443,8 +443,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) +__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) +__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3765,8 +3765,7 @@ } OpenMPIRBuilder::InsertPointTy -OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime) { +OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3778,14 +3777,12 @@ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); ConstantInt *UseGenericStateMachine = ConstantInt::getBool(Int32->getContext(), !IsSPMD); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal}); + Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3819,8 +3816,7 @@ } void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - bool IsSPMD, - bool RequiresFullRuntime) { + bool IsSPMD) { if (!updateToLocation(Loc)) return; @@ -3830,13 +3826,11 @@ ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); + Builder.CreateCall(Fn, {Ident, IsSPMDVal}); } std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef Parts, Index: openmp/libomptarget/DeviceRTL/include/Interface.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Interface.h +++ openmp/libomptarget/DeviceRTL/include/Interface.h @@ -217,9 +217,9 @@ int8_t __kmpc_is_spmd_exec_mode(); int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine, bool); + bool UseGenericStateMachine); -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool); +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode); ///} Index: openmp/libomptarget/DeviceRTL/src/Kernel.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -66,7 +66,7 @@ /// \param Ident Source location identification, can be NULL. /// int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine, bool) { + bool UseGenericStateMachine) { FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; if (IsSPMD) { @@ -125,7 +125,7 @@ /// /// \param Ident Source location identification, can be NULL. /// -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) { +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) { FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; state::assumeInitialState(IsSPMD);