diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -48,558 +48,614 @@ using namespace CodeGen; using namespace llvm::omp; -namespace { -/// Base class for handling code generation inside OpenMP regions. -class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { -public: - /// Kinds of OpenMP regions used in codegen. - enum CGOpenMPRegionKind { - /// Region with outlined function for standalone 'parallel' - /// directive. - ParallelOutlinedRegion, - /// Region with outlined function for standalone 'task' directive. - TaskOutlinedRegion, - /// Region for constructs that do not require function outlining, - /// like 'for', 'sections', 'atomic' etc. directives. - InlinedRegion, - /// Region with outlined function for standalone 'target' directive. - TargetRegion, - }; +namespace +{ + /// Base class for handling code generation inside OpenMP regions. + class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo + { + public: + /// Kinds of OpenMP regions used in codegen. + enum CGOpenMPRegionKind + { + /// Region with outlined function for standalone 'parallel' + /// directive. + ParallelOutlinedRegion, + /// Region with outlined function for standalone 'task' directive. + TaskOutlinedRegion, + /// Region for constructs that do not require function outlining, + /// like 'for', 'sections', 'atomic' etc. directives. + InlinedRegion, + /// Region with outlined function for standalone 'target' directive. + TargetRegion, + }; - CGOpenMPRegionInfo(const CapturedStmt &CS, - const CGOpenMPRegionKind RegionKind, - const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, - bool HasCancel) - : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), - CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} + CGOpenMPRegionInfo(const CapturedStmt &CS, + const CGOpenMPRegionKind RegionKind, + const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, + bool HasCancel) + : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), + CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} - CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, - const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, - bool HasCancel) - : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), - Kind(Kind), HasCancel(HasCancel) {} + CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, + const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, + bool HasCancel) + : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), + Kind(Kind), HasCancel(HasCancel) {} - /// Get a variable or parameter for storing global thread id - /// inside OpenMP construct. - virtual const VarDecl *getThreadIDVariable() const = 0; + /// Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + virtual const VarDecl *getThreadIDVariable() const = 0; - /// Emit the captured statement body. - void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; + /// Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; - /// Get an LValue for the current ThreadID variable. - /// \return LValue for thread id variable. This LValue always has type int32*. - virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + /// Get an LValue for the current ThreadID variable. + /// \return LValue for thread id variable. This LValue always has type int32*. + virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); - virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} - CGOpenMPRegionKind getRegionKind() const { return RegionKind; } + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } - OpenMPDirectiveKind getDirectiveKind() const { return Kind; } + OpenMPDirectiveKind getDirectiveKind() const { return Kind; } - bool hasCancel() const { return HasCancel; } + bool hasCancel() const { return HasCancel; } - static bool classof(const CGCapturedStmtInfo *Info) { - return Info->getKind() == CR_OpenMP; - } + static bool classof(const CGCapturedStmtInfo *Info) + { + return Info->getKind() == CR_OpenMP; + } - ~CGOpenMPRegionInfo() override = default; + ~CGOpenMPRegionInfo() override = default; -protected: - CGOpenMPRegionKind RegionKind; - RegionCodeGenTy CodeGen; - OpenMPDirectiveKind Kind; - bool HasCancel; -}; + protected: + CGOpenMPRegionKind RegionKind; + RegionCodeGenTy CodeGen; + OpenMPDirectiveKind Kind; + bool HasCancel; + }; -/// API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { -public: - CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, - const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel, - StringRef HelperName) - : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, - HasCancel), - ThreadIDVar(ThreadIDVar), HelperName(HelperName) { - assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); - } + /// API for captured statement code generation in OpenMP constructs. + class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo + { + public: + CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, + const RegionCodeGenTy &CodeGen, + OpenMPDirectiveKind Kind, bool HasCancel, + StringRef HelperName) + : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, + HasCancel), + ThreadIDVar(ThreadIDVar), HelperName(HelperName) + { + assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); + } - /// Get a variable or parameter for storing global thread id - /// inside OpenMP construct. - const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } + /// Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// Get the name of the capture helper. - StringRef getHelperName() const override { return HelperName; } + /// Get the name of the capture helper. + StringRef getHelperName() const override { return HelperName; } - static bool classof(const CGCapturedStmtInfo *Info) { - return CGOpenMPRegionInfo::classof(Info) && - cast(Info)->getRegionKind() == - ParallelOutlinedRegion; - } + static bool classof(const CGCapturedStmtInfo *Info) + { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == + ParallelOutlinedRegion; + } -private: - /// A variable or parameter storing global thread id for OpenMP - /// constructs. - const VarDecl *ThreadIDVar; - StringRef HelperName; -}; - -/// API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { -public: - class UntiedTaskActionTy final : public PrePostActionTy { - bool Untied; - const VarDecl *PartIDVar; - const RegionCodeGenTy UntiedCodeGen; - llvm::SwitchInst *UntiedSwitch = nullptr; + private: + /// A variable or parameter storing global thread id for OpenMP + /// constructs. + const VarDecl *ThreadIDVar; + StringRef HelperName; + }; + /// API for captured statement code generation in OpenMP constructs. + class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo + { public: - UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, - const RegionCodeGenTy &UntiedCodeGen) - : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} - void Enter(CodeGenFunction &CGF) override { - if (Untied) { - // Emit task switching point. - LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( - CGF.GetAddrOfLocalVar(PartIDVar), - PartIDVar->getType()->castAs()); - llvm::Value *Res = - CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); - llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); - UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); - CGF.EmitBlock(DoneBB); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); - CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); - UntiedSwitch->addCase(CGF.Builder.getInt32(0), - CGF.Builder.GetInsertBlock()); - emitUntiedSwitch(CGF); + class UntiedTaskActionTy final : public PrePostActionTy + { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override + { + if (Untied) + { + // Emit task switching point. + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs()); + llvm::Value *Res = + CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } } - } - void emitUntiedSwitch(CodeGenFunction &CGF) const { - if (Untied) { - LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( - CGF.GetAddrOfLocalVar(PartIDVar), - PartIDVar->getType()->castAs()); - CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), - PartIdLVal); - UntiedCodeGen(CGF); - CodeGenFunction::JumpDest CurPoint = - CGF.getJumpDestInCurrentScope(".untied.next."); - CGF.EmitBranch(CGF.ReturnBlock.getBlock()); - CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); - UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), - CGF.Builder.GetInsertBlock()); - CGF.EmitBranchThroughCleanup(CurPoint); - CGF.EmitBlock(CurPoint.getBlock()); + void emitUntiedSwitch(CodeGenFunction &CGF) const + { + if (Untied) + { + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranch(CGF.ReturnBlock.getBlock()); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; + CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, + const VarDecl *ThreadIDVar, + const RegionCodeGenTy &CodeGen, + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) + : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), + ThreadIDVar(ThreadIDVar), Action(Action) + { + assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } - unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } - }; - CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, - const VarDecl *ThreadIDVar, - const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel, - const UntiedTaskActionTy &Action) - : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar), Action(Action) { - assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); - } - /// Get a variable or parameter for storing global thread id - /// inside OpenMP construct. - const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } + /// Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// Get an LValue for the current ThreadID variable. - LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; + /// Get an LValue for the current ThreadID variable. + LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; - /// Get the name of the capture helper. - StringRef getHelperName() const override { return ".omp_outlined."; } + /// Get the name of the capture helper. + StringRef getHelperName() const override { return ".omp_outlined."; } - void emitUntiedSwitch(CodeGenFunction &CGF) override { - Action.emitUntiedSwitch(CGF); - } + void emitUntiedSwitch(CodeGenFunction &CGF) override + { + Action.emitUntiedSwitch(CGF); + } - static bool classof(const CGCapturedStmtInfo *Info) { - return CGOpenMPRegionInfo::classof(Info) && - cast(Info)->getRegionKind() == - TaskOutlinedRegion; - } + static bool classof(const CGCapturedStmtInfo *Info) + { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == + TaskOutlinedRegion; + } + + private: + /// A variable or parameter storing global thread id for OpenMP + /// constructs. + const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; + }; -private: - /// A variable or parameter storing global thread id for OpenMP + /// API for inlined captured statement code generation in OpenMP /// constructs. - const VarDecl *ThreadIDVar; - /// Action for emitting code for untied tasks. - const UntiedTaskActionTy &Action; -}; - -/// API for inlined captured statement code generation in OpenMP -/// constructs. -class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { -public: - CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, - const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) - : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), - OldCSI(OldCSI), - OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} - - // Retrieve the value of the context parameter. - llvm::Value *getContextValue() const override { - if (OuterRegionInfo) - return OuterRegionInfo->getContextValue(); - llvm_unreachable("No context value for inlined OpenMP region"); - } - - void setContextValue(llvm::Value *V) override { - if (OuterRegionInfo) { - OuterRegionInfo->setContextValue(V); - return; + class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo + { + public: + CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, + const RegionCodeGenTy &CodeGen, + OpenMPDirectiveKind Kind, bool HasCancel) + : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), + OldCSI(OldCSI), + OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} + + // Retrieve the value of the context parameter. + llvm::Value *getContextValue() const override + { + if (OuterRegionInfo) + return OuterRegionInfo->getContextValue(); + llvm_unreachable("No context value for inlined OpenMP region"); } - llvm_unreachable("No context value for inlined OpenMP region"); - } - /// Lookup the captured field decl for a variable. - const FieldDecl *lookup(const VarDecl *VD) const override { - if (OuterRegionInfo) - return OuterRegionInfo->lookup(VD); - // If there is no outer outlined region,no need to lookup in a list of - // captured variables, we can use the original one. - return nullptr; - } + void setContextValue(llvm::Value *V) override + { + if (OuterRegionInfo) + { + OuterRegionInfo->setContextValue(V); + return; + } + llvm_unreachable("No context value for inlined OpenMP region"); + } - FieldDecl *getThisFieldDecl() const override { - if (OuterRegionInfo) - return OuterRegionInfo->getThisFieldDecl(); - return nullptr; - } + /// Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override + { + if (OuterRegionInfo) + return OuterRegionInfo->lookup(VD); + // If there is no outer outlined region,no need to lookup in a list of + // captured variables, we can use the original one. + return nullptr; + } - /// Get a variable or parameter for storing global thread id - /// inside OpenMP construct. - const VarDecl *getThreadIDVariable() const override { - if (OuterRegionInfo) - return OuterRegionInfo->getThreadIDVariable(); - return nullptr; - } + FieldDecl *getThisFieldDecl() const override + { + if (OuterRegionInfo) + return OuterRegionInfo->getThisFieldDecl(); + return nullptr; + } - /// Get an LValue for the current ThreadID variable. - LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { - if (OuterRegionInfo) - return OuterRegionInfo->getThreadIDVariableLValue(CGF); - llvm_unreachable("No LValue for inlined OpenMP construct"); - } + /// Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override + { + if (OuterRegionInfo) + return OuterRegionInfo->getThreadIDVariable(); + return nullptr; + } - /// Get the name of the capture helper. - StringRef getHelperName() const override { - if (auto *OuterRegionInfo = getOldCSI()) - return OuterRegionInfo->getHelperName(); - llvm_unreachable("No helper name for inlined OpenMP construct"); - } + /// Get an LValue for the current ThreadID variable. + LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override + { + if (OuterRegionInfo) + return OuterRegionInfo->getThreadIDVariableLValue(CGF); + llvm_unreachable("No LValue for inlined OpenMP construct"); + } - void emitUntiedSwitch(CodeGenFunction &CGF) override { - if (OuterRegionInfo) - OuterRegionInfo->emitUntiedSwitch(CGF); - } + /// Get the name of the capture helper. + StringRef getHelperName() const override + { + if (auto *OuterRegionInfo = getOldCSI()) + return OuterRegionInfo->getHelperName(); + llvm_unreachable("No helper name for inlined OpenMP construct"); + } - CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } + void emitUntiedSwitch(CodeGenFunction &CGF) override + { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } - static bool classof(const CGCapturedStmtInfo *Info) { - return CGOpenMPRegionInfo::classof(Info) && - cast(Info)->getRegionKind() == InlinedRegion; - } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } + + static bool classof(const CGCapturedStmtInfo *Info) + { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == InlinedRegion; + } + + ~CGOpenMPInlinedRegionInfo() override = default; + + private: + /// CodeGen info about outer OpenMP region. + CodeGenFunction::CGCapturedStmtInfo *OldCSI; + CGOpenMPRegionInfo *OuterRegionInfo; + }; - ~CGOpenMPInlinedRegionInfo() override = default; + /// API for captured statement code generation in OpenMP target + /// constructs. For this captures, implicit parameters are used instead of the + /// captured fields. The name of the target region has to be unique in a given + /// application so it is provided by the client, because only the client has + /// the information to generate that. + class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo + { + public: + CGOpenMPTargetRegionInfo(const CapturedStmt &CS, + const RegionCodeGenTy &CodeGen, StringRef HelperName) + : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, + /*HasCancel=*/false), + HelperName(HelperName) + { + } -private: - /// CodeGen info about outer OpenMP region. - CodeGenFunction::CGCapturedStmtInfo *OldCSI; - CGOpenMPRegionInfo *OuterRegionInfo; -}; + /// This is unused for target regions because each starts executing + /// with a single thread. + const VarDecl *getThreadIDVariable() const override { return nullptr; } -/// API for captured statement code generation in OpenMP target -/// constructs. For this captures, implicit parameters are used instead of the -/// captured fields. The name of the target region has to be unique in a given -/// application so it is provided by the client, because only the client has -/// the information to generate that. -class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { -public: - CGOpenMPTargetRegionInfo(const CapturedStmt &CS, - const RegionCodeGenTy &CodeGen, StringRef HelperName) - : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, - /*HasCancel=*/false), - HelperName(HelperName) {} + /// Get the name of the capture helper. + StringRef getHelperName() const override { return HelperName; } - /// This is unused for target regions because each starts executing - /// with a single thread. - const VarDecl *getThreadIDVariable() const override { return nullptr; } + static bool classof(const CGCapturedStmtInfo *Info) + { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == TargetRegion; + } - /// Get the name of the capture helper. - StringRef getHelperName() const override { return HelperName; } + private: + StringRef HelperName; + }; - static bool classof(const CGCapturedStmtInfo *Info) { - return CGOpenMPRegionInfo::classof(Info) && - cast(Info)->getRegionKind() == TargetRegion; + static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) + { + llvm_unreachable("No codegen for expressions"); } + /// API for generation of expressions captured in a innermost OpenMP + /// region. + class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo + { + public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) + { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (const auto &C : CS.captures()) + { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; -private: - StringRef HelperName; -}; + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; -static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { - llvm_unreachable("No codegen for expressions"); -} -/// API for generation of expressions captured in a innermost OpenMP -/// region. -class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { -public: - CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) - : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, - OMPD_unknown, - /*HasCancel=*/false), - PrivScope(CGF) { - // Make sure the globals captured in the provided statement are local by - // using the privatization logic. We assume the same variable is not - // captured more than once. - for (const auto &C : CS.captures()) { - if (!C.capturesVariable() && !C.capturesVariableByCopy()) - continue; + DeclRefExpr DRE(CGF.getContext(), const_cast(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + C.getLocation()); + PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); + } + (void)PrivScope.Privatize(); + } - const VarDecl *VD = C.getCapturedVar(); - if (VD->isLocalVarDeclOrParm()) - continue; + /// Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override + { + if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } - DeclRefExpr DRE(CGF.getContext(), const_cast(VD), - /*RefersToEnclosingVariableOrCapture=*/false, - VD->getType().getNonReferenceType(), VK_LValue, - C.getLocation()); - PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); + /// Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override + { + llvm_unreachable("No body for expressions"); } - (void)PrivScope.Privatize(); - } - /// Lookup the captured field decl for a variable. - const FieldDecl *lookup(const VarDecl *VD) const override { - if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) - return FD; - return nullptr; - } + /// Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override + { + llvm_unreachable("No thread id for expressions"); + } - /// Emit the captured statement body. - void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { - llvm_unreachable("No body for expressions"); - } - - /// Get a variable or parameter for storing global thread id - /// inside OpenMP construct. - const VarDecl *getThreadIDVariable() const override { - llvm_unreachable("No thread id for expressions"); - } - - /// Get the name of the capture helper. - StringRef getHelperName() const override { - llvm_unreachable("No helper name for expressions"); - } - - static bool classof(const CGCapturedStmtInfo *Info) { return false; } - -private: - /// Private scope to capture global variables. - CodeGenFunction::OMPPrivateScope PrivScope; -}; - -/// RAII for emitting code of OpenMP constructs. -class InlinedOpenMPRegionRAII { - CodeGenFunction &CGF; - llvm::DenseMap LambdaCaptureFields; - FieldDecl *LambdaThisCaptureField = nullptr; - const CodeGen::CGBlockInfo *BlockInfo = nullptr; - bool NoInheritance = false; - -public: - /// Constructs region for combined constructs. - /// \param CodeGen Code generation sequence for combined directives. Includes - /// a list of functions used for code generation of implicitly inlined - /// regions. - InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel, - bool NoInheritance = true) - : CGF(CGF), NoInheritance(NoInheritance) { - // Start emission for the construct. - CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( - CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); - if (NoInheritance) { - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - LambdaThisCaptureField = CGF.LambdaThisCaptureField; - CGF.LambdaThisCaptureField = nullptr; - BlockInfo = CGF.BlockInfo; - CGF.BlockInfo = nullptr; - } - } - - ~InlinedOpenMPRegionRAII() { - // Restore original CapturedStmtInfo only if we're done with code emission. - auto *OldCSI = - cast(CGF.CapturedStmtInfo)->getOldCSI(); - delete CGF.CapturedStmtInfo; - CGF.CapturedStmtInfo = OldCSI; - if (NoInheritance) { - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - CGF.LambdaThisCaptureField = LambdaThisCaptureField; - CGF.BlockInfo = BlockInfo; - } - } -}; - -/// Values for bit flags used in the ident_t to describe the fields. -/// All enumeric elements are named and described in accordance with the code -/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h -enum OpenMPLocationFlags : unsigned { - /// Use trampoline for internal microtask. - OMP_IDENT_IMD = 0x01, - /// Use c-style ident structure. - OMP_IDENT_KMPC = 0x02, - /// Atomic reduction option for kmpc_reduce. - OMP_ATOMIC_REDUCE = 0x10, - /// Explicit 'barrier' directive. - OMP_IDENT_BARRIER_EXPL = 0x20, - /// Implicit barrier in code. - OMP_IDENT_BARRIER_IMPL = 0x40, - /// Implicit barrier in 'for' directive. - OMP_IDENT_BARRIER_IMPL_FOR = 0x40, - /// Implicit barrier in 'sections' directive. - OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, - /// Implicit barrier in 'single' directive. - OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, - /// Call of __kmp_for_static_init for static loop. - OMP_IDENT_WORK_LOOP = 0x200, - /// Call of __kmp_for_static_init for sections. - OMP_IDENT_WORK_SECTIONS = 0x400, - /// Call of __kmp_for_static_init for distribute. - OMP_IDENT_WORK_DISTRIBUTE = 0x800, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) -}; - -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Values for bit flags for marking which requires clauses have been used. -enum OpenMPOffloadingRequiresDirFlags : int64_t { - /// flag undefined. - OMP_REQ_UNDEFINED = 0x000, - /// no requires clause present. - OMP_REQ_NONE = 0x001, - /// reverse_offload clause. - OMP_REQ_REVERSE_OFFLOAD = 0x002, - /// unified_address clause. - OMP_REQ_UNIFIED_ADDRESS = 0x004, - /// unified_shared_memory clause. - OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, - /// dynamic_allocators clause. - OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) -}; - -enum OpenMPOffloadingReservedDeviceIDs { - /// Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, -}; -} // anonymous namespace + /// Get the name of the capture helper. + StringRef getHelperName() const override + { + llvm_unreachable("No helper name for expressions"); + } -/// Describes ident structure that describes a source location. -/// All descriptions are taken from -/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h -/// Original structure: -/// typedef struct ident { -/// kmp_int32 reserved_1; /**< might be used in Fortran; -/// see above */ -/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; -/// KMP_IDENT_KMPC identifies this union -/// member */ -/// kmp_int32 reserved_2; /**< not really used in Fortran any more; -/// see above */ -///#if USE_ITT_BUILD -/// /* but currently used for storing -/// region-specific ITT */ -/// /* contextual information. */ -///#endif /* USE_ITT_BUILD */ -/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for -/// C++ */ -/// char const *psource; /**< String describing the source location. -/// The string is composed of semi-colon separated -// fields which describe the source file, -/// the function and a pair of line numbers that -/// delimit the construct. -/// */ -/// } ident_t; -enum IdentFieldIndex { - /// might be used in Fortran - IdentField_Reserved_1, - /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. - IdentField_Flags, - /// Not really used in Fortran any more - IdentField_Reserved_2, - /// Source[4] in Fortran, do not use for C++ - IdentField_Reserved_3, - /// String describing the source location. The string is composed of - /// semi-colon separated fields which describe the source file, the function - /// and a pair of line numbers that delimit the construct. - IdentField_PSource -}; - -/// Schedule types for 'omp for' loops (these enumerators are taken from -/// the enum sched_type in kmp.h). -enum OpenMPSchedType { - /// Lower bound for default (unordered) versions. - OMP_sch_lower = 32, - OMP_sch_static_chunked = 33, - OMP_sch_static = 34, - OMP_sch_dynamic_chunked = 35, - OMP_sch_guided_chunked = 36, - OMP_sch_runtime = 37, - OMP_sch_auto = 38, - /// static with chunk adjustment (e.g., simd) - OMP_sch_static_balanced_chunked = 45, - /// Lower bound for 'ordered' versions. - OMP_ord_lower = 64, - OMP_ord_static_chunked = 65, - OMP_ord_static = 66, - OMP_ord_dynamic_chunked = 67, - OMP_ord_guided_chunked = 68, - OMP_ord_runtime = 69, - OMP_ord_auto = 70, - OMP_sch_default = OMP_sch_static, - /// dist_schedule types - OMP_dist_sch_static_chunked = 91, - OMP_dist_sch_static = 92, - /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. - /// Set if the monotonic schedule modifier was present. - OMP_sch_modifier_monotonic = (1 << 29), - /// Set if the nonmonotonic schedule modifier was present. - OMP_sch_modifier_nonmonotonic = (1 << 30), -}; - -/// A basic class for pre|post-action for advanced codegen sequence for OpenMP -/// region. -class CleanupTy final : public EHScopeStack::Cleanup { - PrePostActionTy *Action; - -public: - explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - Action->Exit(CGF); - } -}; + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + + private: + /// Private scope to capture global variables. + CodeGenFunction::OMPPrivateScope PrivScope; + }; + + /// RAII for emitting code of OpenMP constructs. + class InlinedOpenMPRegionRAII + { + CodeGenFunction &CGF; + llvm::DenseMap LambdaCaptureFields; + FieldDecl *LambdaThisCaptureField = nullptr; + const CodeGen::CGBlockInfo *BlockInfo = nullptr; + bool NoInheritance = false; + + public: + /// Constructs region for combined constructs. + /// \param CodeGen Code generation sequence for combined directives. Includes + /// a list of functions used for code generation of implicitly inlined + /// regions. + InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, + OpenMPDirectiveKind Kind, bool HasCancel, + bool NoInheritance = true) + : CGF(CGF), NoInheritance(NoInheritance) + { + // Start emission for the construct. + CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( + CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); + if (NoInheritance) + { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; + } + } + + ~InlinedOpenMPRegionRAII() + { + // Restore original CapturedStmtInfo only if we're done with code emission. + auto *OldCSI = + cast(CGF.CapturedStmtInfo)->getOldCSI(); + delete CGF.CapturedStmtInfo; + CGF.CapturedStmtInfo = OldCSI; + if (NoInheritance) + { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; + } + } + }; + + /// Values for bit flags used in the ident_t to describe the fields. + /// All enumeric elements are named and described in accordance with the code + /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h + enum OpenMPLocationFlags : unsigned + { + /// Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, + /// Call of __kmp_for_static_init for static loop. + OMP_IDENT_WORK_LOOP = 0x200, + /// Call of __kmp_for_static_init for sections. + OMP_IDENT_WORK_SECTIONS = 0x400, + /// Call of __kmp_for_static_init for distribute. + OMP_IDENT_WORK_DISTRIBUTE = 0x800, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) + }; + + namespace + { + LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + /// Values for bit flags for marking which requires clauses have been used. + enum OpenMPOffloadingRequiresDirFlags : int64_t + { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires clause present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) + }; + + enum OpenMPOffloadingReservedDeviceIDs + { + /// Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. + OMP_DEVICEID_UNDEF = -1, + }; + } // anonymous namespace + + /// Describes ident structure that describes a source location. + /// All descriptions are taken from + /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h + /// Original structure: + /// typedef struct ident { + /// kmp_int32 reserved_1; /**< might be used in Fortran; + /// see above */ + /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; + /// KMP_IDENT_KMPC identifies this union + /// member */ + /// kmp_int32 reserved_2; /**< not really used in Fortran any more; + /// see above */ + /// #if USE_ITT_BUILD + /// /* but currently used for storing + /// region-specific ITT */ + /// /* contextual information. */ + /// #endif /* USE_ITT_BUILD */ + /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for + /// C++ */ + /// char const *psource; /**< String describing the source location. + /// The string is composed of semi-colon separated + // fields which describe the source file, + /// the function and a pair of line numbers that + /// delimit the construct. + /// */ + /// } ident_t; + enum IdentFieldIndex + { + /// might be used in Fortran + IdentField_Reserved_1, + /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + IdentField_Flags, + /// Not really used in Fortran any more + IdentField_Reserved_2, + /// Source[4] in Fortran, do not use for C++ + IdentField_Reserved_3, + /// String describing the source location. The string is composed of + /// semi-colon separated fields which describe the source file, the function + /// and a pair of line numbers that delimit the construct. + IdentField_PSource + }; + + /// Schedule types for 'omp for' loops (these enumerators are taken from + /// the enum sched_type in kmp.h). + enum OpenMPSchedType + { + /// Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// static with chunk adjustment (e.g., simd) + OMP_sch_static_balanced_chunked = 45, + /// Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + OMP_ord_static_chunked = 65, + OMP_ord_static = 66, + OMP_ord_dynamic_chunked = 67, + OMP_ord_guided_chunked = 68, + OMP_ord_runtime = 69, + OMP_ord_auto = 70, + OMP_sch_default = OMP_sch_static, + /// dist_schedule types + OMP_dist_sch_static_chunked = 91, + OMP_dist_sch_static = 92, + /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. + /// Set if the monotonic schedule modifier was present. + OMP_sch_modifier_monotonic = (1 << 29), + /// Set if the nonmonotonic schedule modifier was present. + OMP_sch_modifier_nonmonotonic = (1 << 30), + }; + + /// A basic class for pre|post-action for advanced codegen sequence for OpenMP + /// region. + class CleanupTy final : public EHScopeStack::Cleanup + { + PrePostActionTy *Action; + + public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override + { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); + } + }; } // anonymous namespace -void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const +{ CodeGenFunction::RunCleanupsScope Scope(CGF); - if (PrePostAction) { + if (PrePostAction) + { CGF.EHStack.pushCleanup(NormalAndEHCleanup, PrePostAction); Callback(CodeGen, CGF, *PrePostAction); - } else { + } + else + { PrePostActionTy Action; Callback(CodeGen, CGF, Action); } @@ -608,7 +664,8 @@ /// Check if the combiner is a call to UDR combiner and if it is so return the /// UDR decl used for reduction. static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { +getReductionInit(const Expr *ReductionOp) +{ if (const auto *CE = dyn_cast(ReductionOp)) if (const auto *OVE = dyn_cast(CE->getCallee())) if (const auto *DRE = @@ -622,8 +679,10 @@ const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { + QualType Ty) +{ + if (DRD->getInitializer()) + { std::pair Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); const auto *CE = cast(InitOp); @@ -641,7 +700,9 @@ RValue Func = RValue::get(Reduction.second); CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); CGF.EmitIgnoredExpr(InitOp); - } else { + } + else + { llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); auto *GV = new llvm::GlobalVariable( @@ -649,7 +710,8 @@ llvm::GlobalValue::PrivateLinkage, Init, Name); LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { + switch (CGF.getEvaluationKind(Ty)) + { case TEK_Scalar: InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); break; @@ -657,7 +719,8 @@ InitRVal = RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; - case TEK_Aggregate: { + case TEK_Aggregate: + { OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), @@ -681,7 +744,8 @@ QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, - Address SrcAddr = Address::invalid()) { + Address SrcAddr = Address::invalid()) +{ // Perform element-by-element initialization. QualType ElementTy; @@ -714,7 +778,8 @@ llvm::PHINode *SrcElementPHI = nullptr; Address SrcElementCurrent = Address::invalid(); - if (DRD) { + if (DRD) + { SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); SrcElementPHI->addIncoming(SrcBegin, EntryBB); @@ -732,15 +797,18 @@ // Emit copy. { CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (EmitDeclareReductionInit) { + if (EmitDeclareReductionInit) + { emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, SrcElementCurrent, ElementTy); - } else + } + else CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), /*IsInitializer=*/false); } - if (DRD) { + if (DRD) + { // Shift the address forward by one element. llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, @@ -762,12 +830,14 @@ CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) +{ return CGF.EmitOMPSharedLValue(E); } LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, - const Expr *E) { + const Expr *E) +{ if (const auto *OASE = dyn_cast(E)) return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); return LValue(); @@ -775,7 +845,8 @@ void ReductionCodeGen::emitAggregateInitialization( CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, - const OMPDeclareReductionDecl *DRD) { + const OMPDeclareReductionDecl *DRD) +{ // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. @@ -793,7 +864,8 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, ArrayRef Origs, ArrayRef Privates, - ArrayRef ReductionOps) { + ArrayRef ReductionOps) +{ ClausesData.reserve(Shareds.size()); SharedAddresses.reserve(Shareds.size()); Sizes.reserve(Shareds.size()); @@ -801,7 +873,8 @@ const auto *IOrig = Origs.begin(); const auto *IPriv = Privates.begin(); const auto *IRed = ReductionOps.begin(); - for (const Expr *Ref : Shareds) { + for (const Expr *Ref : Shareds) + { ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); std::advance(IOrig, 1); std::advance(IPriv, 1); @@ -809,25 +882,31 @@ } } -void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { +void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) +{ assert(SharedAddresses.size() == N && OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."); LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); SharedAddresses.emplace_back(First, Second); - if (ClausesData[N].Shared == ClausesData[N].Ref) { + if (ClausesData[N].Shared == ClausesData[N].Ref) + { OrigAddresses.emplace_back(First, Second); - } else { + } + else + { LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); OrigAddresses.emplace_back(First, Second); } } -void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) +{ QualType PrivateType = getPrivateType(N); bool AsArraySection = isa(ClausesData[N].Ref); - if (!PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) + { Sizes.emplace_back( CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), nullptr); @@ -837,14 +916,17 @@ llvm::Value *SizeInChars; auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); - if (AsArraySection) { + if (AsArraySection) + { Size = CGF.Builder.CreatePtrDiff(ElemType, OrigAddresses[N].second.getPointer(CGF), OrigAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); - } else { + } + else + { SizeInChars = CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); @@ -859,9 +941,11 @@ } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, - llvm::Value *Size) { + llvm::Value *Size) +{ QualType PrivateType = getPrivateType(N); - if (!PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) + { assert(!Size && !Sizes[N].second && "Size should be nullptr for non-variably modified reduction " "items."); @@ -877,40 +961,49 @@ void ReductionCodeGen::emitInitialization( CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, - llvm::function_ref DefaultInit) { + llvm::function_ref DefaultInit) +{ assert(SharedAddresses.size() > N && "No variable was generated"); const auto *PrivateVD = cast(cast(ClausesData[N].Private)->getDecl()); const OMPDeclareReductionDecl *DRD = getReductionInit(ClausesData[N].ReductionOp); - if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { + if (CGF.getContext().getAsArrayType(PrivateVD->getType())) + { if (DRD && DRD->getInitializer()) (void)DefaultInit(CGF); emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); - } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + } + else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) + { (void)DefaultInit(CGF); QualType SharedType = SharedAddresses[N].first.getType(); emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, PrivateAddr, SharedAddr, SharedType); - } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && - !CGF.isTrivialInitializer(PrivateVD->getInit())) { + } + else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) + { CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, PrivateVD->getType().getQualifiers(), /*IsInitializer=*/false); } } -bool ReductionCodeGen::needCleanups(unsigned N) { +bool ReductionCodeGen::needCleanups(unsigned N) +{ QualType PrivateType = getPrivateType(N); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); return DTorKind != QualType::DK_none; } void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr) { + Address PrivateAddr) +{ QualType PrivateType = getPrivateType(N); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); - if (needCleanups(N)) { + if (needCleanups(N)) + { PrivateAddr = CGF.Builder.CreateElementBitCast( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); @@ -918,13 +1011,18 @@ } static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { + LValue BaseLV) +{ BaseTy = BaseTy.getNonReferenceType(); while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (const auto *PtrTy = BaseTy->getAs()) { + !CGF.getContext().hasSameType(BaseTy, ElTy)) + { + if (const auto *PtrTy = BaseTy->getAs()) + { BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); - } else { + } + else + { LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } @@ -938,13 +1036,15 @@ } static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - Address OriginalBaseAddress, llvm::Value *Addr) { + Address OriginalBaseAddress, llvm::Value *Addr) +{ Address Tmp = Address::invalid(); Address TopTmp = Address::invalid(); Address MostTopTmp = Address::invalid(); BaseTy = BaseTy.getNonReferenceType(); while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { + !CGF.getContext().hasSameType(BaseTy, ElTy)) + { Tmp = CGF.CreateMemTemp(BaseTy); if (TopTmp.isValid()) CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); @@ -954,7 +1054,8 @@ BaseTy = BaseTy->getPointeeType(); } - if (Tmp.isValid()) { + if (Tmp.isValid()) + { Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( Addr, Tmp.getElementType()); CGF.Builder.CreateStore(Addr, Tmp); @@ -966,9 +1067,11 @@ return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull); } -static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { +static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) +{ const VarDecl *OrigVD = nullptr; - if (const auto *OASE = dyn_cast(Ref)) { + if (const auto *OASE = dyn_cast(Ref)) + { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); @@ -976,7 +1079,9 @@ Base = TempASE->getBase()->IgnoreParenImpCasts(); DE = cast(Base); OrigVD = cast(DE->getDecl()); - } else if (const auto *ASE = dyn_cast(Ref)) { + } + else if (const auto *ASE = dyn_cast(Ref)) + { const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -987,9 +1092,11 @@ } Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr) { + Address PrivateAddr) +{ const DeclRefExpr *DE; - if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { + if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) + { BaseDecls.emplace_back(OrigVD); LValue OriginalBaseLValue = CGF.EmitLValue(DE); LValue BaseLValue = @@ -1013,19 +1120,22 @@ return PrivateAddr; } -bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const +{ const OMPDeclareReductionDecl *DRD = getReductionInit(ClausesData[N].ReductionOp); return DRD && DRD->getInitializer(); } -LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { +LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) +{ return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType()->castAs()); } -void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { +void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) +{ if (!CGF.HaveInsertPoint()) return; // 1.2.2 OpenMP Language Terminology @@ -1041,14 +1151,16 @@ } LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( - CodeGenFunction &CGF) { + CodeGenFunction &CGF) +{ return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType(), AlignmentSource::Decl); } static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, - QualType FieldTy) { + QualType FieldTy) +{ auto *Field = FieldDecl::Create( C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), @@ -1059,7 +1171,8 @@ } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { + : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() +{ KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, hasRequiresUnifiedSharedMemory(), @@ -1071,10 +1184,12 @@ loadOffloadInfoMetadata(); } -void CGOpenMPRuntime::clear() { +void CGOpenMPRuntime::clear() +{ InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. - for (const auto &Data : EmittedNonTargetVariables) { + for (const auto &Data : EmittedNonTargetVariables) + { if (!Data.getValue().pointsToAliveValue()) continue; auto *GV = dyn_cast(Data.getValue()); @@ -1086,14 +1201,16 @@ } } -std::string CGOpenMPRuntime::getName(ArrayRef Parts) const { +std::string CGOpenMPRuntime::getName(ArrayRef Parts) const +{ return OMPBuilder.createPlatformSpecificName(Parts); } static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, - const VarDecl *Out, bool IsCombiner) { + const VarDecl *Out, bool IsCombiner) +{ // void .omp_combiner.(Ty *in, Ty *out); ASTContext &C = CGM.getContext(); QualType PtrTy = C.getPointerType(Ty).withRestrict(); @@ -1112,7 +1229,8 @@ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); - if (CGM.getLangOpts().Optimize) { + if (CGM.getLangOpts().Optimize) + { Fn->removeFnAttr(llvm::Attribute::NoInline); Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); @@ -1133,7 +1251,8 @@ .getAddress(CGF)); (void)Scope.Privatize(); if (!IsCombiner && Out->hasInit() && - !CGF.isTrivialInitializer(Out->getInit())) { + !CGF.isTrivialInitializer(Out->getInit())) + { CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), Out->getType().getQualifiers(), /*IsInitializer=*/true); @@ -1146,7 +1265,8 @@ } void CGOpenMPRuntime::emitUserDefinedReduction( - CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { + CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) +{ if (UDRMap.count(D) > 0) return; llvm::Function *Combiner = emitCombinerOrInitializer( @@ -1155,7 +1275,8 @@ cast(cast(D->getCombinerOut())->getDecl()), /*IsCombiner=*/true); llvm::Function *Initializer = nullptr; - if (const Expr *Init = D->getInitializer()) { + if (const Expr *Init = D->getInitializer()) + { Initializer = emitCombinerOrInitializer( CGM, D->getType(), D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init @@ -1165,14 +1286,16 @@ /*IsCombiner=*/false); } UDRMap.try_emplace(D, Combiner, Initializer); - if (CGF) { + if (CGF) + { auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); Decls.second.push_back(D); } } std::pair -CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { +CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) +{ auto I = UDRMap.find(D); if (I != UDRMap.end()) return I->second; @@ -1180,55 +1303,61 @@ return UDRMap.lookup(D); } -namespace { -// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR -// Builder if one is present. -struct PushAndPopStackRAII { - PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, - bool HasCancel, llvm::omp::Directive Kind) - : OMPBuilder(OMPBuilder) { - if (!OMPBuilder) - return; +namespace +{ + // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR + // Builder if one is present. + struct PushAndPopStackRAII + { + PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, + bool HasCancel, llvm::omp::Directive Kind) + : OMPBuilder(OMPBuilder) + { + if (!OMPBuilder) + return; - // The following callback is the crucial part of clangs cleanup process. - // - // NOTE: - // Once the OpenMPIRBuilder is used to create parallel regions (and - // similar), the cancellation destination (Dest below) is determined via - // IP. That means if we have variables to finalize we split the block at IP, - // use the new block (=BB) as destination to build a JumpDest (via - // getJumpDestInCurrentScope(BB)) which then is fed to - // EmitBranchThroughCleanup. Furthermore, there will not be the need - // to push & pop an FinalizationInfo object. - // The FiniCB will still be needed but at the point where the - // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. - auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { - assert(IP.getBlock()->end() == IP.getPoint() && - "Clang CG should cause non-terminated block!"); - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.restoreIP(IP); - CodeGenFunction::JumpDest Dest = - CGF.getOMPCancelDestination(OMPD_parallel); - CGF.EmitBranchThroughCleanup(Dest); - }; + // The following callback is the crucial part of clangs cleanup process. + // + // NOTE: + // Once the OpenMPIRBuilder is used to create parallel regions (and + // similar), the cancellation destination (Dest below) is determined via + // IP. That means if we have variables to finalize we split the block at IP, + // use the new block (=BB) as destination to build a JumpDest (via + // getJumpDestInCurrentScope(BB)) which then is fed to + // EmitBranchThroughCleanup. Furthermore, there will not be the need + // to push & pop an FinalizationInfo object. + // The FiniCB will still be needed but at the point where the + // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. + auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) + { + assert(IP.getBlock()->end() == IP.getPoint() && + "Clang CG should cause non-terminated block!"); + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.restoreIP(IP); + CodeGenFunction::JumpDest Dest = + CGF.getOMPCancelDestination(OMPD_parallel); + CGF.EmitBranchThroughCleanup(Dest); + }; - // TODO: Remove this once we emit parallel regions through the - // OpenMPIRBuilder as it can do this setup internally. - llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); - OMPBuilder->pushFinalizationCB(std::move(FI)); - } - ~PushAndPopStackRAII() { - if (OMPBuilder) - OMPBuilder->popFinalizationCB(); - } - llvm::OpenMPIRBuilder *OMPBuilder; -}; + // TODO: Remove this once we emit parallel regions through the + // OpenMPIRBuilder as it can do this setup internally. + llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); + OMPBuilder->pushFinalizationCB(std::move(FI)); + } + ~PushAndPopStackRAII() + { + if (OMPBuilder) + OMPBuilder->popFinalizationCB(); + } + llvm::OpenMPIRBuilder *OMPBuilder; + }; } // namespace static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, - const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { + const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) +{ assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); CodeGenFunction CGF(CGM, true); @@ -1264,7 +1393,8 @@ llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) +{ const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); return emitParallelOrTeamsOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); @@ -1272,7 +1402,8 @@ llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) +{ const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); return emitParallelOrTeamsOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); @@ -1282,9 +1413,11 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts) +{ auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &) + { llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *TaskArgs[] = { @@ -1326,24 +1459,30 @@ } void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, - bool AtCurrentPoint) { + bool AtCurrentPoint) +{ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); - if (AtCurrentPoint) { + if (AtCurrentPoint) + { Elem.second.ServiceInsertPt = new llvm::BitCastInst( Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); - } else { + } + else + { Elem.second.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); } } -void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { +void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) +{ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - if (Elem.second.ServiceInsertPt) { + if (Elem.second.ServiceInsertPt) + { llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; Elem.second.ServiceInsertPt = nullptr; Ptr->eraseFromParent(); @@ -1352,7 +1491,8 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, - SmallString<128> &Buffer) { + SmallString<128> &Buffer) +{ llvm::raw_svector_ostream OS(Buffer); // Build debug location PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); @@ -1365,14 +1505,18 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags, bool EmitLoc) { + unsigned Flags, bool EmitLoc) +{ uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) || - Loc.isInvalid()) { + Loc.isInvalid()) + { SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); - } else { + } + else + { std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) FunctionName = FD->getQualifiedNameAsString(); @@ -1389,11 +1533,13 @@ } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc) +{ assert(CGF.CurFn && "No function in current CodeGenFunction."); // If the OpenMPIRBuilder is used we need to use it for all thread id calls as // the clang invariants used below might be broken. - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder) + { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); uint32_t SrcLocStrSize; @@ -1407,15 +1553,18 @@ // Check whether we've already cached a load of the thread id in this // function. auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); - if (I != OpenMPLocThreadIDMap.end()) { + if (I != OpenMPLocThreadIDMap.end()) + { ThreadID = I->second.ThreadID; if (ThreadID != nullptr) return ThreadID; } // If exceptions are enabled, do not use parameter to avoid possible crash. if (auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { + dyn_cast_or_null(CGF.CapturedStmtInfo)) + { + if (OMPRegionInfo->getThreadIDVariable()) + { // Check if this an outlined function with thread id passed as argument. LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); @@ -1426,11 +1575,13 @@ cast(LVal.getPointer(CGF))->getParent() == TopBlock || cast(LVal.getPointer(CGF))->getParent() == - CGF.Builder.GetInsertBlock()) { + CGF.Builder.GetInsertBlock()) + { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. - if (CGF.Builder.GetInsertBlock() == TopBlock) { + if (CGF.Builder.GetInsertBlock() == TopBlock) + { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } @@ -1457,20 +1608,24 @@ return Call; } -void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { +void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) +{ assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { + if (OpenMPLocThreadIDMap.count(CGF.CurFn)) + { clearLocThreadIdInsertPt(CGF); OpenMPLocThreadIDMap.erase(CGF.CurFn); } - if (FunctionUDRMap.count(CGF.CurFn) > 0) { - for(const auto *D : FunctionUDRMap[CGF.CurFn]) + if (FunctionUDRMap.count(CGF.CurFn) > 0) + { + for (const auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } auto I = FunctionUDMMap.find(CGF.CurFn); - if (I != FunctionUDMMap.end()) { - for(const auto *D : I->second) + if (I != FunctionUDMMap.end()) + { + for (const auto *D : I->second) UDMMap.erase(D); FunctionUDMMap.erase(I); } @@ -1478,12 +1633,15 @@ FunctionToUntiedTaskStackMap.erase(CGF.CurFn); } -llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { +llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() +{ return OMPBuilder.IdentPtr; } -llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { - if (!Kmpc_MicroTy) { +llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() +{ + if (!Kmpc_MicroTy) + { // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), llvm::PointerType::getUnqual(CGM.Int32Ty)}; @@ -1494,7 +1652,8 @@ llvm::FunctionCallee CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, - bool IsGPUDistribute) { + bool IsGPUDistribute) +{ assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name; @@ -1512,15 +1671,15 @@ llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy, // p_stride - ITy, // incr - ITy // chunk + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk }; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -1528,7 +1687,8 @@ } llvm::FunctionCallee -CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { +CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) +{ assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -1536,13 +1696,14 @@ ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - ITy, // lower - ITy, // upper - ITy, // stride - ITy // chunk + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + ITy, // lower + ITy, // upper + ITy, // stride + ITy // chunk }; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -1550,7 +1711,8 @@ } llvm::FunctionCallee -CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { +CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) +{ assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -1567,7 +1729,8 @@ } llvm::FunctionCallee -CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { +CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) +{ assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -1577,12 +1740,12 @@ llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy // p_stride + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy // p_stride }; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); @@ -1594,7 +1757,8 @@ /// the relevant entry source location. static llvm::TargetRegionEntryInfo getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - StringRef ParentName = "") { + StringRef ParentName = "") +{ SourceManager &SM = C.getSourceManager(); // The loc should be always valid and have a file ID (the user cannot use @@ -1606,7 +1770,8 @@ assert(PLoc.isValid() && "Source location is expected to be always valid."); llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + { PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); assert(PLoc.isValid() && "Source location is expected to be always valid."); if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) @@ -1618,7 +1783,8 @@ PLoc.getLine()); } -Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { +Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) +{ if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); std::optional Res = @@ -1626,12 +1792,14 @@ if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory))) { + HasRequiresUnifiedSharedMemory))) + { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); OS << CGM.getMangledName(GlobalDecl(VD)); - if (!VD->isExternallyVisible()) { + if (!VD->isExternallyVisible()) + { auto EntryInfo = getTargetEntryUniqueInfo( CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc()); OS << llvm::format("_%x", EntryInfo.FileID); @@ -1641,7 +1809,8 @@ llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); - if (!Ptr) { + if (!Ptr) + { Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName); auto *GV = cast(Ptr); @@ -1657,7 +1826,8 @@ } llvm::Constant * -CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { +CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) +{ assert(!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. @@ -1669,7 +1839,8 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, - SourceLocation Loc) { + SourceLocation Loc) +{ if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return VDAddr; @@ -1690,7 +1861,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, - llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { + llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) +{ // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); @@ -1710,18 +1882,21 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( const VarDecl *VD, Address VDAddr, SourceLocation Loc, - bool PerformInit, CodeGenFunction *CGF) { + bool PerformInit, CodeGenFunction *CGF) +{ if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return nullptr; VD = VD->getDefinition(CGM.getContext()); - if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { + if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) + { QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; const Expr *Init = VD->getAnyInitializer(); - if (CGM.getLangOpts().CPlusPlus && PerformInit) { + if (CGM.getLangOpts().CPlusPlus && PerformInit) + { // Generate function that re-emits the declaration's initializer into the // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); @@ -1754,7 +1929,8 @@ CtorCGF.FinishFunction(); Ctor = Fn; } - if (VD->getType().isDestructedType() != QualType::DK_none) { + if (VD->getType().isDestructedType() != QualType::DK_none) + { // Generate function that emits destructor call for the threadprivate copy // of the variable VD CodeGenFunction DtorCGF(CGM); @@ -1797,19 +1973,22 @@ // Must be NULL - reserved by runtime, but currently it requires that this // parameter is always NULL. Otherwise it fires assertion. CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); - if (Ctor == nullptr) { + if (Ctor == nullptr) + { auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg=*/false) ->getPointerTo(); Ctor = llvm::Constant::getNullValue(CtorTy); } - if (Dtor == nullptr) { + if (Dtor == nullptr) + { auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg=*/false) ->getPointerTo(); Dtor = llvm::Constant::getNullValue(DtorTy); } - if (!CGF) { + if (!CGF) + { auto *InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); std::string Name = getName({"__omp_threadprivate_init_", ""}); @@ -1831,7 +2010,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, - bool PerformInit) { + bool PerformInit) +{ if (CGM.getLangOpts().OMPTargetTriples.empty() && !CGM.getLangOpts().OpenMPIsDevice) return false; @@ -1860,10 +2040,12 @@ OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); - if (CGM.getLangOpts().CPlusPlus && PerformInit) { + if (CGM.getLangOpts().CPlusPlus && PerformInit) + { llvm::Constant *Ctor; llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsDevice) + { // Generate function that re-emits the declaration's initializer into // the threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); @@ -1893,8 +2075,10 @@ CtorCGF.FinishFunction(); Ctor = Fn; ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - } else { - Ctor = new llvm::GlobalVariable( + } + else + { + Ctor = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); @@ -1909,10 +2093,12 @@ CtorEntryInfo, Ctor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); } - if (VD->getType().isDestructedType() != QualType::DK_none) { + if (VD->getType().isDestructedType() != QualType::DK_none) + { llvm::Constant *Dtor; llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsDevice) + { // Generate function that emits destructor call for the threadprivate // copy of the variable VD CodeGenFunction DtorCGF(CGM); @@ -1943,7 +2129,9 @@ DtorCGF.FinishFunction(); Dtor = Fn; ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - } else { + } + else + { Dtor = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, @@ -1963,13 +2151,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, - StringRef Name) { + StringRef Name) +{ std::string Suffix = getName({"artificial", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( VarLVType, Twine(Name).concat(Suffix).str()); if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && - CGM.getTarget().isTLSSupported()) { + CGM.getTarget().isTLSSupported()) + { GAddr->setThreadLocal(/*Val=*/true); return Address(GAddr, GAddr->getValueType(), CGM.getContext().getTypeAlignInChars(VarType)); @@ -1996,13 +2186,15 @@ void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, - const RegionCodeGenTy &ElseGen) { + const RegionCodeGenTy &ElseGen) +{ CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. bool CondConstant; - if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { + if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) + { if (CondConstant) ThenGen(CGF); else @@ -2037,13 +2229,15 @@ llvm::Function *OutlinedFn, ArrayRef CapturedVars, const Expr *IfCond, - llvm::Value *NumThreads) { + llvm::Value *NumThreads) +{ if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); auto &M = CGM.getModule(); auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, - this](CodeGenFunction &CGF, PrePostActionTy &) { + this](CodeGenFunction &CGF, PrePostActionTy &) + { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { @@ -2059,7 +2253,8 @@ CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, - this](CodeGenFunction &CGF, PrePostActionTy &) { + this](CodeGenFunction &CGF, PrePostActionTy &) + { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); // Build calls: @@ -2097,9 +2292,12 @@ M, OMPRTL___kmpc_end_serialized_parallel), EndArgs); }; - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { + } + else + { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } @@ -2112,7 +2310,8 @@ // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and // return the address of that temp. Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc) +{ if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) @@ -2128,55 +2327,63 @@ return ThreadIDTemp; } -llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { +llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) +{ std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getName({Prefix, "var"}); return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); } -namespace { -/// Common pre(post)-action for different OpenMP constructs. -class CommonActionTy final : public PrePostActionTy { - llvm::FunctionCallee EnterCallee; - ArrayRef EnterArgs; - llvm::FunctionCallee ExitCallee; - ArrayRef ExitArgs; - bool Conditional; - llvm::BasicBlock *ContBlock = nullptr; - -public: - CommonActionTy(llvm::FunctionCallee EnterCallee, - ArrayRef EnterArgs, - llvm::FunctionCallee ExitCallee, - ArrayRef ExitArgs, bool Conditional = false) - : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), - ExitArgs(ExitArgs), Conditional(Conditional) {} - void Enter(CodeGenFunction &CGF) override { - llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); - if (Conditional) { - llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - } - } - void Done(CodeGenFunction &CGF) { - // Emit the rest of blocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); - } - void Exit(CodeGenFunction &CGF) override { - CGF.EmitRuntimeCall(ExitCallee, ExitArgs); - } -}; +namespace +{ + /// Common pre(post)-action for different OpenMP constructs. + class CommonActionTy final : public PrePostActionTy + { + llvm::FunctionCallee EnterCallee; + ArrayRef EnterArgs; + llvm::FunctionCallee ExitCallee; + ArrayRef ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; + + public: + CommonActionTy(llvm::FunctionCallee EnterCallee, + ArrayRef EnterArgs, + llvm::FunctionCallee ExitCallee, + ArrayRef ExitArgs, bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override + { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) + { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } + } + void Done(CodeGenFunction &CGF) + { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override + { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); + } + }; } // anonymous namespace void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, - SourceLocation Loc, const Expr *Hint) { + SourceLocation Loc, const Expr *Hint) +{ // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); // CriticalOpGen(); // __kmpc_end_critical(ident_t *, gtid, Lock); @@ -2187,7 +2394,8 @@ getCriticalRegionLock(CriticalName)}; llvm::SmallVector EnterArgs(std::begin(Args), std::end(Args)); - if (Hint) { + if (Hint) + { EnterArgs.push_back(CGF.Builder.CreateIntCast( CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); } @@ -2205,7 +2413,8 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; // if(__kmpc_master(ident_t *, gtid)) { @@ -2228,7 +2437,8 @@ void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, - SourceLocation Loc, const Expr *Filter) { + SourceLocation Loc, const Expr *Filter) +{ if (!CGF.HaveInsertPoint()) return; // if(__kmpc_masked(ident_t *, gtid, filter)) { @@ -2256,12 +2466,16 @@ } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) + { OMPBuilder.createTaskyield(CGF.Builder); - } else { + } + else + { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), @@ -2277,7 +2491,8 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; // __kmpc_taskgroup(ident_t *, gtid); @@ -2298,7 +2513,8 @@ /// Given an array of pointers to variables, project the address of a /// given variable. static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, - unsigned Index, const VarDecl *Var) { + unsigned Index, const VarDecl *Var) +{ // Pull out the pointer to the variable. Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); @@ -2314,7 +2530,8 @@ CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef CopyprivateVars, ArrayRef DestExprs, ArrayRef SrcExprs, ArrayRef AssignmentOps, - SourceLocation Loc) { + SourceLocation Loc) +{ ASTContext &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; @@ -2349,7 +2566,8 @@ // *(Type1*)Dst[1] = *(Type1*)Src[1]; // ... // *(Typen*)Dst[n] = *(Typen*)Src[n]; - for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { + for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) + { const auto *DestVar = cast(cast(DestExprs[I])->getDecl()); Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); @@ -2372,7 +2590,8 @@ ArrayRef CopyprivateVars, ArrayRef SrcExprs, ArrayRef DstExprs, - ArrayRef AssignmentOps) { + ArrayRef AssignmentOps) +{ if (!CGF.HaveInsertPoint()) return; assert(CopyprivateVars.size() == SrcExprs.size() && @@ -2389,7 +2608,8 @@ // , did_it); Address DidIt = Address::invalid(); - if (!CopyprivateVars.empty()) { + if (!CopyprivateVars.empty()) + { // int32 did_it = 0; QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); @@ -2407,14 +2627,16 @@ /*Conditional=*/true); SingleOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_single, SingleOpGen); - if (DidIt.isValid()) { + if (DidIt.isValid()) + { // did_it = 1; CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); } Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, , , // , did_it); - if (DidIt.isValid()) { + if (DidIt.isValid()) + { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); QualType CopyprivateArrayTy = C.getConstantArrayType( C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, @@ -2422,7 +2644,8 @@ // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); - for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { + for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) + { Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -2455,14 +2678,16 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, - SourceLocation Loc, bool IsThreads) { + SourceLocation Loc, bool IsThreads) +{ if (!CGF.HaveInsertPoint()) return; // __kmpc_ordered(ident_t *, gtid); // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - if (IsThreads) { + if (IsThreads) + { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_ordered), @@ -2477,7 +2702,8 @@ emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) +{ unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -2494,12 +2720,15 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk( CodeGenFunction &CGF, const OMPLoopDirective &S, - OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { + OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const +{ // Check if the loop directive is actually a doacross loop directive. In this // case choose static, 1 schedule. if (llvm::any_of( S.getClausesOfKind(), - [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { + [](const OMPOrderedClause *C) + { return C->getNumForLoops(); })) + { ScheduleKind = OMPC_SCHEDULE_static; // Chunk size is 1 in this case. llvm::APInt ChunkSize(32, 1); @@ -2512,11 +2741,13 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { + bool ForceSimpleCall) +{ // Check if we should use the OMPBuilder auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo); - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) + { CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); return; @@ -2531,13 +2762,16 @@ // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - if (OMPRegionInfo) { - if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { + if (OMPRegionInfo) + { + if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) + { llvm::Value *Result = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_cancel_barrier), Args); - if (EmitChecks) { + if (EmitChecks) + { // if (__kmpc_cancel_barrier()) { // exit from construct; // } @@ -2561,7 +2795,8 @@ } void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, - Expr *ME, bool IsFatal) { + Expr *ME, bool IsFatal) +{ llvm::Value *MVL = ME ? CGF.EmitStringLiteralLValue(cast(ME)).getPointer(CGF) : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); @@ -2578,8 +2813,10 @@ /// Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, - bool Chunked, bool Ordered) { - switch (ScheduleKind) { + bool Chunked, bool Ordered) +{ + switch (ScheduleKind) + { case OMPC_SCHEDULE_static: return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) : (Ordered ? OMP_ord_static : OMP_sch_static); @@ -2600,38 +2837,44 @@ /// Map the OpenMP distribute schedule to the runtime enumeration. static OpenMPSchedType -getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { +getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) +{ // only static is allowed for dist_schedule return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; } bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, - bool Chunked) const { + bool Chunked) const +{ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } bool CGOpenMPRuntime::isStaticNonchunked( - OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const +{ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); return Schedule == OMP_dist_sch_static; } bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, - bool Chunked) const { + bool Chunked) const +{ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static_chunked; } bool CGOpenMPRuntime::isStaticChunked( - OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const +{ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); return Schedule == OMP_dist_sch_static_chunked; } -bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { +bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const +{ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); @@ -2640,9 +2883,11 @@ static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, - OpenMPScheduleClauseModifier M2) { + OpenMPScheduleClauseModifier M2) +{ int Modifier = 0; - switch (M1) { + switch (M1) + { case OMPC_SCHEDULE_MODIFIER_monotonic: Modifier = OMP_sch_modifier_monotonic; break; @@ -2657,7 +2902,8 @@ case OMPC_SCHEDULE_MODIFIER_unknown: break; } - switch (M2) { + switch (M2) + { case OMPC_SCHEDULE_MODIFIER_monotonic: Modifier = OMP_sch_modifier_monotonic; break; @@ -2678,7 +2924,8 @@ // as if the monotonic modifier is specified. Otherwise, unless the monotonic // modifier is specified, the effect is as if the nonmonotonic modifier is // specified. - if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { + if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) + { if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || @@ -2692,7 +2939,8 @@ void CGOpenMPRuntime::emitForDispatchInit( CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, - bool Ordered, const DispatchRTInput &DispatchValues) { + bool Ordered, const DispatchRTInput &DispatchValues) +{ if (!CGF.HaveInsertPoint()) return; OpenMPSchedType Schedule = getRuntimeSchedule( @@ -2726,7 +2974,8 @@ CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, - const CGOpenMPRuntime::StaticRTInput &Values) { + const CGOpenMPRuntime::StaticRTInput &Values) +{ if (!CGF.HaveInsertPoint()) return; @@ -2743,13 +2992,16 @@ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, // kmp_int[32|64] incr, kmp_int[32|64] chunk); llvm::Value *Chunk = Values.Chunk; - if (Chunk == nullptr) { + if (Chunk == nullptr) + { assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && "expected static non-chunked schedule"); // If the Chunk was not specified in the clause - use default value 1. Chunk = CGF.Builder.getIntN(Values.IVSize, 1); - } else { + } + else + { assert((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || @@ -2775,15 +3027,16 @@ SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, - const StaticRTInput &Values) { + const StaticRTInput &Values) +{ OpenMPSchedType ScheduleNum = getRuntimeSchedule( ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); assert(isOpenMPWorksharingDirective(DKind) && "Expected loop-based or sections-based directive."); llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, - isOpenMPLoopDirective(DKind) - ? OMP_IDENT_WORK_LOOP - : OMP_IDENT_WORK_SECTIONS); + isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); @@ -2795,7 +3048,8 @@ void CGOpenMPRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, - const CGOpenMPRuntime::StaticRTInput &Values) { + const CGOpenMPRuntime::StaticRTInput &Values) +{ OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); llvm::Value *UpdatedLocation = @@ -2815,7 +3069,8 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind DKind) { + OpenMPDirectiveKind DKind) +{ if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); @@ -2823,9 +3078,9 @@ emitUpdateLocation(CGF, Loc, isOpenMPDistributeDirective(DKind) ? OMP_IDENT_WORK_DISTRIBUTE - : isOpenMPLoopDirective(DKind) - ? OMP_IDENT_WORK_LOOP - : OMP_IDENT_WORK_SECTIONS), + : isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && @@ -2843,7 +3098,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, - bool IVSigned) { + bool IVSigned) +{ if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); @@ -2855,7 +3111,8 @@ SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, - Address ST) { + Address ST) +{ // Call __kmpc_dispatch_next( // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, @@ -2877,7 +3134,8 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) @@ -2891,7 +3149,8 @@ void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, ProcBindKind ProcBind, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); @@ -2905,10 +3164,14 @@ } void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef, - SourceLocation Loc, llvm::AtomicOrdering AO) { - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + SourceLocation Loc, llvm::AtomicOrdering AO) +{ + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) + { OMPBuilder.createFlush(CGF.Builder); - } else { + } + else + { if (!CGF.HaveInsertPoint()) return; // Build call void __kmpc_flush(ident_t *loc) @@ -2918,33 +3181,36 @@ } } -namespace { -/// Indexes of fields for type kmp_task_t. -enum KmpTaskTFields { - /// List of shared variables. - KmpTaskTShareds, - /// Task routine. - KmpTaskTRoutine, - /// Partition id for the untied tasks. - KmpTaskTPartId, - /// Function with call of destructors for private variables. - Data1, - /// Task priority. - Data2, - /// (Taskloops only) Lower bound. - KmpTaskTLowerBound, - /// (Taskloops only) Upper bound. - KmpTaskTUpperBound, - /// (Taskloops only) Stride. - KmpTaskTStride, - /// (Taskloops only) Is last iteration flag. - KmpTaskTLastIter, - /// (Taskloops only) Reduction data. - KmpTaskTReductions, -}; +namespace +{ + /// Indexes of fields for type kmp_task_t. + enum KmpTaskTFields + { + /// List of shared variables. + KmpTaskTShareds, + /// Task routine. + KmpTaskTRoutine, + /// Partition id for the untied tasks. + KmpTaskTPartId, + /// Function with call of destructors for private variables. + Data1, + /// Task priority. + Data2, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. + KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. + KmpTaskTLastIter, + /// (Taskloops only) Reduction data. + KmpTaskTReductions, + }; } // anonymous namespace -void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { +void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() +{ // If we are in simd mode or there are no entries, we don't need to do // anything. if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) @@ -2952,42 +3218,53 @@ llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, - const llvm::TargetRegionEntryInfo &EntryInfo) -> void { + const llvm::TargetRegionEntryInfo &EntryInfo) -> void + { SourceLocation Loc; - if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { + if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) + { for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), E = CGM.getContext().getSourceManager().fileinfo_end(); - I != E; ++I) { + I != E; ++I) + { if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && - I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { + I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) + { Loc = CGM.getContext().getSourceManager().translateFileLineCol( I->getFirst(), EntryInfo.Line, 1); break; } } } - switch (Kind) { - case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { + switch (Kind) + { + case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for target region in " "%0 is incorrect: either the " "address or the ID is invalid."); CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; - } break; - case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { + } + break; + case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for declare target " "variable %0 is incorrect: the " "address is invalid."); CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; - } break; - case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { + } + break; + case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for declare target variable is incorrect: the " "address is invalid."); CGM.getDiags().Report(DiagID); - } break; + } + break; } }; @@ -2997,7 +3274,8 @@ /// Loads all the offload entries information from the host IR /// metadata. -void CGOpenMPRuntime::loadOffloadInfoMetadata() { +void CGOpenMPRuntime::loadOffloadInfoMetadata() +{ // If we are in target mode, load the metadata from the host IR. This code has // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). @@ -3008,7 +3286,8 @@ return; auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); - if (auto EC = Buf.getError()) { + if (auto EC = Buf.getError()) + { CGM.getDiags().Report(diag::err_cannot_open_file) << CGM.getLangOpts().OMPHostIRFile << EC.message(); return; @@ -3018,7 +3297,8 @@ auto ME = expectedToErrorOrAndEmitErrors( C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); - if (auto EC = ME.getError()) { + if (auto EC = ME.getError()) + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); CGM.getDiags().Report(DiagID) @@ -3029,8 +3309,10 @@ OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); } -void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { - if (!KmpRoutineEntryPtrTy) { +void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) +{ + if (!KmpRoutineEntryPtrTy) + { // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. ASTContext &C = CGM.getContext(); QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; @@ -3041,25 +3323,29 @@ } } -namespace { -struct PrivateHelpersTy { - PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, - const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) - : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), - PrivateElemInit(PrivateElemInit) {} - PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} - const Expr *OriginalRef = nullptr; - const VarDecl *Original = nullptr; - const VarDecl *PrivateCopy = nullptr; - const VarDecl *PrivateElemInit = nullptr; - bool isLocalPrivate() const { - return !OriginalRef && !PrivateCopy && !PrivateElemInit; - } -}; -typedef std::pair PrivateDataTy; +namespace +{ + struct PrivateHelpersTy + { + PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, + const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) + : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), + PrivateElemInit(PrivateElemInit) {} + PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} + const Expr *OriginalRef = nullptr; + const VarDecl *Original = nullptr; + const VarDecl *PrivateCopy = nullptr; + const VarDecl *PrivateElemInit = nullptr; + bool isLocalPrivate() const + { + return !OriginalRef && !PrivateCopy && !PrivateElemInit; + } + }; + typedef std::pair PrivateDataTy; } // anonymous namespace -static bool isAllocatableDecl(const VarDecl *VD) { +static bool isAllocatableDecl(const VarDecl *VD) +{ const VarDecl *CVD = VD->getCanonicalDecl(); if (!CVD->hasAttr()) return false; @@ -3070,27 +3356,32 @@ } static RecordDecl * -createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) { - if (!Privates.empty()) { +createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) +{ + if (!Privates.empty()) + { ASTContext &C = CGM.getContext(); // Build struct .kmp_privates_t. { // /* private vars */ // }; RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); RD->startDefinition(); - for (const auto &Pair : Privates) { + for (const auto &Pair : Privates) + { const VarDecl *VD = Pair.second.Original; QualType Type = VD->getType().getNonReferenceType(); // If the private variable is a local variable with lvalue ref type, // allocate the pointer instead of the pointee type. - if (Pair.second.isLocalPrivate()) { + if (Pair.second.isLocalPrivate()) + { if (VD->getType()->isLValueReferenceType()) Type = C.getPointerType(Type); if (isAllocatableDecl(VD)) Type = C.getPointerType(Type); } FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); - if (VD->hasAttrs()) { + if (VD->hasAttrs()) + { for (specific_attr_iterator I(VD->getAttrs().begin()), E(VD->getAttrs().end()); I != E; ++I) @@ -3106,7 +3397,8 @@ static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, - QualType KmpRoutineEntryPointerQTy) { + QualType KmpRoutineEntryPointerQTy) +{ ASTContext &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; @@ -3134,7 +3426,8 @@ addFieldToRecordDecl(C, RD, KmpInt32Ty); addFieldToRecordDecl(C, RD, KmpCmplrdataTy); addFieldToRecordDecl(C, RD, KmpCmplrdataTy); - if (isOpenMPTaskLoopDirective(Kind)) { + if (isOpenMPTaskLoopDirective(Kind)) + { QualType KmpUInt64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); QualType KmpInt64Ty = @@ -3151,7 +3444,8 @@ static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, - ArrayRef Privates) { + ArrayRef Privates) +{ ASTContext &C = CGM.getContext(); // Build struct kmp_task_t_with_privates { // kmp_task_t task_data; @@ -3183,7 +3477,8 @@ QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, - llvm::Value *TaskPrivatesMap) { + llvm::Value *TaskPrivatesMap) +{ ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, @@ -3233,11 +3528,14 @@ auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); llvm::Value *PrivatesParam; - if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { + if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) + { LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); - } else { + } + else + { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -3249,7 +3547,8 @@ .getPointer()}; SmallVector CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); - if (isOpenMPTaskLoopDirective(Kind)) { + if (isOpenMPTaskLoopDirective(Kind)) + { auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); @@ -3285,7 +3584,8 @@ SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, - QualType KmpTaskTWithPrivatesQTy) { + QualType KmpTaskTWithPrivatesQTy) +{ ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, @@ -3319,9 +3619,11 @@ auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); Base = CGF.EmitLValueForField(Base, *FI); for (const auto *Field : - cast(FI->getType()->getAsTagDecl())->fields()) { + cast(FI->getType()->getAsTagDecl())->fields()) + { if (QualType::DestructionKind DtorKind = - Field->getType().isDestructedType()) { + Field->getType().isDestructedType()) + { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); } @@ -3343,7 +3645,8 @@ static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, - ArrayRef Privates) { + ArrayRef Privates) +{ ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( @@ -3353,7 +3656,8 @@ Args.push_back(&TaskPrivatesArg); llvm::DenseMap, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (const Expr *E : Data.PrivateVars) { + for (const Expr *E : Data.PrivateVars) + { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3364,7 +3668,8 @@ PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : Data.FirstprivateVars) { + for (const Expr *E : Data.FirstprivateVars) + { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3375,7 +3680,8 @@ PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : Data.LastprivateVars) { + for (const Expr *E : Data.LastprivateVars) + { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3386,7 +3692,8 @@ PrivateVarsPos[VD] = Counter; ++Counter; } - for (const VarDecl *VD : Data.PrivateLocals) { + for (const VarDecl *VD : Data.PrivateLocals) + { QualType Ty = VD->getType().getNonReferenceType(); if (VD->getType()->isLValueReferenceType()) Ty = C.getPointerType(Ty); @@ -3410,7 +3717,8 @@ &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, TaskPrivatesMapFnInfo); - if (CGM.getLangOpts().Optimize) { + if (CGM.getLangOpts().Optimize) + { TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); @@ -3425,7 +3733,8 @@ TaskPrivatesArg.getType()->castAs()); const auto *PrivatesQTyRD = cast(PrivatesQTy->getAsTagDecl()); Counter = 0; - for (const FieldDecl *Field : PrivatesQTyRD->fields()) { + for (const FieldDecl *Field : PrivatesQTyRD->fields()) + { LValue FieldLVal = CGF.EmitLValueForField(Base, Field); const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; LValue RefLVal = @@ -3446,7 +3755,8 @@ const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, - ArrayRef Privates, bool ForDup) { + ArrayRef Privates, bool ForDup) +{ ASTContext &C = CGF.getContext(); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); @@ -3463,7 +3773,8 @@ // PointersArray, SizesArray, and MappersArray. The original variables for // these arrays are not captured and we get their addresses explicitly. if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || - (IsTargetTask && KmpTaskSharedsPtr.isValid())) { + (IsTargetTask && KmpTaskSharedsPtr.isValid())) + { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), @@ -3471,25 +3782,30 @@ SharedsTy); } FI = cast(FI->getType()->getAsTagDecl())->field_begin(); - for (const PrivateDataTy &Pair : Privates) { + for (const PrivateDataTy &Pair : Privates) + { // Do not initialize private locals. - if (Pair.second.isLocalPrivate()) { + if (Pair.second.isLocalPrivate()) + { ++FI; continue; } const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa(Init) && - !CGF.isTrivialInitializer(Init)))) { + !CGF.isTrivialInitializer(Init)))) + { LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (const VarDecl *Elem = Pair.second.PrivateElemInit) { + if (const VarDecl *Elem = Pair.second.PrivateElemInit) + { const VarDecl *OriginalVD = Pair.second.Original; // Check if the variable is the target-based BasePointersArray, // PointersArray, SizesArray, or MappersArray. LValue SharedRefLValue; QualType Type = PrivateLValue.getType(); const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); - if (IsTargetTask && !SharedField) { + if (IsTargetTask && !SharedField) + { assert(isa(OriginalVD) && isa(OriginalVD->getDeclContext()) && cast(OriginalVD->getDeclContext()) @@ -3500,37 +3816,48 @@ "Expected artificial target data variable."); SharedRefLValue = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); - } else if (ForDup) { + } + else if (ForDup) + { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( SharedRefLValue.getAddress(CGF).withAlignment( C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); - } else if (CGF.LambdaCaptureFields.count( - Pair.second.Original->getCanonicalDecl()) > 0 || - isa_and_nonnull(CGF.CurCodeDecl)) { + } + else if (CGF.LambdaCaptureFields.count( + Pair.second.Original->getCanonicalDecl()) > 0 || + isa_and_nonnull(CGF.CurCodeDecl)) + { SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); - } else { + } + else + { // Processing for implicitly captured variables. InlinedOpenMPRegionRAII Region( CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, /*HasCancel=*/false, /*NoInheritance=*/true); SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } - if (Type->isArrayType()) { + if (Type->isArrayType()) + { // Initialize firstprivate array. - if (!isa(Init) || CGF.isTrivialInitializer(Init)) { + if (!isa(Init) || CGF.isTrivialInitializer(Init)) + { // Perform simple memcpy. CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); - } else { + } + else + { // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, - Address SrcElement) { + Address SrcElement) + { // Clean up any temporaries needed by the initialization. CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate(Elem, SrcElement); @@ -3543,7 +3870,9 @@ /*IsInitializer=*/false); }); } - } else { + } + else + { CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); (void)InitScope.Privatize(); @@ -3551,7 +3880,9 @@ CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } - } else { + } + else + { CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } } @@ -3561,9 +3892,11 @@ /// Check if duplication function is required for taskloops. static bool checkInitIsRequired(CodeGenFunction &CGF, - ArrayRef Privates) { + ArrayRef Privates) +{ bool InitRequired = false; - for (const PrivateDataTy &Pair : Privates) { + for (const PrivateDataTy &Pair : Privates) + { if (Pair.second.isLocalPrivate()) continue; const VarDecl *VD = Pair.second.PrivateCopy; @@ -3576,7 +3909,6 @@ return InitRequired; } - /// Emit task_dup function (for initialization of /// private/firstprivate/lastprivate vars and last_iter flag) /// \code @@ -3594,7 +3926,8 @@ const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, - ArrayRef Privates, bool WithLastIter) { + ArrayRef Privates, bool WithLastIter) +{ ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -3624,7 +3957,8 @@ CGF.GetAddrOfLocalVar(&DstArg), KmpTaskTWithPrivatesPtrQTy->castAs()); // task_dst->liter = lastpriv; - if (WithLastIter) { + if (WithLastIter) + { auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); LValue Base = CGF.EmitLValueForField( TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -3637,7 +3971,8 @@ // Emit initial values for private copies (if any). assert(!Privates.empty()); Address KmpTaskSharedsPtr = Address::invalid(); - if (!Data.FirstprivateVars.empty()) { + if (!Data.FirstprivateVars.empty()) + { LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&SrcArg), KmpTaskTWithPrivatesPtrQTy->castAs()); @@ -3660,8 +3995,10 @@ /// \return true if cleanups are required, false otherwise. static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, - ArrayRef Privates) { - for (const PrivateDataTy &P : Privates) { + ArrayRef Privates) +{ + for (const PrivateDataTy &P : Privates) + { if (P.second.isLocalPrivate()) continue; QualType Ty = P.second.Original->getType().getNonReferenceType(); @@ -3671,104 +4008,119 @@ return false; } -namespace { -/// Loop generator for OpenMP iterator expression. -class OMPIteratorGeneratorScope final - : public CodeGenFunction::OMPPrivateScope { - CodeGenFunction &CGF; - const OMPIteratorExpr *E = nullptr; - SmallVector ContDests; - SmallVector ExitDests; - OMPIteratorGeneratorScope() = delete; - OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; - -public: - OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) - : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { - if (!E) - return; - SmallVector Uppers; - for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { - Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); - const auto *VD = cast(E->getIteratorDecl(I)); - addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); - const OMPIteratorHelperData &HelperData = E->getHelper(I); - addPrivate( - HelperData.CounterVD, - CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); - } - Privatize(); - - for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { - const OMPIteratorHelperData &HelperData = E->getHelper(I); - LValue CLVal = - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), - HelperData.CounterVD->getType()); - // Counter = 0; - CGF.EmitStoreOfScalar( - llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), - CLVal); - CodeGenFunction::JumpDest &ContDest = - ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); - CodeGenFunction::JumpDest &ExitDest = - ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); - // N = ; - llvm::Value *N = Uppers[I]; - // cont: - // if (Counter < N) goto body; else goto exit; - CGF.EmitBlock(ContDest.getBlock()); - auto *CVal = - CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); - llvm::Value *Cmp = - HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() - ? CGF.Builder.CreateICmpSLT(CVal, N) - : CGF.Builder.CreateICmpULT(CVal, N); - llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); - CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); - // body: - CGF.EmitBlock(BodyBB); - // Iteri = Begini + Counter * Stepi; - CGF.EmitIgnoredExpr(HelperData.Update); - } - } - ~OMPIteratorGeneratorScope() { - if (!E) - return; - for (unsigned I = E->numOfIterators(); I > 0; --I) { - // Counter = Counter + 1; - const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); - CGF.EmitIgnoredExpr(HelperData.CounterUpdate); - // goto cont; - CGF.EmitBranchThroughCleanup(ContDests[I - 1]); - // exit: - CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); +namespace +{ + /// Loop generator for OpenMP iterator expression. + class OMPIteratorGeneratorScope final + : public CodeGenFunction::OMPPrivateScope + { + CodeGenFunction &CGF; + const OMPIteratorExpr *E = nullptr; + SmallVector ContDests; + SmallVector ExitDests; + OMPIteratorGeneratorScope() = delete; + OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; + + public: + OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) + : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) + { + if (!E) + return; + SmallVector Uppers; + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) + { + Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); + const auto *VD = cast(E->getIteratorDecl(I)); + addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); + const OMPIteratorHelperData &HelperData = E->getHelper(I); + addPrivate( + HelperData.CounterVD, + CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); + } + Privatize(); + + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) + { + const OMPIteratorHelperData &HelperData = E->getHelper(I); + LValue CLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), + HelperData.CounterVD->getType()); + // Counter = 0; + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), + CLVal); + CodeGenFunction::JumpDest &ContDest = + ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); + CodeGenFunction::JumpDest &ExitDest = + ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); + // N = ; + llvm::Value *N = Uppers[I]; + // cont: + // if (Counter < N) goto body; else goto exit; + CGF.EmitBlock(ContDest.getBlock()); + auto *CVal = + CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); + llvm::Value *Cmp = + HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() + ? CGF.Builder.CreateICmpSLT(CVal, N) + : CGF.Builder.CreateICmpULT(CVal, N); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); + CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); + // body: + CGF.EmitBlock(BodyBB); + // Iteri = Begini + Counter * Stepi; + CGF.EmitIgnoredExpr(HelperData.Update); + } } - } -}; + ~OMPIteratorGeneratorScope() + { + if (!E) + return; + for (unsigned I = E->numOfIterators(); I > 0; --I) + { + // Counter = Counter + 1; + const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); + CGF.EmitIgnoredExpr(HelperData.CounterUpdate); + // goto cont; + CGF.EmitBranchThroughCleanup(ContDests[I - 1]); + // exit: + CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); + } + } + }; } // namespace static std::pair -getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { +getPointerAndSize(CodeGenFunction &CGF, const Expr *E) +{ const auto *OASE = dyn_cast(E); llvm::Value *Addr; - if (OASE) { + if (OASE) + { const Expr *Base = OASE->getBase(); Addr = CGF.EmitScalarExpr(Base); - } else { + } + else + { Addr = CGF.EmitLValue(E).getPointer(CGF); } llvm::Value *SizeVal; QualType Ty = E->getType(); - if (OASE) { + if (OASE) + { SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); - for (const Expr *SE : OASE->getDimensions()) { + for (const Expr *SE : OASE->getDimensions()) + { llvm::Value *Sz = CGF.EmitScalarExpr(SE); Sz = CGF.EmitScalarConversion( Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); } - } else if (const auto *ASE = - dyn_cast(E->IgnoreParenImpCasts())) { + } + else if (const auto *ASE = + dyn_cast(E->IgnoreParenImpCasts())) + { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); Address UpAddrAddress = UpAddrLVal.getAddress(CGF); @@ -3777,16 +4129,20 @@ llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else { + } + else + { SizeVal = CGF.getTypeSize(Ty); } return std::make_pair(Addr, SizeVal); } /// Builds kmp_depend_info, if it is not built yet, and builds flags type. -static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { +static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) +{ QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); - if (KmpTaskAffinityInfoTy.isNull()) { + if (KmpTaskAffinityInfoTy.isNull()) + { RecordDecl *KmpAffinityInfoRD = C.buildImplicitRecord("kmp_task_affinity_info_t"); KmpAffinityInfoRD->startDefinition(); @@ -3802,12 +4158,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, - Address Shareds, const OMPTaskDataTy &Data) { + Address Shareds, const OMPTaskDataTy &Data) +{ ASTContext &C = CGM.getContext(); llvm::SmallVector Privates; // Aggregate privates and sort them by the alignment. const auto *I = Data.PrivateCopies.begin(); - for (const Expr *E : Data.PrivateVars) { + for (const Expr *E : Data.PrivateVars) + { const auto *VD = cast(cast(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), @@ -3817,7 +4175,8 @@ } I = Data.FirstprivateCopies.begin(); const auto *IElemInitRef = Data.FirstprivateInits.begin(); - for (const Expr *E : Data.FirstprivateVars) { + for (const Expr *E : Data.FirstprivateVars) + { const auto *VD = cast(cast(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), @@ -3828,7 +4187,8 @@ ++IElemInitRef; } I = Data.LastprivateCopies.begin(); - for (const Expr *E : Data.LastprivateVars) { + for (const Expr *E : Data.LastprivateVars) + { const auto *VD = cast(cast(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), @@ -3836,32 +4196,39 @@ /*PrivateElemInit=*/nullptr)); ++I; } - for (const VarDecl *VD : Data.PrivateLocals) { + for (const VarDecl *VD : Data.PrivateLocals) + { if (isAllocatableDecl(VD)) Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); else Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); } llvm::stable_sort(Privates, - [](const PrivateDataTy &L, const PrivateDataTy &R) { + [](const PrivateDataTy &L, const PrivateDataTy &R) + { return L.first > R.first; }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). - if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { - if (SavedKmpTaskloopTQTy.isNull()) { + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + { + if (SavedKmpTaskloopTQTy.isNull()) + { SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } KmpTaskTQTy = SavedKmpTaskloopTQTy; - } else { + } + else + { assert((D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && "Expected taskloop, task or target directive"); - if (SavedKmpTaskTQTy.isNull()) { + if (SavedKmpTaskTQTy.isNull()) + { SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } @@ -3885,13 +4252,16 @@ llvm::Value *TaskPrivatesMap = nullptr; llvm::Type *TaskPrivatesMapTy = std::next(TaskFunction->arg_begin(), 3)->getType(); - if (!Privates.empty()) { + if (!Privates.empty()) + { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); - } else { + } + else + { TaskPrivatesMap = llvm::ConstantPointerNull::get( cast(TaskPrivatesMapTy)); } @@ -3908,7 +4278,8 @@ // Task flags. Format is taken from // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, // description of kmp_tasking_flags struct. - enum { + enum + { TiedFlag = 0x1, FinalFlag = 0x2, DestructorsFlag = 0x8, @@ -3917,7 +4288,8 @@ }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; - if (!Privates.empty()) { + if (!Privates.empty()) + { NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); if (NeedsCleanup) @@ -3936,11 +4308,11 @@ TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); SmallVector AllocArgs = {emitUpdateLocation(CGF, Loc), - getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, - SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TaskEntry, KmpRoutineEntryPtrTy)}; + getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, + SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, KmpRoutineEntryPtrTy)}; llvm::Value *NewTask; - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; if (auto *C = D.getSingleClause()) @@ -3957,7 +4329,9 @@ OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), AllocArgs); - } else { + } + else + { NewTask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), @@ -3966,7 +4340,8 @@ // Emit detach clause initialization. // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, // task_descriptor); - if (const auto *DC = D.getSingleClause()) { + if (const auto *DC = D.getSingleClause()) + { const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); LValue EvtLVal = CGF.EmitLValue(Evt); @@ -3984,32 +4359,44 @@ CGF.EmitStoreOfScalar(EvtVal, EvtLVal); } // Process affinity clauses. - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { // Process list of affinity data. ASTContext &C = CGM.getContext(); Address AffinitiesArray = Address::invalid(); // Calculate number of elements to form the array of affinity data. llvm::Value *NumOfElements = nullptr; unsigned NumAffinities = 0; - for (const auto *C : D.getClausesOfKind()) { - if (const Expr *Modifier = C->getModifier()) { + for (const auto *C : D.getClausesOfKind()) + { + if (const Expr *Modifier = C->getModifier()) + { const auto *IE = cast(Modifier->IgnoreParenImpCasts()); - for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) + { llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); NumOfElements = NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; } - } else { + } + else + { NumAffinities += C->varlist_size(); } } getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); // Fields ids in kmp_task_affinity_info record. - enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; + enum RTLAffinityInfoFieldsTy + { + BaseAddr, + Len, + Flags + }; QualType KmpTaskAffinityInfoArrayTy; - if (NumOfElements) { + if (NumOfElements) + { NumOfElements = CGF.Builder.CreateNUWAdd( llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); auto *OVE = new (C) OpaqueValueExpr( @@ -4028,7 +4415,9 @@ AffinitiesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, /*isSigned=*/false); - } else { + } + else + { KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( KmpTaskAffinityInfoTy, llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, @@ -4044,12 +4433,15 @@ // Fill array by elements without iterators. unsigned Pos = 0; bool HasIterator = false; - for (const auto *C : D.getClausesOfKind()) { - if (C->getModifier()) { + for (const auto *C : D.getClausesOfKind()) + { + if (C->getModifier()) + { HasIterator = true; continue; } - for (const Expr *E : C->varlists()) { + for (const Expr *E : C->varlists()) + { llvm::Value *Addr; llvm::Value *Size; std::tie(Addr, Size) = getPointerAndSize(CGF, E); @@ -4069,20 +4461,23 @@ } } LValue PosLVal; - if (HasIterator) { + if (HasIterator) + { PosLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), C.getSizeType()); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); } // Process elements with iterators. - for (const auto *C : D.getClausesOfKind()) { + for (const auto *C : D.getClausesOfKind()) + { const Expr *Modifier = C->getModifier(); if (!Modifier) continue; OMPIteratorGeneratorScope IteratorScope( CGF, cast_or_null(Modifier->IgnoreParenImpCasts())); - for (const Expr *E : C->varlists()) { + for (const Expr *E : C->varlists()) + { llvm::Value *Addr; llvm::Value *Size; std::tie(Addr, Size) = getPointerAndSize(CGF, E); @@ -4127,7 +4522,8 @@ // Fill the data in the resulting kmp_task_t record. // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); - if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { + if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) + { KmpTaskSharedsPtr = Address( CGF.EmitLoadOfScalar( CGF.EmitLValueForField( @@ -4141,12 +4537,14 @@ } // Emit initial values for private copies (if any). TaskResultTy Result; - if (!Privates.empty()) { + if (!Privates.empty()) + { emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/false); if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && - (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { + (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) + { Result.TaskDupFn = emitTaskDupFunction( CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, @@ -4154,12 +4552,17 @@ } } // Fields of union "kmp_cmplrdata_t" for destructors and priority. - enum { Priority = 0, Destructors = 1 }; + enum + { + Priority = 0, + Destructors = 1 + }; // Provide pointer to function with destructors for privates. auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); const RecordDecl *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); - if (NeedsCleanup) { + if (NeedsCleanup) + { llvm::Value *DestructorFn = emitDestructorsFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy); @@ -4171,7 +4574,8 @@ DestructorsLV); } // Set priority. - if (Data.Priority.getInt()) { + if (Data.Priority.getInt()) + { LValue Data2LV = CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); LValue PriorityLV = CGF.EmitLValueForField( @@ -4187,9 +4591,11 @@ } /// Translates internal dependency kind into the runtime kind. -static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { +static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) +{ RTLDependenceKindTy DepKind; - switch (K) { + switch (K) + { case OMPC_DEPEND_in: DepKind = RTLDependenceKindTy::DepIn; break; @@ -4219,9 +4625,11 @@ /// Builds kmp_depend_info, if it is not built yet, and builds flags type. static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, - QualType &FlagsTy) { + QualType &FlagsTy) +{ FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); - if (KmpDependInfoTy.isNull()) { + if (KmpDependInfoTy.isNull()) + { RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); KmpDependInfoRD->startDefinition(); addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); @@ -4234,7 +4642,8 @@ std::pair CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, - SourceLocation Loc) { + SourceLocation Loc) +{ ASTContext &C = CGM.getContext(); QualType FlagsTy; getDependTypes(C, KmpDependInfoTy, FlagsTy); @@ -4263,7 +4672,8 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion Pos, const OMPTaskDataTy::DependData &Data, - Address DependenciesArray) { + Address DependenciesArray) +{ CodeGenModule &CGM = CGF.CGM; ASTContext &C = CGM.getContext(); QualType FlagsTy; @@ -4276,23 +4686,30 @@ CGF, cast_or_null( Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() : nullptr)); - for (const Expr *E : Data.DepExprs) { + for (const Expr *E : Data.DepExprs) + { llvm::Value *Addr; llvm::Value *Size; // The expression will be a nullptr in the 'omp_all_memory' case. - if (E) { + if (E) + { std::tie(Addr, Size) = getPointerAndSize(CGF, E); Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); - } else { + } + else + { Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); Size = llvm::ConstantInt::get(CGF.SizeTy, 0); } LValue Base; - if (unsigned *P = Pos.dyn_cast()) { + if (unsigned *P = Pos.dyn_cast()) + { Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); - } else { + } + else + { assert(E && "Expected a non-null expression"); LValue &PosLVal = *Pos.get(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); @@ -4319,9 +4736,12 @@ CGF.EmitStoreOfScalar( llvm::ConstantInt::get(LLVMFlagsTy, static_cast(DepKind)), FlagsLVal); - if (unsigned *P = Pos.dyn_cast()) { + if (unsigned *P = Pos.dyn_cast()) + { ++(*P); - } else { + } + else + { LValue &PosLVal = *Pos.get(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); Idx = CGF.Builder.CreateNUWAdd(Idx, @@ -4333,7 +4753,8 @@ SmallVector CGOpenMPRuntime::emitDepobjElementsSizes( CodeGenFunction &CGF, QualType &KmpDependInfoTy, - const OMPTaskDataTy::DependData &Data) { + const OMPTaskDataTy::DependData &Data) +{ assert(Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependency kind."); SmallVector Sizes; @@ -4344,7 +4765,8 @@ CGF, cast_or_null( Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() : nullptr)); - for (const Expr *E : Data.DepExprs) { + for (const Expr *E : Data.DepExprs) + { llvm::Value *NumDeps; LValue Base; LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); @@ -4361,7 +4783,8 @@ SizeLVals.push_back(NumLVal); } } - for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { + for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) + { llvm::Value *Size = CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); Sizes.push_back(Size); @@ -4373,7 +4796,8 @@ QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, - Address DependenciesArray) { + Address DependenciesArray) +{ assert(Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependency kind."); llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); @@ -4382,7 +4806,8 @@ CGF, cast_or_null( Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() : nullptr)); - for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { + for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) + { const Expr *E = Data.DepExprs[I]; llvm::Value *NumDeps; LValue Base; @@ -4408,10 +4833,10 @@ std::pair CGOpenMPRuntime::emitDependClause( CodeGenFunction &CGF, ArrayRef Dependencies, - SourceLocation Loc) { - if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { - return D.DepExprs.empty(); - })) + SourceLocation Loc) +{ + if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) + { return D.DepExprs.empty(); })) return std::make_pair(nullptr, Address::invalid()); // Process list of dependencies. ASTContext &C = CGM.getContext(); @@ -4419,7 +4844,8 @@ llvm::Value *NumOfElements = nullptr; unsigned NumDependencies = std::accumulate( Dependencies.begin(), Dependencies.end(), 0, - [](unsigned V, const OMPTaskDataTy::DependData &D) { + [](unsigned V, const OMPTaskDataTy::DependData &D) + { return D.DepKind == OMPC_DEPEND_depobj ? V : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); @@ -4433,11 +4859,14 @@ llvm::ConstantInt::get(CGF.IntPtrTy, 0); // Calculate number of depobj dependencies and regular deps with the // iterators. - for (const OMPTaskDataTy::DependData &D : Dependencies) { - if (D.DepKind == OMPC_DEPEND_depobj) { + for (const OMPTaskDataTy::DependData &D : Dependencies) + { + if (D.DepKind == OMPC_DEPEND_depobj) + { SmallVector Sizes = emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); - for (llvm::Value *Size : Sizes) { + for (llvm::Value *Size : Sizes) + { NumOfDepobjElements = CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); } @@ -4446,8 +4875,10 @@ } // Include number of iterations, if any. - if (const auto *IE = cast_or_null(D.IteratorExpr)) { - for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + if (const auto *IE = cast_or_null(D.IteratorExpr)) + { + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) + { llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( @@ -4461,14 +4892,17 @@ } QualType KmpDependInfoArrayTy; - if (HasDepobjDeps || HasRegularWithIterators) { + if (HasDepobjDeps || HasRegularWithIterators) + { NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, /*isSigned=*/false); - if (HasDepobjDeps) { + if (HasDepobjDeps) + { NumOfElements = CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); } - if (HasRegularWithIterators) { + if (HasRegularWithIterators) + { NumOfElements = CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); } @@ -4488,7 +4922,9 @@ DependenciesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, /*isSigned=*/false); - } else { + } + else + { KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); @@ -4499,7 +4935,8 @@ /*isSigned=*/false); } unsigned Pos = 0; - for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { + for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) + { if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || Dependencies[I].IteratorExpr) continue; @@ -4510,7 +4947,8 @@ LValue PosLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); - for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { + for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) + { if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || !Dependencies[I].IteratorExpr) continue; @@ -4518,8 +4956,10 @@ DependenciesArray); } // Copy final depobj arrays without iterators. - if (HasDepobjDeps) { - for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { + if (HasDepobjDeps) + { + for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) + { if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) continue; emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], @@ -4533,7 +4973,8 @@ Address CGOpenMPRuntime::emitDepobjDependClause( CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, - SourceLocation Loc) { + SourceLocation Loc) +{ if (Dependencies.DepExprs.empty()) return Address::invalid(); // Process list of dependencies. @@ -4553,9 +4994,11 @@ llvm::Value *NumDepsVal; CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); if (const auto *IE = - cast_or_null(Dependencies.IteratorExpr)) { + cast_or_null(Dependencies.IteratorExpr)) + { NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); - for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) + { llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); @@ -4568,7 +5011,9 @@ Size = CGF.Builder.CreateNUWMul(Size, RecSize); NumDepsVal = CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); - } else { + } + else + { QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); @@ -4601,14 +5046,17 @@ llvm::PointerUnion Pos; unsigned Idx = 1; LValue PosLVal; - if (Dependencies.IteratorExpr) { + if (Dependencies.IteratorExpr) + { PosLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), C.getSizeType()); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, /*IsInit=*/true); Pos = &PosLVal; - } else { + } + else + { Pos = &Idx; } emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); @@ -4619,7 +5067,8 @@ } void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, - SourceLocation Loc) { + SourceLocation Loc) +{ ASTContext &C = CGM.getContext(); QualType FlagsTy; getDependTypes(C, KmpDependInfoTy, FlagsTy); @@ -4647,7 +5096,8 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, - SourceLocation Loc) { + SourceLocation Loc) +{ ASTContext &C = CGM.getContext(); QualType FlagsTy; getDependTypes(C, KmpDependInfoTy, FlagsTy); @@ -4699,7 +5149,8 @@ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data) +{ if (!CGF.HaveInsertPoint()) return; @@ -4724,9 +5175,10 @@ // list is not empty llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; + llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask}; llvm::Value *DepTaskArgs[7]; - if (!Data.Dependences.empty()) { + if (!Data.Dependences.empty()) + { DepTaskArgs[0] = UpLoc; DepTaskArgs[1] = ThreadID; DepTaskArgs[2] = NewTask; @@ -4736,18 +5188,23 @@ DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, - &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { - if (!Data.Tied) { + &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) + { + if (!Data.Tied) + { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); } - if (!Data.Dependences.empty()) { + if (!Data.Dependences.empty()) + { CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), DepTaskArgs); - } else { + } + else + { CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_omp_task), TaskArgs); @@ -4759,7 +5216,8 @@ }; llvm::Value *DepWaitTaskArgs[7]; - if (!Data.Dependences.empty()) { + if (!Data.Dependences.empty()) + { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; DepWaitTaskArgs[2] = NumOfElements; @@ -4772,7 +5230,8 @@ auto &M = CGM.getModule(); auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, &Data, &DepWaitTaskArgs, - Loc](CodeGenFunction &CGF, PrePostActionTy &) { + Loc](CodeGenFunction &CGF, PrePostActionTy &) + { CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 @@ -4784,7 +5243,8 @@ DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, - Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) + { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, @@ -4806,11 +5266,14 @@ RCG(CGF); }; - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - } else { - RegionCodeGenTy ThenRCG(ThenCodeGen); - ThenRCG(CGF); + } + else + { + RegionCodeGenTy ThenRCG(ThenCodeGen); + ThenRCG(CGF); } } @@ -4819,7 +5282,8 @@ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data) +{ if (!CGF.HaveInsertPoint()) return; TaskResultTy Result = @@ -4832,10 +5296,13 @@ llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *IfVal; - if (IfCond) { + if (IfCond) + { IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, /*isSigned=*/true); - } else { + } + else + { IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); } @@ -4867,13 +5334,21 @@ LValue RedLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); - if (Data.Reductions) { + if (Data.Reductions) + { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); - } else { + } + else + { CGF.EmitNullInitialization(RedLVal.getAddress(CGF), CGF.getContext().VoidPtrTy); } - enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; + enum + { + NoSchedule = 0, + Grainsize = 1, + NumTasks = 2 + }; llvm::Value *TaskArgs[] = { UpLoc, ThreadID, @@ -4915,7 +5390,8 @@ const llvm::function_ref &RedOpGen, const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, - const Expr *UpExpr = nullptr) { + const Expr *UpExpr = nullptr) +{ // Perform element-by-element initialization. QualType ElementTy; Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); @@ -4987,13 +5463,15 @@ /// is, otherwise consider it as combiner of UDR decl and emit it as a call of /// UDR combiner function. static void emitReductionCombiner(CodeGenFunction &CGF, - const Expr *ReductionOp) { + const Expr *ReductionOp) +{ if (const auto *CE = dyn_cast(ReductionOp)) if (const auto *OVE = dyn_cast(CE->getCallee())) if (const auto *DRE = dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) if (const auto *DRD = - dyn_cast(DRE->getDecl())) { + dyn_cast(DRE->getDecl())) + { std::pair Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); RValue Func = RValue::get(Reduction.first); @@ -5007,7 +5485,8 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef Privates, ArrayRef LHSExprs, - ArrayRef RHSExprs, ArrayRef ReductionOps) { + ArrayRef RHSExprs, ArrayRef ReductionOps) +{ ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); @@ -5046,7 +5525,8 @@ CodeGenFunction::OMPPrivateScope Scope(CGF); const auto *IPriv = Privates.begin(); unsigned Idx = 0; - for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { + for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) + { const auto *RHSVar = cast(cast(RHSExprs[I])->getDecl()); Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); @@ -5054,7 +5534,8 @@ cast(cast(LHSExprs[I])->getDecl()); Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); QualType PrivTy = (*IPriv)->getType(); - if (PrivTy->isVariablyModifiedType()) { + if (PrivTy->isVariablyModifiedType()) + { // Get array size and emit VLA type. ++Idx; Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); @@ -5071,17 +5552,22 @@ IPriv = Privates.begin(); const auto *ILHS = LHSExprs.begin(); const auto *IRHS = RHSExprs.begin(); - for (const Expr *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { + for (const Expr *E : ReductionOps) + { + if ((*IPriv)->getType()->isArrayType()) + { // Emit reduction for array section. const auto *LHSVar = cast(cast(*ILHS)->getDecl()); const auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) + { emitReductionCombiner(CGF, E); }); - } else { + } + else + { // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, E); } @@ -5098,17 +5584,22 @@ const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, - const DeclRefExpr *RHS) { - if (PrivateRef->getType()->isArrayType()) { + const DeclRefExpr *RHS) +{ + if (PrivateRef->getType()->isArrayType()) + { // Emit reduction for array section. const auto *LHSVar = cast(LHS->getDecl()); const auto *RHSVar = cast(RHS->getDecl()); EmitOMPAggregateReduction( CGF, PrivateRef->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) + { emitReductionCombiner(CGF, ReductionOp); }); - } else { + } + else + { // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, ReductionOp); } @@ -5119,7 +5610,8 @@ ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, - ReductionOptionsTy Options) { + ReductionOptionsTy Options) +{ if (!CGF.HaveInsertPoint()) return; @@ -5163,12 +5655,14 @@ ASTContext &C = CGM.getContext(); - if (SimpleReduction) { + if (SimpleReduction) + { CodeGenFunction::RunCleanupsScope Scope(CGF); const auto *IPriv = Privates.begin(); const auto *ILHS = LHSExprs.begin(); const auto *IRHS = RHSExprs.begin(); - for (const Expr *E : ReductionOps) { + for (const Expr *E : ReductionOps) + { emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), cast(*IRHS)); ++IPriv; @@ -5181,7 +5675,8 @@ // 1. Build a list of reduction variables. // void *RedList[] = {[0], ..., [-1]}; auto Size = RHSExprs.size(); - for (const Expr *E : Privates) { + for (const Expr *E : Privates) + { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; @@ -5194,13 +5689,15 @@ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); const auto *IPriv = Privates.begin(); unsigned Idx = 0; - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) + { Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); - if ((*IPriv)->getType()->isVariablyModifiedType()) { + if ((*IPriv)->getType()->isVariablyModifiedType()) + { // Store array size. ++Idx; Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); @@ -5236,8 +5733,8 @@ CGF.Builder.getInt32(RHSExprs.size()), // i32 ReductionArrayTySize, // size_type sizeof(RedList) RL, // void *RedList - ReductionFn, // void (*) (void *, void *) - Lock // kmp_critical_name *& + ReductionFn, // void (*) (void *, void *) + Lock // kmp_critical_name *& }; llvm::Value *Res = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( @@ -5267,12 +5764,14 @@ Lock // kmp_critical_name *& }; auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + CodeGenFunction &CGF, PrePostActionTy &Action) + { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); const auto *IPriv = Privates.begin(); const auto *ILHS = LHSExprs.begin(); const auto *IRHS = RHSExprs.begin(); - for (const Expr *E : ReductionOps) { + for (const Expr *E : ReductionOps) + { RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), cast(*IRHS)); ++IPriv; @@ -5302,42 +5801,51 @@ CGF.EmitBlock(Case2BB); auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + CodeGenFunction &CGF, PrePostActionTy &Action) + { const auto *ILHS = LHSExprs.begin(); const auto *IRHS = RHSExprs.begin(); const auto *IPriv = Privates.begin(); - for (const Expr *E : ReductionOps) { + for (const Expr *E : ReductionOps) + { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; const Expr *UpExpr = nullptr; BinaryOperatorKind BO = BO_Comma; - if (const auto *BO = dyn_cast(E)) { - if (BO->getOpcode() == BO_Assign) { + if (const auto *BO = dyn_cast(E)) + { + if (BO->getOpcode() == BO_Assign) + { XExpr = BO->getLHS(); UpExpr = BO->getRHS(); } } // Try to emit update expression as a simple atomic. const Expr *RHSExpr = UpExpr; - if (RHSExpr) { + if (RHSExpr) + { // Analyze RHS part of the whole expression. if (const auto *ACO = dyn_cast( - RHSExpr->IgnoreParenImpCasts())) { + RHSExpr->IgnoreParenImpCasts())) + { // If this is a conditional operator, analyze its condition for // min/max reduction operator. RHSExpr = ACO->getCond(); } if (const auto *BORHS = - dyn_cast(RHSExpr->IgnoreParenImpCasts())) { + dyn_cast(RHSExpr->IgnoreParenImpCasts())) + { EExpr = BORHS->getRHS(); BO = BORHS->getOpcode(); } } - if (XExpr) { + if (XExpr) + { const auto *VD = cast(cast(*ILHS)->getDecl()); auto &&AtomicRedGen = [BO, VD, Loc](CodeGenFunction &CGF, const Expr *XExpr, - const Expr *EExpr, const Expr *UpExpr) { + const Expr *EExpr, const Expr *UpExpr) + { LValue X = CGF.EmitLValue(XExpr); RValue E; if (EExpr) @@ -5345,7 +5853,8 @@ CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::AtomicOrdering::Monotonic, Loc, - [&CGF, UpExpr, VD, Loc](RValue XRValue) { + [&CGF, UpExpr, VD, Loc](RValue XRValue) + { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.emitOMPSimpleStore( @@ -5356,38 +5865,48 @@ return CGF.EmitAnyExpr(UpExpr); }); }; - if ((*IPriv)->getType()->isArrayType()) { + if ((*IPriv)->getType()->isArrayType()) + { // Emit atomic reduction for array section. const auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, AtomicRedGen, XExpr, EExpr, UpExpr); - } else { + } + else + { // Emit atomic reduction for array subscript or single variable. AtomicRedGen(CGF, XExpr, EExpr, UpExpr); } - } else { + } + else + { // Emit as a critical region. auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { + const Expr *, const Expr *) + { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); std::string Name = RT.getName({"atomic_reduction"}); RT.emitCriticalRegion( CGF, Name, - [=](CodeGenFunction &CGF, PrePostActionTy &Action) { + [=](CodeGenFunction &CGF, PrePostActionTy &Action) + { Action.Enter(CGF); emitReductionCombiner(CGF, E); }, Loc); }; - if ((*IPriv)->getType()->isArrayType()) { + if ((*IPriv)->getType()->isArrayType()) + { const auto *LHSVar = cast(cast(*ILHS)->getDecl()); const auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, CritRedGen); - } else { + } + else + { CritRedGen(CGF, nullptr, nullptr, nullptr); } } @@ -5397,7 +5916,8 @@ } }; RegionCodeGenTy AtomicRCG(AtomicCodeGen); - if (!WithNowait) { + if (!WithNowait) + { // Add emission of __kmpc_end_reduce(, , &); llvm::Value *EndArgs[] = { IdentTLoc, // ident_t * @@ -5410,7 +5930,9 @@ EndArgs); AtomicRCG.setAction(Action); AtomicRCG(CGF); - } else { + } + else + { AtomicRCG(CGF); } @@ -5421,7 +5943,8 @@ /// Generates unique name for artificial threadprivate variables. /// Format is: "." "_" "" static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, - const Expr *Ref) { + const Expr *Ref) +{ SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); const clang::DeclRefExpr *DE; @@ -5446,7 +5969,8 @@ /// \endcode static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, - ReductionCodeGen &RCG, unsigned N) { + ReductionCodeGen &RCG, unsigned N) +{ ASTContext &C = CGM.getContext(); QualType VoidPtrTy = C.VoidPtrTy; VoidPtrTy.addRestrict(); @@ -5476,7 +6000,8 @@ llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. - if (RCG.getSizes(N).second) { + if (RCG.getSizes(N).second) + { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); @@ -5488,7 +6013,8 @@ // If initializer uses initializer from declare reduction construct, emit a // pointer to the address of the original reduction item (reuired by reduction // initializer) - if (RCG.usesReductionInitializer(N)) { + if (RCG.usesReductionInitializer(N)) + { Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); OrigAddr = CGF.EmitLoadOfPointer( SharedAddr, @@ -5498,7 +6024,8 @@ // %0 = bitcast void* %arg to * // store , * %0 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, - [](CodeGenFunction &) { return false; }); + [](CodeGenFunction &) + { return false; }); CGF.FinishFunction(); return Fn; } @@ -5518,7 +6045,8 @@ ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, - const Expr *PrivateRef) { + const Expr *PrivateRef) +{ ASTContext &C = CGM.getContext(); const auto *LHSVD = cast(cast(LHS)->getDecl()); const auto *RHSVD = cast(cast(RHS)->getDecl()); @@ -5542,7 +6070,8 @@ llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. - if (RCG.getSizes(N).second) { + if (RCG.getSizes(N).second) + { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); @@ -5567,8 +6096,8 @@ // Pull out the pointer to the variable. CGF.EmitLoadOfPointer( CGF.Builder.CreateElementBitCast( - CGF.GetAddrOfLocalVar(&ParamIn), - CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), + CGF.GetAddrOfLocalVar(&ParamIn), + CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), C.getPointerType(RHSVD->getType())->castAs())); PrivateScope.Privatize(); // Emit the combiner body: @@ -5591,7 +6120,8 @@ /// \endcode static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, - ReductionCodeGen &RCG, unsigned N) { + ReductionCodeGen &RCG, unsigned N) +{ if (!RCG.needCleanups(N)) return nullptr; ASTContext &C = CGM.getContext(); @@ -5614,7 +6144,8 @@ llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. - if (RCG.getSizes(N).second) { + if (RCG.getSizes(N).second) + { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); @@ -5631,7 +6162,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( CodeGenFunction &CGF, SourceLocation Loc, ArrayRef LHSExprs, - ArrayRef RHSExprs, const OMPTaskDataTy &Data) { + ArrayRef RHSExprs, const OMPTaskDataTy &Data) +{ if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) return nullptr; @@ -5651,7 +6183,7 @@ const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); - const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *FlagsFD = addFieldToRecordDecl( @@ -5666,7 +6198,8 @@ Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, Data.ReductionCopies, Data.ReductionOps); - for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) + { // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; @@ -5720,15 +6253,18 @@ CGF.EmitStoreOfScalar(CombAddr, CombLVal); // ElemLVal.flags = 0; LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); - if (DelayedCreation) { + if (DelayedCreation) + { CGF.EmitStoreOfScalar( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); - } else + } + else CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), FlagsLVal.getType()); } - if (Data.IsReductionWithTaskMod) { + if (Data.IsReductionWithTaskMod) + { // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int // is_ws, int num, void *data); llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); @@ -5760,7 +6296,8 @@ void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, - bool IsWorksharingReduction) { + bool IsWorksharingReduction) +{ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int // is_ws, int num, void *data); llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); @@ -5779,11 +6316,13 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, - unsigned N) { + unsigned N) +{ auto Sizes = RCG.getSizes(N); // Emit threadprivate global variable if the type is non-constant // (Sizes.second = nullptr). - if (Sizes.second) { + if (Sizes.second) + { llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, /*isSigned=*/false); Address SizeAddr = getAddrOfArtificialThreadPrivate( @@ -5796,7 +6335,8 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, - LValue SharedLVal) { + LValue SharedLVal) +{ // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), @@ -5814,14 +6354,18 @@ } void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data) +{ if (!CGF.HaveInsertPoint()) return; - if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) + { // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. OMPBuilder.createTaskwait(CGF.Builder); - } else { + } + else + { llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); auto &M = CGM.getModule(); @@ -5829,7 +6373,8 @@ llvm::Value *NumOfElements; std::tie(NumOfElements, DependenciesArray) = emitDependClause(CGF, Data.Dependences, Loc); - if (!Data.Dependences.empty()) { + if (!Data.Dependences.empty()) + { llvm::Value *DepWaitTaskArgs[7]; DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; @@ -5849,8 +6394,9 @@ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_omp_taskwait_deps_51), DepWaitTaskArgs); - - } else { + } + else + { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); @@ -5869,7 +6415,8 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnerKind, const RegionCodeGenTy &CodeGen, - bool HasCancel) { + bool HasCancel) +{ if (!CGF.HaveInsertPoint()) return; InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, @@ -5879,17 +6426,20 @@ CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } -namespace { -enum RTCancelKind { - CancelNoreq = 0, - CancelParallel = 1, - CancelLoop = 2, - CancelSections = 3, - CancelTaskgroup = 4 -}; +namespace +{ + enum RTCancelKind + { + CancelNoreq = 0, + CancelParallel = 1, + CancelLoop = 2, + CancelSections = 3, + CancelTaskgroup = 4 + }; } // anonymous namespace -static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { +static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) +{ RTCancelKind CancelKind = CancelNoreq; if (CancelRegion == OMPD_parallel) CancelKind = CancelParallel; @@ -5897,7 +6447,8 @@ CancelKind = CancelLoop; else if (CancelRegion == OMPD_sections) CancelKind = CancelSections; - else { + else + { assert(CancelRegion == OMPD_taskgroup); CancelKind = CancelTaskgroup; } @@ -5906,16 +6457,19 @@ void CGOpenMPRuntime::emitCancellationPointCall( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind CancelRegion) { + OpenMPDirectiveKind CancelRegion) +{ if (!CGF.HaveInsertPoint()) return; // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 // global_tid, kmp_int32 cncl_kind); if (auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { + dyn_cast_or_null(CGF.CapturedStmtInfo)) + { // For 'cancellation point taskgroup', the task region info may not have a // cancel. This may instead happen in another adjacent task. - if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { + if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) + { llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; @@ -5946,16 +6500,19 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, - OpenMPDirectiveKind CancelRegion) { + OpenMPDirectiveKind CancelRegion) +{ if (!CGF.HaveInsertPoint()) return; // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind); auto &M = CGM.getModule(); if (auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { + dyn_cast_or_null(CGF.CapturedStmtInfo)) + { auto &&ThenGen = [this, &M, Loc, CancelRegion, - OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { + OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) + { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), @@ -5980,53 +6537,65 @@ CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &, PrePostActionTy &) {}); - } else { + } + else + { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } } } -namespace { -/// Cleanup action for uses_allocators support. -class OMPUsesAllocatorsActionTy final : public PrePostActionTy { - ArrayRef> Allocators; +namespace +{ + /// Cleanup action for uses_allocators support. + class OMPUsesAllocatorsActionTy final : public PrePostActionTy + { + ArrayRef> Allocators; -public: - OMPUsesAllocatorsActionTy( - ArrayRef> Allocators) - : Allocators(Allocators) {} - void Enter(CodeGenFunction &CGF) override { - if (!CGF.HaveInsertPoint()) - return; - for (const auto &AllocatorData : Allocators) { - CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( - CGF, AllocatorData.first, AllocatorData.second); + public: + OMPUsesAllocatorsActionTy( + ArrayRef> Allocators) + : Allocators(Allocators) {} + void Enter(CodeGenFunction &CGF) override + { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) + { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( + CGF, AllocatorData.first, AllocatorData.second); + } } - } - void Exit(CodeGenFunction &CGF) override { - if (!CGF.HaveInsertPoint()) - return; - for (const auto &AllocatorData : Allocators) { - CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, - AllocatorData.first); + void Exit(CodeGenFunction &CGF) override + { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) + { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, + AllocatorData.first); + } } - } -}; + }; } // namespace void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) +{ assert(!ParentName.empty() && "Invalid target entry parent name!"); HasEmittedTargetRegion = true; SmallVector, 4> Allocators; - for (const auto *C : D.getClausesOfKind()) { - for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + for (const auto *C : D.getClausesOfKind()) + { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) + { const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); if (!D.AllocatorTraits) continue; @@ -6041,7 +6610,8 @@ void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, - const Expr *AllocatorTraits) { + const Expr *AllocatorTraits) +{ llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); // Use default memspace handle. @@ -6075,7 +6645,8 @@ } void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, - const Expr *Allocator) { + const Expr *Allocator) +{ llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); @@ -6093,20 +6664,22 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) +{ auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); CodeGenFunction CGF(CGM, true); llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = - [&CGF, &D, &CodeGen](StringRef EntryFnName) { - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + [&CGF, &D, &CodeGen](StringRef EntryFnName) + { + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); - }; + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + }; // Get NumTeams and ThreadLimit attributes int32_t DefaultValTeams = -1; @@ -6125,7 +6698,8 @@ /// Checks if the expression is constant or does not have non-trivial function /// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { +static bool isTrivial(ASTContext &Ctx, const Expr *E) +{ // We can skip constant expressions. // We can skip expressions with trivial calls or simple expressions. return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || @@ -6134,12 +6708,16 @@ } const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, - const Stmt *Body) { + const Stmt *Body) +{ const Stmt *Child = Body->IgnoreContainers(); - while (const auto *C = dyn_cast_or_null(Child)) { + while (const auto *C = dyn_cast_or_null(Child)) + { Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast(S)) { + for (const Stmt *S : C->body()) + { + if (const auto *E = dyn_cast(S)) + { if (isTrivial(Ctx, E)) continue; } @@ -6148,8 +6726,10 @@ isa(S) || isa(S)) continue; // Analyze declarations. - if (const auto *DS = dyn_cast(S)) { - if (llvm::all_of(DS->decls(), [](const Decl *D) { + if (const auto *DS = dyn_cast(S)) + { + if (llvm::all_of(DS->decls(), [](const Decl *D) + { if (isa(D) || isa(D) || isa(D) || isa(D) || isa(D) || isa(D) || @@ -6160,8 +6740,7 @@ const auto *VD = dyn_cast(D); if (!VD) return false; - return VD->hasGlobalStorage() || !VD->isUsed(); - })) + return VD->hasGlobalStorage() || !VD->isUsed(); })) continue; } // Found multiple children - cannot get the one child only. @@ -6177,22 +6756,28 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal) { + int32_t &DefaultVal) +{ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - switch (DirectiveKind) { - case OMPD_target: { + switch (DirectiveKind) + { + case OMPD_target: + { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); if (const auto *NestedDir = - dyn_cast_or_null(ChildStmt)) { - if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { - if (NestedDir->hasClausesOfKind()) { + dyn_cast_or_null(ChildStmt)) + { + if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) + { + if (NestedDir->hasClausesOfKind()) + { const Expr *NumTeams = NestedDir->getSingleClause()->getNumTeams(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) @@ -6205,7 +6790,8 @@ return nullptr; } if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || - isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { + isOpenMPSimdDirective(NestedDir->getDirectiveKind())) + { DefaultVal = 1; return nullptr; } @@ -6220,8 +6806,10 @@ case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: { - if (D.hasClausesOfKind()) { + case OMPD_target_teams_distribute_parallel_for_simd: + { + if (D.hasClausesOfKind()) + { const Expr *NumTeams = D.getSingleClause()->getNumTeams(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) @@ -6305,36 +6893,41 @@ } llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D) { + CodeGenFunction &CGF, const OMPExecutableDirective &D) +{ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the teams directive expected to be emitted " "only for the host!"); CGBuilderTy &Bld = CGF.Builder; int32_t DefaultNT = -1; const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); - if (NumTeams != nullptr) { + if (NumTeams != nullptr) + { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); - switch (DirectiveKind) { - case OMPD_target: { + switch (DirectiveKind) + { + case OMPD_target: + { const auto *CS = D.getInnermostCapturedStmt(); CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + /*isSigned=*/true); } case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: { + case OMPD_target_teams_distribute_parallel_for_simd: + { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + /*isSigned=*/true); } default: break; @@ -6345,40 +6938,55 @@ } static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, - llvm::Value *DefaultThreadLimitVal) { + llvm::Value *DefaultThreadLimitVal) +{ const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); - if (const auto *Dir = dyn_cast_or_null(Child)) { - if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + if (const auto *Dir = dyn_cast_or_null(Child)) + { + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) + { llvm::Value *NumThreads = nullptr; llvm::Value *CondVal = nullptr; // Handle if clause. If if clause present, the number of threads is // calculated as ? ( ? : 0 ) : 1. - if (Dir->hasClausesOfKind()) { + if (Dir->hasClausesOfKind()) + { CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); const OMPIfClause *IfClause = nullptr; - for (const auto *C : Dir->getClausesOfKind()) { + for (const auto *C : Dir->getClausesOfKind()) + { if (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_parallel) { + C->getNameModifier() == OMPD_parallel) + { IfClause = C; break; } } - if (IfClause) { + if (IfClause) + { const Expr *Cond = IfClause->getCondition(); bool Result; - if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) + { if (!Result) return CGF.Builder.getInt32(1); - } else { + } + else + { CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); if (const auto *PreInit = - cast_or_null(IfClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr()) { + cast_or_null(IfClause->getPreInitStmt())) + { + for (const auto *I : PreInit->decls()) + { + if (!I->hasAttr()) + { CGF.EmitVarDecl(cast(*I)); - } else { + } + else + { CodeGenFunction::AutoVarEmission Emission = CGF.EmitAutoVarAlloca(cast(*I)); CGF.EmitAutoVarCleanups(Emission); @@ -6391,7 +6999,8 @@ } // Check the value of num_threads clause iff if clause was not specified // or is not evaluated to false. - if (Dir->hasClausesOfKind()) { + if (Dir->hasClausesOfKind()) + { CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); const auto *NumThreadsClause = @@ -6399,11 +7008,16 @@ CodeGenFunction::LexicalScope Scope( CGF, NumThreadsClause->getNumThreads()->getSourceRange()); if (const auto *PreInit = - cast_or_null(NumThreadsClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr()) { + cast_or_null(NumThreadsClause->getPreInitStmt())) + { + for (const auto *I : PreInit->decls()) + { + if (!I->hasAttr()) + { CGF.EmitVarDecl(cast(*I)); - } else { + } + else + { CodeGenFunction::AutoVarEmission Emission = CGF.EmitAutoVarAlloca(cast(*I)); CGF.EmitAutoVarCleanups(Emission); @@ -6417,12 +7031,15 @@ NumThreads = CGF.Builder.CreateSelect( CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), DefaultThreadLimitVal, NumThreads); - } else { + } + else + { NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal : CGF.Builder.getInt32(0); } // Process condition of the if clause. - if (CondVal) { + if (CondVal) + { NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, CGF.Builder.getInt32(1)); } @@ -6436,18 +7053,21 @@ const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal) { + int32_t &DefaultVal) +{ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - switch (DirectiveKind) { + switch (DirectiveKind) + { case OMPD_target: // Teams have no clause thread_limit return nullptr; case OMPD_target_teams: case OMPD_target_teams_distribute: - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { const auto *ThreadLimitClause = D.getSingleClause(); const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) @@ -6461,10 +7081,12 @@ case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: { + case OMPD_target_teams_distribute_parallel_for_simd: + { Expr *ThreadLimit = nullptr; Expr *NumThreads = nullptr; - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { const auto *ThreadLimitClause = D.getSingleClause(); ThreadLimit = ThreadLimitClause->getThreadLimit(); if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) @@ -6472,13 +7094,17 @@ ThreadLimit->getIntegerConstantExpr(CGF.getContext())) DefaultVal = Constant->getExtValue(); } - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { const auto *NumThreadsClause = D.getSingleClause(); NumThreads = NumThreadsClause->getNumThreads(); - if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { + if (NumThreads->isIntegerConstantExpr(CGF.getContext())) + { if (auto Constant = - NumThreads->getIntegerConstantExpr(CGF.getContext())) { - if (Constant->getExtValue() < DefaultVal) { + NumThreads->getIntegerConstantExpr(CGF.getContext())) + { + if (Constant->getExtValue() < DefaultVal) + { DefaultVal = Constant->getExtValue(); ThreadLimit = NumThreads; } @@ -6557,7 +7183,8 @@ } llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D) { + CodeGenFunction &CGF, const OMPExecutableDirective &D) +{ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the teams directive expected to be emitted " "only for the host!"); @@ -6567,8 +7194,10 @@ CGBuilderTy &Bld = CGF.Builder; llvm::Value *ThreadLimitVal = nullptr; llvm::Value *NumThreadsVal = nullptr; - switch (DirectiveKind) { - case OMPD_target: { + switch (DirectiveKind) + { + case OMPD_target: + { const CapturedStmt *CS = D.getInnermostCapturedStmt(); if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) return NumThreads; @@ -6577,19 +7206,26 @@ // TODO: The standard is not clear how to resolve two thread limit clauses, // let's pick the teams one if it's present, otherwise the target one. const auto *ThreadLimitClause = D.getSingleClause(); - if (const auto *Dir = dyn_cast_or_null(Child)) { - if (const auto *TLC = Dir->getSingleClause()) { + if (const auto *Dir = dyn_cast_or_null(Child)) + { + if (const auto *TLC = Dir->getSingleClause()) + { ThreadLimitClause = TLC; CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); CodeGenFunction::LexicalScope Scope( CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); if (const auto *PreInit = - cast_or_null(ThreadLimitClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr()) { + cast_or_null(ThreadLimitClause->getPreInitStmt())) + { + for (const auto *I : PreInit->decls()) + { + if (!I->hasAttr()) + { CGF.EmitVarDecl(cast(*I)); - } else { + } + else + { CodeGenFunction::AutoVarEmission Emission = CGF.EmitAutoVarAlloca(cast(*I)); CGF.EmitAutoVarCleanups(Emission); @@ -6598,22 +7234,26 @@ } } } - if (ThreadLimitClause) { + if (ThreadLimitClause) + { llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } - if (const auto *Dir = dyn_cast_or_null(Child)) { + if (const auto *Dir = dyn_cast_or_null(Child)) + { if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && - !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { + !isOpenMPDistributeDirective(Dir->getDirectiveKind())) + { CS = Dir->getInnermostCapturedStmt(); const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); Dir = dyn_cast_or_null(Child); } if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && - !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + !isOpenMPSimdDirective(Dir->getDirectiveKind())) + { CS = Dir->getInnermostCapturedStmt(); if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) return NumThreads; @@ -6623,8 +7263,10 @@ } return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); } - case OMPD_target_teams: { - if (D.hasClausesOfKind()) { + case OMPD_target_teams: + { + if (D.hasClausesOfKind()) + { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause(); llvm::Value *ThreadLimit = CGF.EmitScalarExpr( @@ -6637,8 +7279,10 @@ return NumThreads; const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); - if (const auto *Dir = dyn_cast_or_null(Child)) { - if (Dir->getDirectiveKind() == OMPD_distribute) { + if (const auto *Dir = dyn_cast_or_null(Child)) + { + if (Dir->getDirectiveKind() == OMPD_distribute) + { CS = Dir->getInnermostCapturedStmt(); if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) return NumThreads; @@ -6647,7 +7291,8 @@ return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); } case OMPD_target_teams_distribute: - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause(); llvm::Value *ThreadLimit = CGF.EmitScalarExpr( @@ -6663,32 +7308,41 @@ case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: { + case OMPD_target_teams_distribute_parallel_for_simd: + { llvm::Value *CondVal = nullptr; // Handle if clause. If if clause present, the number of threads is // calculated as ? ( ? : 0 ) : 1. - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { const OMPIfClause *IfClause = nullptr; - for (const auto *C : D.getClausesOfKind()) { + for (const auto *C : D.getClausesOfKind()) + { if (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_parallel) { + C->getNameModifier() == OMPD_parallel) + { IfClause = C; break; } } - if (IfClause) { + if (IfClause) + { const Expr *Cond = IfClause->getCondition(); bool Result; - if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) + { if (!Result) return Bld.getInt32(1); - } else { + } + else + { CodeGenFunction::RunCleanupsScope Scope(CGF); CondVal = CGF.EvaluateExprAsBool(Cond); } } } - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause(); llvm::Value *ThreadLimit = CGF.EmitScalarExpr( @@ -6696,7 +7350,8 @@ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } - if (D.hasClausesOfKind()) { + if (D.hasClausesOfKind()) + { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); const auto *NumThreadsClause = D.getSingleClause(); llvm::Value *NumThreads = CGF.EmitScalarExpr( @@ -6784,1964 +7439,2126 @@ llvm_unreachable("Unsupported directive kind."); } -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); - -// Utility to handle information from clauses associated with a given -// construct that use mappable expressions (e.g. 'map' clause, 'to' clause). -// It provides a convenient interface to obtain the information and generate -// code for that information. -class MappableExprsHandler { -public: - /// Get the offset of the OMP_MAP_MEMBER_OF field. - static unsigned getFlagMemberOffset() { - unsigned Offset = 0; - for (uint64_t Remain = - static_cast>( - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); - !(Remain & 1); Remain = Remain >> 1) - Offset++; - return Offset; - } - - /// Class that holds debugging information for a data mapping to be passed to - /// the runtime library. - class MappingExprInfo { - /// The variable declaration used for the data mapping. - const ValueDecl *MapDecl = nullptr; - /// The original expression used in the map clause, or null if there is - /// none. - const Expr *MapExpr = nullptr; +namespace +{ + LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + // Utility to handle information from clauses associated with a given + // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). + // It provides a convenient interface to obtain the information and generate + // code for that information. + class MappableExprsHandler + { public: - MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) - : MapDecl(MapDecl), MapExpr(MapExpr) {} + /// Get the offset of the OMP_MAP_MEMBER_OF field. + static unsigned getFlagMemberOffset() + { + unsigned Offset = 0; + for (uint64_t Remain = + static_cast>( + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + !(Remain & 1); Remain = Remain >> 1) + Offset++; + return Offset; + } + + /// Class that holds debugging information for a data mapping to be passed to + /// the runtime library. + class MappingExprInfo + { + /// The variable declaration used for the data mapping. + const ValueDecl *MapDecl = nullptr; + /// The original expression used in the map clause, or null if there is + /// none. + const Expr *MapExpr = nullptr; - const ValueDecl *getMapDecl() const { return MapDecl; } - const Expr *getMapExpr() const { return MapExpr; } - }; + public: + MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) + : MapDecl(MapDecl), MapExpr(MapExpr) {} - /// Class that associates information with a base pointer to be passed to the - /// runtime library. - class BasePointerInfo { - /// The base pointer. - llvm::Value *Ptr = nullptr; - /// The base declaration that refers to this device pointer, or null if - /// there is none. - const ValueDecl *DevPtrDecl = nullptr; + const ValueDecl *getMapDecl() const { return MapDecl; } + const Expr *getMapExpr() const { return MapExpr; } + }; - public: - BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) - : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} - llvm::Value *operator*() const { return Ptr; } - const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } - void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } - }; + /// Class that associates information with a base pointer to be passed to the + /// runtime library. + class BasePointerInfo + { + /// The base pointer. + llvm::Value *Ptr = nullptr; + /// The base declaration that refers to this device pointer, or null if + /// there is none. + const ValueDecl *DevPtrDecl = nullptr; - using MapExprsArrayTy = SmallVector; - using MapBaseValuesArrayTy = SmallVector; - using MapValuesArrayTy = SmallVector; - using MapFlagsArrayTy = SmallVector; - using MapMappersArrayTy = SmallVector; - using MapDimArrayTy = SmallVector; - using MapNonContiguousArrayTy = SmallVector; - - /// This structure contains combined information generated for mappable - /// clauses, including base pointers, pointers, sizes, map types, user-defined - /// mappers, and non-contiguous information. - struct MapCombinedInfoTy { - struct StructNonContiguousInfo { - bool IsNonContiguous = false; - MapDimArrayTy Dims; - MapNonContiguousArrayTy Offsets; - MapNonContiguousArrayTy Counts; - MapNonContiguousArrayTy Strides; + public: + BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) + : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} + llvm::Value *operator*() const { return Ptr; } + const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } + void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } }; - MapExprsArrayTy Exprs; - MapBaseValuesArrayTy BasePointers; - MapValuesArrayTy Pointers; - MapValuesArrayTy Sizes; - MapFlagsArrayTy Types; - MapMappersArrayTy Mappers; - StructNonContiguousInfo NonContigInfo; - - /// Append arrays in \a CurInfo. - void append(MapCombinedInfoTy &CurInfo) { - Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); - BasePointers.append(CurInfo.BasePointers.begin(), - CurInfo.BasePointers.end()); - Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); - Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); - Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); - Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); - NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), - CurInfo.NonContigInfo.Dims.end()); - NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), - CurInfo.NonContigInfo.Offsets.end()); - NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), - CurInfo.NonContigInfo.Counts.end()); - NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), - CurInfo.NonContigInfo.Strides.end()); - } - }; - /// Map between a struct and the its lowest & highest elements which have been - /// mapped. - /// [ValueDecl *] --> {LE(FieldIndex, Pointer), - /// HE(FieldIndex, Pointer)} - struct StructRangeInfoTy { - MapCombinedInfoTy PreliminaryMapData; - std::pair LowestElem = { - 0, Address::invalid()}; - std::pair HighestElem = { - 0, Address::invalid()}; - Address Base = Address::invalid(); - Address LB = Address::invalid(); - bool IsArraySection = false; - bool HasCompleteRecord = false; - }; + using MapExprsArrayTy = SmallVector; + using MapBaseValuesArrayTy = SmallVector; + using MapValuesArrayTy = SmallVector; + using MapFlagsArrayTy = SmallVector; + using MapMappersArrayTy = SmallVector; + using MapDimArrayTy = SmallVector; + using MapNonContiguousArrayTy = SmallVector; + + /// This structure contains combined information generated for mappable + /// clauses, including base pointers, pointers, sizes, map types, user-defined + /// mappers, and non-contiguous information. + struct MapCombinedInfoTy + { + struct StructNonContiguousInfo + { + bool IsNonContiguous = false; + MapDimArrayTy Dims; + MapNonContiguousArrayTy Offsets; + MapNonContiguousArrayTy Counts; + MapNonContiguousArrayTy Strides; + }; + MapExprsArrayTy Exprs; + MapBaseValuesArrayTy BasePointers; + MapValuesArrayTy Pointers; + MapValuesArrayTy Sizes; + MapFlagsArrayTy Types; + MapMappersArrayTy Mappers; + StructNonContiguousInfo NonContigInfo; + + /// Append arrays in \a CurInfo. + void append(MapCombinedInfoTy &CurInfo) + { + Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); + BasePointers.append(CurInfo.BasePointers.begin(), + CurInfo.BasePointers.end()); + Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); + Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); + Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); + Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); + NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), + CurInfo.NonContigInfo.Dims.end()); + NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), + CurInfo.NonContigInfo.Offsets.end()); + NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), + CurInfo.NonContigInfo.Counts.end()); + NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), + CurInfo.NonContigInfo.Strides.end()); + } + }; -private: - /// Kind that defines how a device pointer has to be returned. - struct MapInfo { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType = OMPC_MAP_unknown; - ArrayRef MapModifiers; - ArrayRef MotionModifiers; - bool ReturnDevicePointer = false; - bool IsImplicit = false; - const ValueDecl *Mapper = nullptr; - const Expr *VarRef = nullptr; - bool ForDeviceAddr = false; - - MapInfo() = default; - MapInfo( - OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - ArrayRef MotionModifiers, - bool ReturnDevicePointer, bool IsImplicit, - const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, - bool ForDeviceAddr = false) - : Components(Components), MapType(MapType), MapModifiers(MapModifiers), - MotionModifiers(MotionModifiers), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), - Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} - }; + /// Map between a struct and the its lowest & highest elements which have been + /// mapped. + /// [ValueDecl *] --> {LE(FieldIndex, Pointer), + /// HE(FieldIndex, Pointer)} + struct StructRangeInfoTy + { + MapCombinedInfoTy PreliminaryMapData; + std::pair LowestElem = { + 0, Address::invalid()}; + std::pair HighestElem = { + 0, Address::invalid()}; + Address Base = Address::invalid(); + Address LB = Address::invalid(); + bool IsArraySection = false; + bool HasCompleteRecord = false; + }; - /// If use_device_ptr or use_device_addr is used on a decl which is a struct - /// member and there is no map information about it, then emission of that - /// entry is deferred until the whole struct has been processed. - struct DeferredDevicePtrEntryTy { - const Expr *IE = nullptr; - const ValueDecl *VD = nullptr; - bool ForDeviceAddr = false; - - DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, - bool ForDeviceAddr) - : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} - }; + private: + /// Kind that defines how a device pointer has to be returned. + struct MapInfo + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + ArrayRef MapModifiers; + ArrayRef MotionModifiers; + bool ReturnDevicePointer = false; + bool IsImplicit = false; + const ValueDecl *Mapper = nullptr; + const Expr *VarRef = nullptr; + bool ForDeviceAddr = false; + + MapInfo() = default; + MapInfo( + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, + ArrayRef MotionModifiers, + bool ReturnDevicePointer, bool IsImplicit, + const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, + bool ForDeviceAddr = false) + : Components(Components), MapType(MapType), MapModifiers(MapModifiers), + MotionModifiers(MotionModifiers), + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), + Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} + }; + + /// If use_device_ptr or use_device_addr is used on a decl which is a struct + /// member and there is no map information about it, then emission of that + /// entry is deferred until the whole struct has been processed. + struct DeferredDevicePtrEntryTy + { + const Expr *IE = nullptr; + const ValueDecl *VD = nullptr; + bool ForDeviceAddr = false; - /// The target directive from where the mappable clauses were extracted. It - /// is either a executable directive or a user-defined mapper directive. - llvm::PointerUnion - CurDir; - - /// Function the directive is being generated for. - CodeGenFunction &CGF; - - /// Set of all first private variables in the current directive. - /// bool data is set to true if the variable is implicitly marked as - /// firstprivate, false otherwise. - llvm::DenseMap, bool> FirstPrivateDecls; - - /// Map between device pointer declarations and their expression components. - /// The key value for declarations in 'this' is null. - llvm::DenseMap< - const ValueDecl *, - SmallVector> - DevPointersMap; - - /// Map between device addr declarations and their expression components. - /// The key value for declarations in 'this' is null. - llvm::DenseMap< - const ValueDecl *, - SmallVector> - HasDevAddrsMap; - - /// Map between lambda declarations and their map type. - llvm::DenseMap LambdasMap; - - llvm::Value *getExprTypeSize(const Expr *E) const { - QualType ExprTy = E->getType().getCanonicalType(); - - // Calculate the size for array shaping expression. - if (const auto *OAE = dyn_cast(E)) { - llvm::Value *Size = - CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); - for (const Expr *SE : OAE->getDimensions()) { - llvm::Value *Sz = CGF.EmitScalarExpr(SE); - Sz = CGF.EmitScalarConversion(Sz, SE->getType(), - CGF.getContext().getSizeType(), - SE->getExprLoc()); - Size = CGF.Builder.CreateNUWMul(Size, Sz); + DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, + bool ForDeviceAddr) + : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} + }; + + /// The target directive from where the mappable clauses were extracted. It + /// is either a executable directive or a user-defined mapper directive. + llvm::PointerUnion + CurDir; + + /// Function the directive is being generated for. + CodeGenFunction &CGF; + + /// Set of all first private variables in the current directive. + /// bool data is set to true if the variable is implicitly marked as + /// firstprivate, false otherwise. + llvm::DenseMap, bool> FirstPrivateDecls; + + /// Map between device pointer declarations and their expression components. + /// The key value for declarations in 'this' is null. + llvm::DenseMap< + const ValueDecl *, + SmallVector> + DevPointersMap; + + /// Map between device addr declarations and their expression components. + /// The key value for declarations in 'this' is null. + llvm::DenseMap< + const ValueDecl *, + SmallVector> + HasDevAddrsMap; + + /// Map between lambda declarations and their map type. + llvm::DenseMap LambdasMap; + + llvm::Value *getExprTypeSize(const Expr *E) const + { + QualType ExprTy = E->getType().getCanonicalType(); + + // Calculate the size for array shaping expression. + if (const auto *OAE = dyn_cast(E)) + { + llvm::Value *Size = + CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); + for (const Expr *SE : OAE->getDimensions()) + { + llvm::Value *Sz = CGF.EmitScalarExpr(SE); + Sz = CGF.EmitScalarConversion(Sz, SE->getType(), + CGF.getContext().getSizeType(), + SE->getExprLoc()); + Size = CGF.Builder.CreateNUWMul(Size, Sz); + } + return Size; } - return Size; - } - - // Reference types are ignored for mapping purposes. - if (const auto *RefTy = ExprTy->getAs()) - ExprTy = RefTy->getPointeeType().getCanonicalType(); - - // Given that an array section is considered a built-in type, we need to - // do the calculation based on the length of the section instead of relying - // on CGF.getTypeSize(E->getType()). - if (const auto *OAE = dyn_cast(E)) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( - OAE->getBase()->IgnoreParenImpCasts()) - .getCanonicalType(); - - // If there is no length associated with the expression and lower bound is - // not specified too, that means we are using the whole length of the - // base. - if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && - !OAE->getLowerBound()) - return CGF.getTypeSize(BaseTy); - - llvm::Value *ElemSize; - if (const auto *PTy = BaseTy->getAs()) { - ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); - } else { - const auto *ATy = cast(BaseTy.getTypePtr()); - assert(ATy && "Expecting array type if not a pointer type."); - ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); + + // Reference types are ignored for mapping purposes. + if (const auto *RefTy = ExprTy->getAs()) + ExprTy = RefTy->getPointeeType().getCanonicalType(); + + // Given that an array section is considered a built-in type, we need to + // do the calculation based on the length of the section instead of relying + // on CGF.getTypeSize(E->getType()). + if (const auto *OAE = dyn_cast(E)) + { + QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + OAE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + + // If there is no length associated with the expression and lower bound is + // not specified too, that means we are using the whole length of the + // base. + if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && + !OAE->getLowerBound()) + return CGF.getTypeSize(BaseTy); + + llvm::Value *ElemSize; + if (const auto *PTy = BaseTy->getAs()) + { + ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); + } + else + { + const auto *ATy = cast(BaseTy.getTypePtr()); + assert(ATy && "Expecting array type if not a pointer type."); + ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); + } + + // If we don't have a length at this point, that is because we have an + // array section with a single element. + if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) + return ElemSize; + + if (const Expr *LenExpr = OAE->getLength()) + { + llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); + LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), + CGF.getContext().getSizeType(), + LenExpr->getExprLoc()); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && + OAE->getLowerBound() && "expected array_section[lb:]."); + // Size = sizetype - lb * elemtype; + llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); + llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); + LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), + CGF.getContext().getSizeType(), + OAE->getLowerBound()->getExprLoc()); + LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); + llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); + llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); + LengthVal = CGF.Builder.CreateSelect( + Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); + return LengthVal; } + return CGF.getTypeSize(ExprTy); + } - // If we don't have a length at this point, that is because we have an - // array section with a single element. - if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) - return ElemSize; - - if (const Expr *LenExpr = OAE->getLength()) { - llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); - LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), - CGF.getContext().getSizeType(), - LenExpr->getExprLoc()); - return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + /// Return the corresponding bits for a given map clause modifier. Add + /// a flag marking the map as a pointer if requested. Add a flag marking the + /// map as the first one of a series of maps that relate to the same map + /// expression. + OpenMPOffloadMappingFlags getMapTypeBits( + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, + ArrayRef MotionModifiers, bool IsImplicit, + bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const + { + OpenMPOffloadMappingFlags Bits = + IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT + : OpenMPOffloadMappingFlags::OMP_MAP_NONE; + switch (MapType) + { + case OMPC_MAP_alloc: + case OMPC_MAP_release: + // alloc and release is the default behavior in the runtime library, i.e. + // if we don't pass any bits alloc/release that is what the runtime is + // going to do. Therefore, we don't need to signal anything for these two + // type modifiers. + break; + case OMPC_MAP_to: + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO; + break; + case OMPC_MAP_from: + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM; + break; + case OMPC_MAP_tofrom: + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; + break; + case OMPC_MAP_delete: + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE; + break; + case OMPC_MAP_unknown: + llvm_unreachable("Unexpected map type!"); } - assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && - OAE->getLowerBound() && "expected array_section[lb:]."); - // Size = sizetype - lb * elemtype; - llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); - llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); - LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), - CGF.getContext().getSizeType(), - OAE->getLowerBound()->getExprLoc()); - LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); - llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); - llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); - LengthVal = CGF.Builder.CreateSelect( - Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); - return LengthVal; - } - return CGF.getTypeSize(ExprTy); - } - - /// Return the corresponding bits for a given map clause modifier. Add - /// a flag marking the map as a pointer if requested. Add a flag marking the - /// map as the first one of a series of maps that relate to the same map - /// expression. - OpenMPOffloadMappingFlags getMapTypeBits( - OpenMPMapClauseKind MapType, ArrayRef MapModifiers, - ArrayRef MotionModifiers, bool IsImplicit, - bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { - OpenMPOffloadMappingFlags Bits = - IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT - : OpenMPOffloadMappingFlags::OMP_MAP_NONE; - switch (MapType) { - case OMPC_MAP_alloc: - case OMPC_MAP_release: - // alloc and release is the default behavior in the runtime library, i.e. - // if we don't pass any bits alloc/release that is what the runtime is - // going to do. Therefore, we don't need to signal anything for these two - // type modifiers. - break; - case OMPC_MAP_to: - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO; - break; - case OMPC_MAP_from: - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM; - break; - case OMPC_MAP_tofrom: - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO | - OpenMPOffloadMappingFlags::OMP_MAP_FROM; - break; - case OMPC_MAP_delete: - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE; - break; - case OMPC_MAP_unknown: - llvm_unreachable("Unexpected map type!"); - } - if (AddPtrFlag) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; - if (AddIsTargetParamFlag) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; - if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; - if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; - if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || - llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; - if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; - if (IsNonContiguous) - Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; - return Bits; - } - - /// Return true if the provided expression is a final array section. A - /// final array section, is one whose length can't be proved to be one. - bool isFinalArraySectionExpression(const Expr *E) const { - const auto *OASE = dyn_cast(E); - - // It is not an array section and therefore not a unity-size one. - if (!OASE) - return false; + if (AddPtrFlag) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + if (AddIsTargetParamFlag) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || + llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + if (IsNonContiguous) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; + return Bits; + } + + /// Return true if the provided expression is a final array section. A + /// final array section, is one whose length can't be proved to be one. + bool isFinalArraySectionExpression(const Expr *E) const + { + const auto *OASE = dyn_cast(E); - // An array section with no colon always refer to a single element. - if (OASE->getColonLocFirst().isInvalid()) - return false; + // It is not an array section and therefore not a unity-size one. + if (!OASE) + return false; - const Expr *Length = OASE->getLength(); - - // If we don't have a length we have to check if the array has size 1 - // for this dimension. Also, we should always expect a length if the - // base type is pointer. - if (!Length) { - QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( - OASE->getBase()->IgnoreParenImpCasts()) - .getCanonicalType(); - if (const auto *ATy = dyn_cast(BaseQTy.getTypePtr())) - return ATy->getSize().getSExtValue() != 1; - // If we don't have a constant dimension length, we have to consider - // the current section as having any size, so it is not necessarily - // unitary. If it happen to be unity size, that's user fault. - return true; + // An array section with no colon always refer to a single element. + if (OASE->getColonLocFirst().isInvalid()) + return false; + + const Expr *Length = OASE->getLength(); + + // If we don't have a length we have to check if the array has size 1 + // for this dimension. Also, we should always expect a length if the + // base type is pointer. + if (!Length) + { + QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + OASE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + if (const auto *ATy = dyn_cast(BaseQTy.getTypePtr())) + return ATy->getSize().getSExtValue() != 1; + // If we don't have a constant dimension length, we have to consider + // the current section as having any size, so it is not necessarily + // unitary. If it happen to be unity size, that's user fault. + return true; + } + + // Check if the length evaluates to 1. + Expr::EvalResult Result; + if (!Length->EvaluateAsInt(Result, CGF.getContext())) + return true; // Can have more that size 1. + + llvm::APSInt ConstLength = Result.Val.getInt(); + return ConstLength.getSExtValue() != 1; } - // Check if the length evaluates to 1. - Expr::EvalResult Result; - if (!Length->EvaluateAsInt(Result, CGF.getContext())) - return true; // Can have more that size 1. - - llvm::APSInt ConstLength = Result.Val.getInt(); - return ConstLength.getSExtValue() != 1; - } - - /// Generate the base pointers, section pointers, sizes, map type bits, and - /// user-defined mappers (all included in \a CombinedInfo) for the provided - /// map type, map or motion modifiers, and expression components. - /// \a IsFirstComponent should be set to true if the provided set of - /// components is the first associated with a capture. - void generateInfoForComponentList( - OpenMPMapClauseKind MapType, ArrayRef MapModifiers, - ArrayRef MotionModifiers, - OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, - bool IsFirstComponentList, bool IsImplicit, - const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, - const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, - ArrayRef - OverlappedElements = std::nullopt) const { - // The following summarizes what has to be generated for each map and the - // types below. The generated information is expressed in this order: - // base pointer, section pointer, size, flags - // (to add to the ones that come from the map type and modifier). - // - // double d; - // int i[100]; - // float *p; - // int **a = &i; - // - // struct S1 { - // int i; - // float f[50]; - // } - // struct S2 { - // int i; - // float f[50]; - // S1 s; - // double *p; - // struct S2 *ps; - // int &ref; - // } - // S2 s; - // S2 *ps; - // - // map(d) - // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM - // - // map(i) - // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM - // - // map(i[1:23]) - // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM - // - // map(p) - // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM - // - // map(p[1:24]) - // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ - // in unified shared memory mode or for local pointers - // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM - // - // map((*a)[0:3]) - // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM - // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM - // - // map(**a) - // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM - // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM - // - // map(s) - // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM - // - // map(s.i) - // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM - // - // map(s.s.f) - // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM - // - // map(s.p) - // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM - // - // map(to: s.p[:22]) - // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) - // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) - // &(s.p), &(s.p[0]), 22*sizeof(double), - // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) - // (*) alloc space for struct members, only this is a target parameter - // (**) map the pointer (nothing to be mapped in this example) (the compiler - // optimizes this entry out, same in the examples below) - // (***) map the pointee (map: to) - // - // map(to: s.ref) - // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) - // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) - // (*) alloc space for struct members, only this is a target parameter - // (**) map the pointer (nothing to be mapped in this example) (the compiler - // optimizes this entry out, same in the examples below) - // (***) map the pointee (map: to) - // - // map(s.ps) - // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM - // - // map(from: s.ps->s.i) - // &s, &(s.ps), sizeof(S2*), TARGET_PARAM - // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) - // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM - // - // map(to: s.ps->ps) - // &s, &(s.ps), sizeof(S2*), TARGET_PARAM - // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) - // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO - // - // map(s.ps->ps->ps) - // &s, &(s.ps), sizeof(S2*), TARGET_PARAM - // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) - // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ - // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM - // - // map(to: s.ps->ps->s.f[:22]) - // &s, &(s.ps), sizeof(S2*), TARGET_PARAM - // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) - // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ - // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO - // - // map(ps) - // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM - // - // map(ps->i) - // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM - // - // map(ps->s.f) - // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM - // - // map(from: ps->p) - // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM - // - // map(to: ps->p[:22]) - // ps, &(ps->p), sizeof(double*), TARGET_PARAM - // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) - // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO - // - // map(ps->ps) - // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM - // - // map(from: ps->ps->s.i) - // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM - // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) - // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM - // - // map(from: ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM - // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM - // - // map(ps->ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM - // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ - // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM - // - // map(to: ps->ps->ps->s.f[:22]) - // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM - // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ - // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO - // - // map(to: s.f[:22]) map(from: s.p[:33]) - // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + - // sizeof(double*) (**), TARGET_PARAM - // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO - // &s, &(s.p), sizeof(double*), MEMBER_OF(1) - // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM - // (*) allocate contiguous space needed to fit all mapped members even if - // we allocate space for members not mapped (in this example, - // s.f[22..49] and s.s are not mapped, yet we must allocate space for - // them as well because they fall between &s.f[0] and &s.p) - // - // map(from: s.f[:22]) map(to: ps->p[:33]) - // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM - // ps, &(ps->p), sizeof(S2*), TARGET_PARAM - // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) - // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO - // (*) the struct this entry pertains to is the 2nd element in the list of - // arguments, hence MEMBER_OF(2) - // - // map(from: s.f[:22], s.s) map(to: ps->p[:33]) - // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM - // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM - // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM - // ps, &(ps->p), sizeof(S2*), TARGET_PARAM - // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) - // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO - // (*) the struct this entry pertains to is the 4th element in the list - // of arguments, hence MEMBER_OF(4) - - // Track if the map information being generated is the first for a capture. - bool IsCaptureFirstInfo = IsFirstComponentList; - // When the variable is on a declare target link or in a to clause with - // unified memory, a reference is needed to hold the host/device address - // of the variable. - bool RequiresReference = false; - - // Scan the components from the base to the complete expression. - auto CI = Components.rbegin(); - auto CE = Components.rend(); - auto I = CI; - - // Track if the map information being generated is the first for a list of - // components. - bool IsExpressionFirstInfo = true; - bool FirstPointerInComplexData = false; - Address BP = Address::invalid(); - const Expr *AssocExpr = I->getAssociatedExpression(); - const auto *AE = dyn_cast(AssocExpr); - const auto *OASE = dyn_cast(AssocExpr); - const auto *OAShE = dyn_cast(AssocExpr); - - if (isa(AssocExpr)) { - // The base is the 'this' pointer. The content of the pointer is going - // to be the base of the field being mapped. - BP = CGF.LoadCXXThisAddress(); - } else if ((AE && isa(AE->getBase()->IgnoreParenImpCasts())) || + /// Generate the base pointers, section pointers, sizes, map type bits, and + /// user-defined mappers (all included in \a CombinedInfo) for the provided + /// map type, map or motion modifiers, and expression components. + /// \a IsFirstComponent should be set to true if the provided set of + /// components is the first associated with a capture. + void generateInfoForComponentList( + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, + ArrayRef MotionModifiers, + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, + bool IsFirstComponentList, bool IsImplicit, + const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, + const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, + ArrayRef + OverlappedElements = std::nullopt) const + { + // The following summarizes what has to be generated for each map and the + // types below. The generated information is expressed in this order: + // base pointer, section pointer, size, flags + // (to add to the ones that come from the map type and modifier). + // + // double d; + // int i[100]; + // float *p; + // int **a = &i; + // + // struct S1 { + // int i; + // float f[50]; + // } + // struct S2 { + // int i; + // float f[50]; + // S1 s; + // double *p; + // struct S2 *ps; + // int &ref; + // } + // S2 s; + // S2 *ps; + // + // map(d) + // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM + // + // map(i) + // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM + // + // map(i[1:23]) + // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM + // + // map(p) + // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM + // + // map(p[1:24]) + // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ + // in unified shared memory mode or for local pointers + // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM + // + // map((*a)[0:3]) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM + // + // map(**a) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM + // + // map(s) + // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM + // + // map(s.i) + // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM + // + // map(s.s.f) + // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM + // + // map(s.p) + // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM + // + // map(to: s.p[:22]) + // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) + // &(s.p), &(s.p[0]), 22*sizeof(double), + // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) + // (*) alloc space for struct members, only this is a target parameter + // (**) map the pointer (nothing to be mapped in this example) (the compiler + // optimizes this entry out, same in the examples below) + // (***) map the pointee (map: to) + // + // map(to: s.ref) + // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) + // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) + // (*) alloc space for struct members, only this is a target parameter + // (**) map the pointer (nothing to be mapped in this example) (the compiler + // optimizes this entry out, same in the examples below) + // (***) map the pointee (map: to) + // + // map(s.ps) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM + // + // map(from: s.ps->s.i) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // + // map(to: s.ps->ps) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO + // + // map(s.ps->ps->ps) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM + // + // map(to: s.ps->ps->s.f[:22]) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO + // + // map(ps) + // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM + // + // map(ps->i) + // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM + // + // map(ps->s.f) + // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM + // + // map(from: ps->p) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM + // + // map(to: ps->p[:22]) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) + // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO + // + // map(ps->ps) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM + // + // map(from: ps->ps->s.i) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // + // map(from: ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // + // map(ps->ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM + // + // map(to: ps->ps->ps->s.f[:22]) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO + // + // map(to: s.f[:22]) map(from: s.p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + + // sizeof(double*) (**), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) + // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // (*) allocate contiguous space needed to fit all mapped members even if + // we allocate space for members not mapped (in this example, + // s.f[22..49] and s.s are not mapped, yet we must allocate space for + // them as well because they fall between &s.f[0] and &s.p) + // + // map(from: s.f[:22]) map(to: ps->p[:33]) + // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 2nd element in the list of + // arguments, hence MEMBER_OF(2) + // + // map(from: s.f[:22], s.s) map(to: ps->p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM + // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 4th element in the list + // of arguments, hence MEMBER_OF(4) + + // Track if the map information being generated is the first for a capture. + bool IsCaptureFirstInfo = IsFirstComponentList; + // When the variable is on a declare target link or in a to clause with + // unified memory, a reference is needed to hold the host/device address + // of the variable. + bool RequiresReference = false; + + // Scan the components from the base to the complete expression. + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto I = CI; + + // Track if the map information being generated is the first for a list of + // components. + bool IsExpressionFirstInfo = true; + bool FirstPointerInComplexData = false; + Address BP = Address::invalid(); + const Expr *AssocExpr = I->getAssociatedExpression(); + const auto *AE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); + const auto *OAShE = dyn_cast(AssocExpr); + + if (isa(AssocExpr)) + { + // The base is the 'this' pointer. The content of the pointer is going + // to be the base of the field being mapped. + BP = CGF.LoadCXXThisAddress(); + } + else if ((AE && isa(AE->getBase()->IgnoreParenImpCasts())) || (OASE && - isa(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); - } else if (OAShE && - isa(OAShE->getBase()->IgnoreParenCasts())) { - BP = Address( - CGF.EmitScalarExpr(OAShE->getBase()), - CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), - CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); - } else { - // The base is the reference to the variable. - // BP = &Var. - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); - if (const auto *VD = - dyn_cast_or_null(I->getAssociatedDeclaration())) { - if (std::optional Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { - if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { - RequiresReference = true; - BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); - } - } + isa(OASE->getBase()->IgnoreParenImpCasts()))) + { + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); } - - // If the variable is a pointer and is being dereferenced (i.e. is not - // the last component), the base has to be the pointer itself, not its - // reference. References are ignored for mapping purposes. - QualType Ty = - I->getAssociatedDeclaration()->getType().getNonReferenceType(); - if (Ty->isAnyPointerType() && std::next(I) != CE) { - // No need to generate individual map information for the pointer, it - // can be associated with the combined storage if shared memory mode is - // active or the base declaration is not global variable. - const auto *VD = dyn_cast(I->getAssociatedDeclaration()); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - !VD || VD->hasLocalStorage()) - BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); - else - FirstPointerInComplexData = true; - ++I; + else if (OAShE && + isa(OAShE->getBase()->IgnoreParenCasts())) + { + BP = Address( + CGF.EmitScalarExpr(OAShE->getBase()), + CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), + CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); } - } + else + { + // The base is the reference to the variable. + // BP = &Var. + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); + if (const auto *VD = + dyn_cast_or_null(I->getAssociatedDeclaration())) + { + if (std::optional Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) + { + if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) + { + RequiresReference = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); + } + } + } - // Track whether a component of the list should be marked as MEMBER_OF some - // combined entry (for partial structs). Only the first PTR_AND_OBJ entry - // in a component list should be marked as MEMBER_OF, all subsequent entries - // do not belong to the base struct. E.g. - // struct S2 s; - // s.ps->ps->ps->f[:] - // (1) (2) (3) (4) - // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a - // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) - // is the pointee of ps(2) which is not member of struct s, so it should not - // be marked as such (it is still PTR_AND_OBJ). - // The variable is initialized to false so that PTR_AND_OBJ entries which - // are not struct members are not considered (e.g. array of pointers to - // data). - bool ShouldBeMemberOf = false; - - // Variable keeping track of whether or not we have encountered a component - // in the component list which is a member expression. Useful when we have a - // pointer or a final array section, in which case it is the previous - // component in the list which tells us whether we have a member expression. - // E.g. X.f[:] - // While processing the final array section "[:]" it is "f" which tells us - // whether we are dealing with a member of a declared struct. - const MemberExpr *EncounteredME = nullptr; - - // Track for the total number of dimension. Start from one for the dummy - // dimension. - uint64_t DimSize = 1; - - bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; - bool IsPrevMemberReference = false; - - for (; I != CE; ++I) { - // If the current component is member of a struct (parent struct) mark it. - if (!EncounteredME) { - EncounteredME = dyn_cast(I->getAssociatedExpression()); - // If we encounter a PTR_AND_OBJ entry from now on it should be marked - // as MEMBER_OF the parent struct. - if (EncounteredME) { - ShouldBeMemberOf = true; - // Do not emit as complex pointer if this is actually not array-like - // expression. - if (FirstPointerInComplexData) { - QualType Ty = std::prev(I) - ->getAssociatedDeclaration() - ->getType() - .getNonReferenceType(); + // If the variable is a pointer and is being dereferenced (i.e. is not + // the last component), the base has to be the pointer itself, not its + // reference. References are ignored for mapping purposes. + QualType Ty = + I->getAssociatedDeclaration()->getType().getNonReferenceType(); + if (Ty->isAnyPointerType() && std::next(I) != CE) + { + // No need to generate individual map information for the pointer, it + // can be associated with the combined storage if shared memory mode is + // active or the base declaration is not global variable. + const auto *VD = dyn_cast(I->getAssociatedDeclaration()); + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + !VD || VD->hasLocalStorage()) BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); - FirstPointerInComplexData = false; - } + else + FirstPointerInComplexData = true; + ++I; } } - auto Next = std::next(I); - - // We need to generate the addresses and sizes if this is the last - // component, if the component is a pointer or if it is an array section - // whose length can't be proved to be one. If this is a pointer, it - // becomes the base address for the following components. - - // A final array section, is one whose length can't be proved to be one. - // If the map item is non-contiguous then we don't treat any array section - // as final array section. - bool IsFinalArraySection = - !IsNonContiguous && - isFinalArraySectionExpression(I->getAssociatedExpression()); - - // If we have a declaration for the mapping use that, otherwise use - // the base declaration of the map clause. - const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) - ? I->getAssociatedDeclaration() - : BaseDecl; - MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() - : MapExpr; - - // Get information on whether the element is a pointer. Have to do a - // special treatment for array sections given that they are built-in - // types. - const auto *OASE = - dyn_cast(I->getAssociatedExpression()); - const auto *OAShE = - dyn_cast(I->getAssociatedExpression()); - const auto *UO = dyn_cast(I->getAssociatedExpression()); - const auto *BO = dyn_cast(I->getAssociatedExpression()); - bool IsPointer = - OAShE || - (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) - .getCanonicalType() - ->isAnyPointerType()) || - I->getAssociatedExpression()->getType()->isAnyPointerType(); - bool IsMemberReference = isa(I->getAssociatedExpression()) && - MapDecl && - MapDecl->getType()->isLValueReferenceType(); - bool IsNonDerefPointer = IsPointer && - !(UO && UO->getOpcode() != UO_Deref) && !BO && - !IsNonContiguous; - - if (OASE) - ++DimSize; - - if (Next == CE || IsMemberReference || IsNonDerefPointer || - IsFinalArraySection) { - // If this is not the last component, we expect the pointer to be - // associated with an array expression or member expression. - assert((Next == CE || - isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression())) && - "Unexpected expression"); - - Address LB = Address::invalid(); - Address LowestElem = Address::invalid(); - auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, - const MemberExpr *E) { - const Expr *BaseExpr = E->getBase(); - // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a - // scalar. - LValue BaseLV; - if (E->isArrow()) { - LValueBaseInfo BaseInfo; - TBAAAccessInfo TBAAInfo; - Address Addr = - CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); - QualType PtrTy = BaseExpr->getType()->getPointeeType(); - BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); - } else { - BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); + // Track whether a component of the list should be marked as MEMBER_OF some + // combined entry (for partial structs). Only the first PTR_AND_OBJ entry + // in a component list should be marked as MEMBER_OF, all subsequent entries + // do not belong to the base struct. E.g. + // struct S2 s; + // s.ps->ps->ps->f[:] + // (1) (2) (3) (4) + // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a + // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) + // is the pointee of ps(2) which is not member of struct s, so it should not + // be marked as such (it is still PTR_AND_OBJ). + // The variable is initialized to false so that PTR_AND_OBJ entries which + // are not struct members are not considered (e.g. array of pointers to + // data). + bool ShouldBeMemberOf = false; + + // Variable keeping track of whether or not we have encountered a component + // in the component list which is a member expression. Useful when we have a + // pointer or a final array section, in which case it is the previous + // component in the list which tells us whether we have a member expression. + // E.g. X.f[:] + // While processing the final array section "[:]" it is "f" which tells us + // whether we are dealing with a member of a declared struct. + const MemberExpr *EncounteredME = nullptr; + + // Track for the total number of dimension. Start from one for the dummy + // dimension. + uint64_t DimSize = 1; + + bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; + bool IsPrevMemberReference = false; + + for (; I != CE; ++I) + { + // If the current component is member of a struct (parent struct) mark it. + if (!EncounteredME) + { + EncounteredME = dyn_cast(I->getAssociatedExpression()); + // If we encounter a PTR_AND_OBJ entry from now on it should be marked + // as MEMBER_OF the parent struct. + if (EncounteredME) + { + ShouldBeMemberOf = true; + // Do not emit as complex pointer if this is actually not array-like + // expression. + if (FirstPointerInComplexData) + { + QualType Ty = std::prev(I) + ->getAssociatedDeclaration() + ->getType() + .getNonReferenceType(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); + FirstPointerInComplexData = false; + } } - return BaseLV; - }; - if (OAShE) { - LowestElem = LB = - Address(CGF.EmitScalarExpr(OAShE->getBase()), - CGF.ConvertTypeForMem( - OAShE->getBase()->getType()->getPointeeType()), - CGF.getContext().getTypeAlignInChars( - OAShE->getBase()->getType())); - } else if (IsMemberReference) { - const auto *ME = cast(I->getAssociatedExpression()); - LValue BaseLVal = EmitMemberExprBase(CGF, ME); - LowestElem = CGF.EmitLValueForFieldInitialization( - BaseLVal, cast(MapDecl)) - .getAddress(CGF); - LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) - .getAddress(CGF); - } else { - LowestElem = LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) - .getAddress(CGF); } - // If this component is a pointer inside the base struct then we don't - // need to create any entry for it - it will be combined with the object - // it is pointing to into a single PTR_AND_OBJ entry. - bool IsMemberPointerOrAddr = - EncounteredME && - (((IsPointer || ForDeviceAddr) && - I->getAssociatedExpression() == EncounteredME) || - (IsPrevMemberReference && !IsPointer) || - (IsMemberReference && Next != CE && - !Next->getAssociatedExpression()->getType()->isPointerType())); - if (!OverlappedElements.empty() && Next == CE) { - // Handle base element with the info for overlapped elements. - assert(!PartialStruct.Base.isValid() && "The base element is set."); - assert(!IsPointer && - "Unexpected base element with the pointer type."); - // Mark the whole struct as the struct that requires allocation on the - // device. - PartialStruct.LowestElem = {0, LowestElem}; - CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( - I->getAssociatedExpression()->getType()); - Address HB = CGF.Builder.CreateConstGEP( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), - TypeSize.getQuantity() - 1); - PartialStruct.HighestElem = { - std::numeric_limits::max(), - HB}; - PartialStruct.Base = BP; - PartialStruct.LB = LB; - assert( - PartialStruct.PreliminaryMapData.BasePointers.empty() && - "Overlapped elements must be used only once for the variable."); - std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); - // Emit data for non-overlapped data. - OpenMPOffloadMappingFlags Flags = - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | - getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, - /*AddPtrFlag=*/false, - /*AddIsTargetParamFlag=*/false, IsNonContiguous); - llvm::Value *Size = nullptr; - // Do bitcopy of all non-overlapped structure elements. - for (OMPClauseMappableExprCommon::MappableExprComponentListRef - Component : OverlappedElements) { - Address ComponentLB = Address::invalid(); - for (const OMPClauseMappableExprCommon::MappableComponent &MC : - Component) { - if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { - const auto *FD = dyn_cast(VD); - if (FD && FD->getType()->isLValueReferenceType()) { - const auto *ME = - cast(MC.getAssociatedExpression()); - LValue BaseLVal = EmitMemberExprBase(CGF, ME); - ComponentLB = - CGF.EmitLValueForFieldInitialization(BaseLVal, FD) - .getAddress(CGF); - } else { - ComponentLB = - CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(CGF); + auto Next = std::next(I); + + // We need to generate the addresses and sizes if this is the last + // component, if the component is a pointer or if it is an array section + // whose length can't be proved to be one. If this is a pointer, it + // becomes the base address for the following components. + + // A final array section, is one whose length can't be proved to be one. + // If the map item is non-contiguous then we don't treat any array section + // as final array section. + bool IsFinalArraySection = + !IsNonContiguous && + isFinalArraySectionExpression(I->getAssociatedExpression()); + + // If we have a declaration for the mapping use that, otherwise use + // the base declaration of the map clause. + const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) + ? I->getAssociatedDeclaration() + : BaseDecl; + MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() + : MapExpr; + + // Get information on whether the element is a pointer. Have to do a + // special treatment for array sections given that they are built-in + // types. + const auto *OASE = + dyn_cast(I->getAssociatedExpression()); + const auto *OAShE = + dyn_cast(I->getAssociatedExpression()); + const auto *UO = dyn_cast(I->getAssociatedExpression()); + const auto *BO = dyn_cast(I->getAssociatedExpression()); + bool IsPointer = + OAShE || + (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + .getCanonicalType() + ->isAnyPointerType()) || + I->getAssociatedExpression()->getType()->isAnyPointerType(); + bool IsMemberReference = isa(I->getAssociatedExpression()) && + MapDecl && + MapDecl->getType()->isLValueReferenceType(); + bool IsNonDerefPointer = IsPointer && + !(UO && UO->getOpcode() != UO_Deref) && !BO && + !IsNonContiguous; + + if (OASE) + ++DimSize; + + if (Next == CE || IsMemberReference || IsNonDerefPointer || + IsFinalArraySection) + { + // If this is not the last component, we expect the pointer to be + // associated with an array expression or member expression. + assert((Next == CE || + isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression())) && + "Unexpected expression"); + + Address LB = Address::invalid(); + Address LowestElem = Address::invalid(); + auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, + const MemberExpr *E) + { + const Expr *BaseExpr = E->getBase(); + // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a + // scalar. + LValue BaseLV; + if (E->isArrow()) + { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = + CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); + QualType PtrTy = BaseExpr->getType()->getPointeeType(); + BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); + } + else + { + BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); + } + return BaseLV; + }; + if (OAShE) + { + LowestElem = LB = + Address(CGF.EmitScalarExpr(OAShE->getBase()), + CGF.ConvertTypeForMem( + OAShE->getBase()->getType()->getPointeeType()), + CGF.getContext().getTypeAlignInChars( + OAShE->getBase()->getType())); + } + else if (IsMemberReference) + { + const auto *ME = cast(I->getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + LowestElem = CGF.EmitLValueForFieldInitialization( + BaseLVal, cast(MapDecl)) + .getAddress(CGF); + LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) + .getAddress(CGF); + } + else + { + LowestElem = LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); + } + + // If this component is a pointer inside the base struct then we don't + // need to create any entry for it - it will be combined with the object + // it is pointing to into a single PTR_AND_OBJ entry. + bool IsMemberPointerOrAddr = + EncounteredME && + (((IsPointer || ForDeviceAddr) && + I->getAssociatedExpression() == EncounteredME) || + (IsPrevMemberReference && !IsPointer) || + (IsMemberReference && Next != CE && + !Next->getAssociatedExpression()->getType()->isPointerType())); + if (!OverlappedElements.empty() && Next == CE) + { + // Handle base element with the info for overlapped elements. + assert(!PartialStruct.Base.isValid() && "The base element is set."); + assert(!IsPointer && + "Unexpected base element with the pointer type."); + // Mark the whole struct as the struct that requires allocation on the + // device. + PartialStruct.LowestElem = {0, LowestElem}; + CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( + I->getAssociatedExpression()->getType()); + Address HB = CGF.Builder.CreateConstGEP( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), + TypeSize.getQuantity() - 1); + PartialStruct.HighestElem = { + std::numeric_limits::max(), + HB}; + PartialStruct.Base = BP; + PartialStruct.LB = LB; + assert( + PartialStruct.PreliminaryMapData.BasePointers.empty() && + "Overlapped elements must be used only once for the variable."); + std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); + // Emit data for non-overlapped data. + OpenMPOffloadMappingFlags Flags = + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false, IsNonContiguous); + llvm::Value *Size = nullptr; + // Do bitcopy of all non-overlapped structure elements. + for (OMPClauseMappableExprCommon::MappableExprComponentListRef + Component : OverlappedElements) + { + Address ComponentLB = Address::invalid(); + for (const OMPClauseMappableExprCommon::MappableComponent &MC : + Component) + { + if (const ValueDecl *VD = MC.getAssociatedDeclaration()) + { + const auto *FD = dyn_cast(VD); + if (FD && FD->getType()->isLValueReferenceType()) + { + const auto *ME = + cast(MC.getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + ComponentLB = + CGF.EmitLValueForFieldInitialization(BaseLVal, FD) + .getAddress(CGF); + } + else + { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) + .getAddress(CGF); + } + Size = CGF.Builder.CreatePtrDiff( + CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), + CGF.EmitCastToVoidPtr(LB.getPointer())); + break; } - Size = CGF.Builder.CreatePtrDiff( - CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), - CGF.EmitCastToVoidPtr(LB.getPointer())); - break; } + assert(Size && "Failed to determine structure size"); + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.Types.push_back(Flags); + CombinedInfo.Mappers.push_back(nullptr); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } - assert(Size && "Failed to determine structure size"); CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); CombinedInfo.BasePointers.push_back(BP.getPointer()); CombinedInfo.Pointers.push_back(LB.getPointer()); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - Size, CGF.Int64Ty, /*isSigned=*/true)); + Size = CGF.Builder.CreatePtrDiff( + CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), + CGF.EmitCastToVoidPtr(LB.getPointer())); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back(Flags); CombinedInfo.Mappers.push_back(nullptr); CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1); - LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); + break; } - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.Pointers.push_back(LB.getPointer()); - Size = CGF.Builder.CreatePtrDiff( - CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), - CGF.EmitCastToVoidPtr(LB.getPointer())); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(Flags); - CombinedInfo.Mappers.push_back(nullptr); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); - break; - } - llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); - if (!IsMemberPointerOrAddr || - (Next == CE && MapType != OMPC_MAP_unknown)) { - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.Pointers.push_back(LB.getPointer()); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); - - // If Mapper is valid, the last component inherits the mapper. - bool HasMapper = Mapper && Next == CE; - CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); - - // We need to add a pointer flag for each map that comes from the - // same expression except for the first one. We also need to signal - // this map is the first one that relates with the current capture - // (there is a set of entries for each capture). - OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapModifiers, MotionModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference || - FirstPointerInComplexData || IsMemberReference, - IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); - - if (!IsExpressionFirstInfo || IsMemberReference) { - // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, - // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. - if (IsPointer || (IsMemberReference && Next != CE)) - Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | - OpenMPOffloadMappingFlags::OMP_MAP_FROM | - OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | - OpenMPOffloadMappingFlags::OMP_MAP_DELETE | - OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); - - if (ShouldBeMemberOf) { - // Set placeholder value MEMBER_OF=FFFF to indicate that the flag - // should be later updated with the correct value of MEMBER_OF. - Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; - // From now on, all subsequent PTR_AND_OBJ entries should not be - // marked as MEMBER_OF. - ShouldBeMemberOf = false; + llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); + if (!IsMemberPointerOrAddr || + (Next == CE && MapType != OMPC_MAP_unknown)) + { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); + + // If Mapper is valid, the last component inherits the mapper. + bool HasMapper = Mapper && Next == CE; + CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); + + // We need to add a pointer flag for each map that comes from the + // same expression except for the first one. We also need to signal + // this map is the first one that relates with the current capture + // (there is a set of entries for each capture). + OpenMPOffloadMappingFlags Flags = getMapTypeBits( + MapType, MapModifiers, MotionModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference || + FirstPointerInComplexData || IsMemberReference, + IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); + + if (!IsExpressionFirstInfo || IsMemberReference) + { + // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, + // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. + if (IsPointer || (IsMemberReference && Next != CE)) + Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM | + OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | + OpenMPOffloadMappingFlags::OMP_MAP_DELETE | + OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); + + if (ShouldBeMemberOf) + { + // Set placeholder value MEMBER_OF=FFFF to indicate that the flag + // should be later updated with the correct value of MEMBER_OF. + Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; + // From now on, all subsequent PTR_AND_OBJ entries should not be + // marked as MEMBER_OF. + ShouldBeMemberOf = false; + } } - } - CombinedInfo.Types.push_back(Flags); - } + CombinedInfo.Types.push_back(Flags); + } - // If we have encountered a member expression so far, keep track of the - // mapped member. If the parent is "*this", then the value declaration - // is nullptr. - if (EncounteredME) { - const auto *FD = cast(EncounteredME->getMemberDecl()); - unsigned FieldIndex = FD->getFieldIndex(); - - // Update info about the lowest and highest elements for this struct - if (!PartialStruct.Base.isValid()) { - PartialStruct.LowestElem = {FieldIndex, LowestElem}; - if (IsFinalArraySection) { - Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) - .getAddress(CGF); - PartialStruct.HighestElem = {FieldIndex, HB}; - } else { + // If we have encountered a member expression so far, keep track of the + // mapped member. If the parent is "*this", then the value declaration + // is nullptr. + if (EncounteredME) + { + const auto *FD = cast(EncounteredME->getMemberDecl()); + unsigned FieldIndex = FD->getFieldIndex(); + + // Update info about the lowest and highest elements for this struct + if (!PartialStruct.Base.isValid()) + { + PartialStruct.LowestElem = {FieldIndex, LowestElem}; + if (IsFinalArraySection) + { + Address HB = + CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + .getAddress(CGF); + PartialStruct.HighestElem = {FieldIndex, HB}; + } + else + { + PartialStruct.HighestElem = {FieldIndex, LowestElem}; + } + PartialStruct.Base = BP; + PartialStruct.LB = BP; + } + else if (FieldIndex < PartialStruct.LowestElem.first) + { + PartialStruct.LowestElem = {FieldIndex, LowestElem}; + } + else if (FieldIndex > PartialStruct.HighestElem.first) + { PartialStruct.HighestElem = {FieldIndex, LowestElem}; } - PartialStruct.Base = BP; - PartialStruct.LB = BP; - } else if (FieldIndex < PartialStruct.LowestElem.first) { - PartialStruct.LowestElem = {FieldIndex, LowestElem}; - } else if (FieldIndex > PartialStruct.HighestElem.first) { - PartialStruct.HighestElem = {FieldIndex, LowestElem}; } - } - // Need to emit combined struct for array sections. - if (IsFinalArraySection || IsNonContiguous) - PartialStruct.IsArraySection = true; + // Need to emit combined struct for array sections. + if (IsFinalArraySection || IsNonContiguous) + PartialStruct.IsArraySection = true; - // If we have a final array section, we are done with this expression. - if (IsFinalArraySection) - break; + // If we have a final array section, we are done with this expression. + if (IsFinalArraySection) + break; + + // The pointer becomes the base for the next element. + if (Next != CE) + BP = IsMemberReference ? LowestElem : LB; - // The pointer becomes the base for the next element. - if (Next != CE) - BP = IsMemberReference ? LowestElem : LB; - - IsExpressionFirstInfo = false; - IsCaptureFirstInfo = false; - FirstPointerInComplexData = false; - IsPrevMemberReference = IsMemberReference; - } else if (FirstPointerInComplexData) { - QualType Ty = Components.rbegin() - ->getAssociatedDeclaration() - ->getType() - .getNonReferenceType(); - BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); - FirstPointerInComplexData = false; + IsExpressionFirstInfo = false; + IsCaptureFirstInfo = false; + FirstPointerInComplexData = false; + IsPrevMemberReference = IsMemberReference; + } + else if (FirstPointerInComplexData) + { + QualType Ty = Components.rbegin() + ->getAssociatedDeclaration() + ->getType() + .getNonReferenceType(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); + FirstPointerInComplexData = false; + } } - } - // If ran into the whole component - allocate the space for the whole - // record. - if (!EncounteredME) - PartialStruct.HasCompleteRecord = true; + // If ran into the whole component - allocate the space for the whole + // record. + if (!EncounteredME) + PartialStruct.HasCompleteRecord = true; - if (!IsNonContiguous) - return; + if (!IsNonContiguous) + return; - const ASTContext &Context = CGF.getContext(); - - // For supporting stride in array section, we need to initialize the first - // dimension size as 1, first offset as 0, and first count as 1 - MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; - MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; - MapValuesArrayTy CurStrides; - MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; - uint64_t ElementTypeSize; - - // Collect Size information for each dimension and get the element size as - // the first Stride. For example, for `int arr[10][10]`, the DimSizes - // should be [10, 10] and the first stride is 4 btyes. - for (const OMPClauseMappableExprCommon::MappableComponent &Component : - Components) { - const Expr *AssocExpr = Component.getAssociatedExpression(); - const auto *OASE = dyn_cast(AssocExpr); + const ASTContext &Context = CGF.getContext(); + + // For supporting stride in array section, we need to initialize the first + // dimension size as 1, first offset as 0, and first count as 1 + MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; + MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; + MapValuesArrayTy CurStrides; + MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; + uint64_t ElementTypeSize; + + // Collect Size information for each dimension and get the element size as + // the first Stride. For example, for `int arr[10][10]`, the DimSizes + // should be [10, 10] and the first stride is 4 btyes. + for (const OMPClauseMappableExprCommon::MappableComponent &Component : + Components) + { + const Expr *AssocExpr = Component.getAssociatedExpression(); + const auto *OASE = dyn_cast(AssocExpr); - if (!OASE) - continue; + if (!OASE) + continue; - QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); - auto *CAT = Context.getAsConstantArrayType(Ty); - auto *VAT = Context.getAsVariableArrayType(Ty); - - // We need all the dimension size except for the last dimension. - assert((VAT || CAT || &Component == &*Components.begin()) && - "Should be either ConstantArray or VariableArray if not the " - "first Component"); - - // Get element size if CurStrides is empty. - if (CurStrides.empty()) { - const Type *ElementType = nullptr; - if (CAT) - ElementType = CAT->getElementType().getTypePtr(); - else if (VAT) - ElementType = VAT->getElementType().getTypePtr(); - else - assert(&Component == &*Components.begin() && - "Only expect pointer (non CAT or VAT) when this is the " - "first Component"); - // If ElementType is null, then it means the base is a pointer - // (neither CAT nor VAT) and we'll attempt to get ElementType again - // for next iteration. - if (ElementType) { - // For the case that having pointer as base, we need to remove one - // level of indirection. - if (&Component != &*Components.begin()) - ElementType = ElementType->getPointeeOrArrayElementType(); - ElementTypeSize = - Context.getTypeSizeInChars(ElementType).getQuantity(); - CurStrides.push_back( - llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); + QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + auto *CAT = Context.getAsConstantArrayType(Ty); + auto *VAT = Context.getAsVariableArrayType(Ty); + + // We need all the dimension size except for the last dimension. + assert((VAT || CAT || &Component == &*Components.begin()) && + "Should be either ConstantArray or VariableArray if not the " + "first Component"); + + // Get element size if CurStrides is empty. + if (CurStrides.empty()) + { + const Type *ElementType = nullptr; + if (CAT) + ElementType = CAT->getElementType().getTypePtr(); + else if (VAT) + ElementType = VAT->getElementType().getTypePtr(); + else + assert(&Component == &*Components.begin() && + "Only expect pointer (non CAT or VAT) when this is the " + "first Component"); + // If ElementType is null, then it means the base is a pointer + // (neither CAT nor VAT) and we'll attempt to get ElementType again + // for next iteration. + if (ElementType) + { + // For the case that having pointer as base, we need to remove one + // level of indirection. + if (&Component != &*Components.begin()) + ElementType = ElementType->getPointeeOrArrayElementType(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + CurStrides.push_back( + llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); + } + } + // Get dimension value except for the last dimension since we don't need + // it. + if (DimSizes.size() < Components.size() - 1) + { + if (CAT) + DimSizes.push_back(llvm::ConstantInt::get( + CGF.Int64Ty, CAT->getSize().getZExtValue())); + else if (VAT) + DimSizes.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, + /*IsSigned=*/false)); } } - // Get dimension value except for the last dimension since we don't need - // it. - if (DimSizes.size() < Components.size() - 1) { - if (CAT) - DimSizes.push_back(llvm::ConstantInt::get( - CGF.Int64Ty, CAT->getSize().getZExtValue())); - else if (VAT) - DimSizes.push_back(CGF.Builder.CreateIntCast( - CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, - /*IsSigned=*/false)); - } - } - // Skip the dummy dimension since we have already have its information. - auto *DI = DimSizes.begin() + 1; - // Product of dimension. - llvm::Value *DimProd = - llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); - - // Collect info for non-contiguous. Notice that offset, count, and stride - // are only meaningful for array-section, so we insert a null for anything - // other than array-section. - // Also, the size of offset, count, and stride are not the same as - // pointers, base_pointers, sizes, or dims. Instead, the size of offset, - // count, and stride are the same as the number of non-contiguous - // declaration in target update to/from clause. - for (const OMPClauseMappableExprCommon::MappableComponent &Component : - Components) { - const Expr *AssocExpr = Component.getAssociatedExpression(); - - if (const auto *AE = dyn_cast(AssocExpr)) { - llvm::Value *Offset = CGF.Builder.CreateIntCast( - CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, - /*isSigned=*/false); - CurOffsets.push_back(Offset); - CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); - CurStrides.push_back(CurStrides.back()); - continue; - } + // Skip the dummy dimension since we have already have its information. + auto *DI = DimSizes.begin() + 1; + // Product of dimension. + llvm::Value *DimProd = + llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); + + // Collect info for non-contiguous. Notice that offset, count, and stride + // are only meaningful for array-section, so we insert a null for anything + // other than array-section. + // Also, the size of offset, count, and stride are not the same as + // pointers, base_pointers, sizes, or dims. Instead, the size of offset, + // count, and stride are the same as the number of non-contiguous + // declaration in target update to/from clause. + for (const OMPClauseMappableExprCommon::MappableComponent &Component : + Components) + { + const Expr *AssocExpr = Component.getAssociatedExpression(); + + if (const auto *AE = dyn_cast(AssocExpr)) + { + llvm::Value *Offset = CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, + /*isSigned=*/false); + CurOffsets.push_back(Offset); + CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); + CurStrides.push_back(CurStrides.back()); + continue; + } - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); - if (!OASE) - continue; + if (!OASE) + continue; - // Offset - const Expr *OffsetExpr = OASE->getLowerBound(); - llvm::Value *Offset = nullptr; - if (!OffsetExpr) { - // If offset is absent, then we just set it to zero. - Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); - } else { - Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), - CGF.Int64Ty, - /*isSigned=*/false); - } - CurOffsets.push_back(Offset); + // Offset + const Expr *OffsetExpr = OASE->getLowerBound(); + llvm::Value *Offset = nullptr; + if (!OffsetExpr) + { + // If offset is absent, then we just set it to zero. + Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); + } + else + { + Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), + CGF.Int64Ty, + /*isSigned=*/false); + } + CurOffsets.push_back(Offset); - // Count - const Expr *CountExpr = OASE->getLength(); - llvm::Value *Count = nullptr; - if (!CountExpr) { - // In Clang, once a high dimension is an array section, we construct all - // the lower dimension as array section, however, for case like - // arr[0:2][2], Clang construct the inner dimension as an array section - // but it actually is not in an array section form according to spec. - if (!OASE->getColonLocFirst().isValid() && - !OASE->getColonLocSecond().isValid()) { - Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); - } else { - // OpenMP 5.0, 2.1.5 Array Sections, Description. - // When the length is absent it defaults to ⌈(size − - // lower-bound)/stride⌉, where size is the size of the array - // dimension. - const Expr *StrideExpr = OASE->getStride(); - llvm::Value *Stride = - StrideExpr - ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), - CGF.Int64Ty, /*isSigned=*/false) - : nullptr; - if (Stride) - Count = CGF.Builder.CreateUDiv( - CGF.Builder.CreateNUWSub(*DI, Offset), Stride); + // Count + const Expr *CountExpr = OASE->getLength(); + llvm::Value *Count = nullptr; + if (!CountExpr) + { + // In Clang, once a high dimension is an array section, we construct all + // the lower dimension as array section, however, for case like + // arr[0:2][2], Clang construct the inner dimension as an array section + // but it actually is not in an array section form according to spec. + if (!OASE->getColonLocFirst().isValid() && + !OASE->getColonLocSecond().isValid()) + { + Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); + } else - Count = CGF.Builder.CreateNUWSub(*DI, Offset); + { + // OpenMP 5.0, 2.1.5 Array Sections, Description. + // When the length is absent it defaults to ⌈(size − + // lower-bound)/stride⌉, where size is the size of the array + // dimension. + const Expr *StrideExpr = OASE->getStride(); + llvm::Value *Stride = + StrideExpr + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), + CGF.Int64Ty, /*isSigned=*/false) + : nullptr; + if (Stride) + Count = CGF.Builder.CreateUDiv( + CGF.Builder.CreateNUWSub(*DI, Offset), Stride); + else + Count = CGF.Builder.CreateNUWSub(*DI, Offset); + } + } + else + { + Count = CGF.EmitScalarExpr(CountExpr); } - } else { - Count = CGF.EmitScalarExpr(CountExpr); + Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); + CurCounts.push_back(Count); + + // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size + // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: + // Offset Count Stride + // D0 0 1 4 (int) <- dummy dimension + // D1 0 2 8 (2 * (1) * 4) + // D2 1 2 20 (1 * (1 * 5) * 4) + // D3 0 2 200 (2 * (1 * 5 * 4) * 4) + const Expr *StrideExpr = OASE->getStride(); + llvm::Value *Stride = + StrideExpr + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), + CGF.Int64Ty, /*isSigned=*/false) + : nullptr; + DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); + if (Stride) + CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); + else + CurStrides.push_back(DimProd); + if (DI != DimSizes.end()) + ++DI; } - Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); - CurCounts.push_back(Count); - - // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size - // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: - // Offset Count Stride - // D0 0 1 4 (int) <- dummy dimension - // D1 0 2 8 (2 * (1) * 4) - // D2 1 2 20 (1 * (1 * 5) * 4) - // D3 0 2 200 (2 * (1 * 5 * 4) * 4) - const Expr *StrideExpr = OASE->getStride(); - llvm::Value *Stride = - StrideExpr - ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), - CGF.Int64Ty, /*isSigned=*/false) - : nullptr; - DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); - if (Stride) - CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); - else - CurStrides.push_back(DimProd); - if (DI != DimSizes.end()) - ++DI; - } - - CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); - CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); - CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); - } - - /// Return the adjusted map modifiers if the declaration a capture refers to - /// appears in a first-private clause. This is expected to be used only with - /// directives that start with 'target'. - OpenMPOffloadMappingFlags - getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { - assert(Cap.capturesVariable() && "Expected capture by reference only!"); - - // A first private variable captured by reference will use only the - // 'private ptr' and 'map to' flag. Return the right flags if the captured - // declaration is known as first-private in this handler. - if (FirstPrivateDecls.count(Cap.getCapturedVar())) { - if (Cap.getCapturedVar()->getType()->isAnyPointerType()) - return OpenMPOffloadMappingFlags::OMP_MAP_TO | - OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; - return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | - OpenMPOffloadMappingFlags::OMP_MAP_TO; - } - auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); - if (I != LambdasMap.end()) - // for map(to: lambda): using user specified map type. - return getMapTypeBits( - I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), - /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), - /*AddPtrFlag=*/false, - /*AddIsTargetParamFlag=*/false, - /*isNonContiguous=*/false); - return OpenMPOffloadMappingFlags::OMP_MAP_TO | - OpenMPOffloadMappingFlags::OMP_MAP_FROM; - } - - static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Rotate by getFlagMemberOffset() bits. - return static_cast(((uint64_t)Position + 1) - << getFlagMemberOffset()); - } - - static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, - OpenMPOffloadMappingFlags MemberOfFlag) { - // If the entry is PTR_AND_OBJ but has not been marked with the special - // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be - // marked as MEMBER_OF. - if (static_cast>( - Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) && - static_cast>( - (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF)) - return; - // Reset the placeholder value to prepare the flag for the assignment of the - // proper MEMBER_OF value. - Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; - Flags |= MemberOfFlag; - } - - void getPlainLayout(const CXXRecordDecl *RD, - llvm::SmallVectorImpl &Layout, - bool AsBase) const { - const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); + CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); + CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); + CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); + } - llvm::StructType *St = - AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); + /// Return the adjusted map modifiers if the declaration a capture refers to + /// appears in a first-private clause. This is expected to be used only with + /// directives that start with 'target'. + OpenMPOffloadMappingFlags + getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const + { + assert(Cap.capturesVariable() && "Expected capture by reference only!"); - unsigned NumElements = St->getNumElements(); - llvm::SmallVector< - llvm::PointerUnion, 4> - RecordLayout(NumElements); + // A first private variable captured by reference will use only the + // 'private ptr' and 'map to' flag. Return the right flags if the captured + // declaration is known as first-private in this handler. + if (FirstPrivateDecls.count(Cap.getCapturedVar())) + { + if (Cap.getCapturedVar()->getType()->isAnyPointerType()) + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | + OpenMPOffloadMappingFlags::OMP_MAP_TO; + } + auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); + if (I != LambdasMap.end()) + // for map(to: lambda): using user specified map type. + return getMapTypeBits( + I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), + /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false, + /*isNonContiguous=*/false); + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) + { + // Rotate by getFlagMemberOffset() bits. + return static_cast(((uint64_t)Position + 1) + << getFlagMemberOffset()); + } - // Fill bases. - for (const auto &I : RD->bases()) { - if (I.isVirtual()) - continue; - const auto *Base = I.getType()->getAsCXXRecordDecl(); - // Ignore empty bases. - if (Base->isEmpty() || CGF.getContext() - .getASTRecordLayout(Base) - .getNonVirtualSize() - .isZero()) - continue; + static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, + OpenMPOffloadMappingFlags MemberOfFlag) + { + // If the entry is PTR_AND_OBJ but has not been marked with the special + // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be + // marked as MEMBER_OF. + if (static_cast>( + Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) && + static_cast>( + (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF)) + return; - unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); - RecordLayout[FieldIndex] = Base; + // Reset the placeholder value to prepare the flag for the assignment of the + // proper MEMBER_OF value. + Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; + Flags |= MemberOfFlag; } - // Fill in virtual bases. - for (const auto &I : RD->vbases()) { - const auto *Base = I.getType()->getAsCXXRecordDecl(); - // Ignore empty bases. - if (Base->isEmpty()) - continue; - unsigned FieldIndex = RL.getVirtualBaseIndex(Base); - if (RecordLayout[FieldIndex]) - continue; - RecordLayout[FieldIndex] = Base; - } - // Fill in all the fields. - assert(!RD->isUnion() && "Unexpected union."); - for (const auto *Field : RD->fields()) { - // Fill in non-bitfields. (Bitfields always use a zero pattern, which we - // will fill in later.) - if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { - unsigned FieldIndex = RL.getLLVMFieldNo(Field); - RecordLayout[FieldIndex] = Field; + + void getPlainLayout(const CXXRecordDecl *RD, + llvm::SmallVectorImpl &Layout, + bool AsBase) const + { + const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); + + llvm::StructType *St = + AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); + + unsigned NumElements = St->getNumElements(); + llvm::SmallVector< + llvm::PointerUnion, 4> + RecordLayout(NumElements); + + // Fill bases. + for (const auto &I : RD->bases()) + { + if (I.isVirtual()) + continue; + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty() || CGF.getContext() + .getASTRecordLayout(Base) + .getNonVirtualSize() + .isZero()) + continue; + + unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); + RecordLayout[FieldIndex] = Base; + } + // Fill in virtual bases. + for (const auto &I : RD->vbases()) + { + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty()) + continue; + unsigned FieldIndex = RL.getVirtualBaseIndex(Base); + if (RecordLayout[FieldIndex]) + continue; + RecordLayout[FieldIndex] = Base; + } + // Fill in all the fields. + assert(!RD->isUnion() && "Unexpected union."); + for (const auto *Field : RD->fields()) + { + // Fill in non-bitfields. (Bitfields always use a zero pattern, which we + // will fill in later.) + if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) + { + unsigned FieldIndex = RL.getLLVMFieldNo(Field); + RecordLayout[FieldIndex] = Field; + } + } + for (const llvm::PointerUnion + &Data : RecordLayout) + { + if (Data.isNull()) + continue; + if (const auto *Base = Data.dyn_cast()) + getPlainLayout(Base, Layout, /*AsBase=*/true); + else + Layout.push_back(Data.get()); } } - for (const llvm::PointerUnion - &Data : RecordLayout) { - if (Data.isNull()) - continue; - if (const auto *Base = Data.dyn_cast()) - getPlainLayout(Base, Layout, /*AsBase=*/true); - else - Layout.push_back(Data.get()); - } - } - - /// Generate all the base pointers, section pointers, sizes, map types, and - /// mappers for the extracted mappable expressions (all included in \a - /// CombinedInfo). Also, for each item that relates with a device pointer, a - /// pair of the relevant declaration and index where it occurs is appended to - /// the device pointers info array. - void generateAllInfoForClauses( - ArrayRef Clauses, MapCombinedInfoTy &CombinedInfo, - const llvm::DenseSet> &SkipVarSet = - llvm::DenseSet>()) const { - // We have to process the component lists that relate with the same - // declaration in a single chunk so that we can generate the map flags - // correctly. Therefore, we organize all lists in a map. - enum MapKind { Present, Allocs, Other, Total }; - llvm::MapVector, - SmallVector, 4>> - Info; - - // Helper function to fill the information map for the different supported - // clauses. - auto &&InfoGen = - [&Info, &SkipVarSet]( - const ValueDecl *D, MapKind Kind, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - ArrayRef MotionModifiers, - bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, - const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { - if (SkipVarSet.contains(D)) - return; - auto It = Info.find(D); - if (It == Info.end()) - It = Info - .insert(std::make_pair( - D, SmallVector, 4>(Total))) - .first; - It->second[Kind].emplace_back( - L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, - IsImplicit, Mapper, VarRef, ForDeviceAddr); - }; - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast(Cl); - if (!C) - continue; - MapKind Kind = Other; - if (llvm::is_contained(C->getMapTypeModifiers(), - OMPC_MAP_MODIFIER_present)) - Kind = Present; - else if (C->getMapType() == OMPC_MAP_alloc) - Kind = Allocs; - const auto *EI = C->getVarRefs().begin(); - for (const auto L : C->component_lists()) { - const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; - InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), - C->getMapTypeModifiers(), std::nullopt, - /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), - E); - ++EI; + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted mappable expressions (all included in \a + /// CombinedInfo). Also, for each item that relates with a device pointer, a + /// pair of the relevant declaration and index where it occurs is appended to + /// the device pointers info array. + void generateAllInfoForClauses( + ArrayRef Clauses, MapCombinedInfoTy &CombinedInfo, + const llvm::DenseSet> &SkipVarSet = + llvm::DenseSet>()) const + { + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + enum MapKind + { + Present, + Allocs, + Other, + Total + }; + llvm::MapVector, + SmallVector, 4>> + Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = + [&Info, &SkipVarSet]( + const ValueDecl *D, MapKind Kind, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, + ArrayRef MotionModifiers, + bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, + const Expr *VarRef = nullptr, bool ForDeviceAddr = false) + { + if (SkipVarSet.contains(D)) + return; + auto It = Info.find(D); + if (It == Info.end()) + It = Info + .insert(std::make_pair( + D, SmallVector, 4>(Total))) + .first; + It->second[Kind].emplace_back( + L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, + IsImplicit, Mapper, VarRef, ForDeviceAddr); + }; + + for (const auto *Cl : Clauses) + { + const auto *C = dyn_cast(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMapTypeModifiers(), + OMPC_MAP_MODIFIER_present)) + Kind = Present; + else if (C->getMapType() == OMPC_MAP_alloc) + Kind = Allocs; + const auto *EI = C->getVarRefs().begin(); + for (const auto L : C->component_lists()) + { + const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; + InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), + C->getMapTypeModifiers(), std::nullopt, + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + E); + ++EI; + } } - } - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast(Cl); - if (!C) - continue; - MapKind Kind = Other; - if (llvm::is_contained(C->getMotionModifiers(), - OMPC_MOTION_MODIFIER_present)) - Kind = Present; - const auto *EI = C->getVarRefs().begin(); - for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, - C->getMotionModifiers(), /*ReturnDevicePointer=*/false, - C->isImplicit(), std::get<2>(L), *EI); - ++EI; + for (const auto *Cl : Clauses) + { + const auto *C = dyn_cast(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) + Kind = Present; + const auto *EI = C->getVarRefs().begin(); + for (const auto L : C->component_lists()) + { + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, + C->getMotionModifiers(), /*ReturnDevicePointer=*/false, + C->isImplicit(), std::get<2>(L), *EI); + ++EI; + } } - } - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast(Cl); - if (!C) - continue; - MapKind Kind = Other; - if (llvm::is_contained(C->getMotionModifiers(), - OMPC_MOTION_MODIFIER_present)) - Kind = Present; - const auto *EI = C->getVarRefs().begin(); - for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, - std::nullopt, C->getMotionModifiers(), - /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), - *EI); - ++EI; + for (const auto *Cl : Clauses) + { + const auto *C = dyn_cast(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) + Kind = Present; + const auto *EI = C->getVarRefs().begin(); + for (const auto L : C->component_lists()) + { + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, + std::nullopt, C->getMotionModifiers(), + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + *EI); + ++EI; + } } - } - // Look at the use_device_ptr and use_device_addr clauses information and - // mark the existing map entries as such. If there is no map information for - // an entry in the use_device_ptr and use_device_addr list, we create one - // with map type 'alloc' and zero size section. It is the user fault if that - // was not mapped before. If there is no map information and the pointer is - // a struct member, then we defer the emission of that entry until the whole - // struct has been processed. - llvm::MapVector, - SmallVector> - DeferredInfo; - MapCombinedInfoTy UseDeviceDataCombinedInfo; - - auto &&UseDeviceDataCombinedInfoGen = - [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, - CodeGenFunction &CGF) { - UseDeviceDataCombinedInfo.Exprs.push_back(VD); - UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD); - UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); - UseDeviceDataCombinedInfo.Sizes.push_back( - llvm::Constant::getNullValue(CGF.Int64Ty)); - UseDeviceDataCombinedInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); - UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); - }; + // Look at the use_device_ptr and use_device_addr clauses information and + // mark the existing map entries as such. If there is no map information for + // an entry in the use_device_ptr and use_device_addr list, we create one + // with map type 'alloc' and zero size section. It is the user fault if that + // was not mapped before. If there is no map information and the pointer is + // a struct member, then we defer the emission of that entry until the whole + // struct has been processed. + llvm::MapVector, + SmallVector> + DeferredInfo; + MapCombinedInfoTy UseDeviceDataCombinedInfo; + + auto &&UseDeviceDataCombinedInfoGen = + [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, + CodeGenFunction &CGF) + { + UseDeviceDataCombinedInfo.Exprs.push_back(VD); + UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD); + UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); + UseDeviceDataCombinedInfo.Sizes.push_back( + llvm::Constant::getNullValue(CGF.Int64Ty)); + UseDeviceDataCombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); + UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); + }; - auto &&MapInfoGen = - [&DeferredInfo, &UseDeviceDataCombinedInfoGen, - &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, - OMPClauseMappableExprCommon::MappableExprComponentListRef - Components, - bool IsImplicit, bool IsDevAddr) { - // We didn't find any match in our map information - generate a zero - // size array section - if the pointer is a struct member we defer - // this action until the whole struct has been processed. - if (isa(IE)) { - // Insert the pointer into Info to be processed by - // generateInfoForComponentList. Because it is a member pointer - // without a pointee, no entry will be generated for it, therefore - // we need to generate one after the whole struct has been - // processed. Nonetheless, generateInfoForComponentList must be - // called to take the pointer into account for the calculation of - // the range of the partial struct. - InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, - std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, - nullptr, nullptr, IsDevAddr); - DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); - } else { - llvm::Value *Ptr; - if (IsDevAddr) { - if (IE->isGLValue()) - Ptr = CGF.EmitLValue(IE).getPointer(CGF); - else - Ptr = CGF.EmitScalarExpr(IE); - } else { - Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); - } - UseDeviceDataCombinedInfoGen(VD, Ptr, CGF); + auto &&MapInfoGen = + [&DeferredInfo, &UseDeviceDataCombinedInfoGen, + &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Components, + bool IsImplicit, bool IsDevAddr) + { + // We didn't find any match in our map information - generate a zero + // size array section - if the pointer is a struct member we defer + // this action until the whole struct has been processed. + if (isa(IE)) + { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been + // processed. Nonetheless, generateInfoForComponentList must be + // called to take the pointer into account for the calculation of + // the range of the partial struct. + InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, + std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, + nullptr, nullptr, IsDevAddr); + DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); + } + else + { + llvm::Value *Ptr; + if (IsDevAddr) + { + if (IE->isGLValue()) + Ptr = CGF.EmitLValue(IE).getPointer(CGF); + else + Ptr = CGF.EmitScalarExpr(IE); } - }; + else + { + Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); + } + UseDeviceDataCombinedInfoGen(VD, Ptr, CGF); + } + }; - auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, - const Expr *IE, bool IsDevAddr) -> bool { - // We potentially have map information for this declaration already. - // Look for the first set of components that refer to it. If found, - // return true. - // If the first component is a member expression, we have to look into - // 'this', which maps to null in the map of map information. Otherwise - // look directly for the information. - auto It = Info.find(isa(IE) ? nullptr : VD); - if (It != Info.end()) { - bool Found = false; - for (auto &Data : It->second) { - auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); - // If we found a map entry, signal that the pointer has to be - // returned and move on to the next declaration. Exclude cases where - // the base pointer is mapped as array subscript, array section or - // array shaping. The base address is passed as a pointer to base in - // this case and cannot be used as a base for use_device_ptr list - // item. - if (CI != Data.end()) { - if (IsDevAddr) { - CI->ReturnDevicePointer = true; - Found = true; - break; - } else { - auto PrevCI = std::next(CI->Components.rbegin()); - const auto *VarD = dyn_cast(VD); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - isa(IE) || - !VD->getType().getNonReferenceType()->isPointerType() || - PrevCI == CI->Components.rend() || - isa(PrevCI->getAssociatedExpression()) || !VarD || - VarD->hasLocalStorage()) { + auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, + const Expr *IE, bool IsDevAddr) -> bool + { + // We potentially have map information for this declaration already. + // Look for the first set of components that refer to it. If found, + // return true. + // If the first component is a member expression, we have to look into + // 'this', which maps to null in the map of map information. Otherwise + // look directly for the information. + auto It = Info.find(isa(IE) ? nullptr : VD); + if (It != Info.end()) + { + bool Found = false; + for (auto &Data : It->second) + { + auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) + { return MI.Components.back().getAssociatedDeclaration() == VD; }); + // If we found a map entry, signal that the pointer has to be + // returned and move on to the next declaration. Exclude cases where + // the base pointer is mapped as array subscript, array section or + // array shaping. The base address is passed as a pointer to base in + // this case and cannot be used as a base for use_device_ptr list + // item. + if (CI != Data.end()) + { + if (IsDevAddr) + { CI->ReturnDevicePointer = true; Found = true; break; } + else + { + auto PrevCI = std::next(CI->Components.rbegin()); + const auto *VarD = dyn_cast(VD); + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + isa(IE) || + !VD->getType().getNonReferenceType()->isPointerType() || + PrevCI == CI->Components.rend() || + isa(PrevCI->getAssociatedExpression()) || !VarD || + VarD->hasLocalStorage()) + { + CI->ReturnDevicePointer = true; + Found = true; + break; + } + } } } + return Found; } - return Found; - } - return false; - }; + return false; + }; - // Look at the use_device_ptr clause information and mark the existing map - // entries as such. If there is no map information for an entry in the - // use_device_ptr list, we create one with map type 'alloc' and zero size - // section. It is the user fault if that was not mapped before. If there is - // no map information and the pointer is a struct member, then we defer the - // emission of that entry until the whole struct has been processed. - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast(Cl); - if (!C) - continue; - for (const auto L : C->component_lists()) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components = - std::get<1>(L); - assert(!Components.empty() && - "Not expecting empty list of components!"); - const ValueDecl *VD = Components.back().getAssociatedDeclaration(); - VD = cast(VD->getCanonicalDecl()); - const Expr *IE = Components.back().getAssociatedExpression(); - if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) + // Look at the use_device_ptr clause information and mark the existing map + // entries as such. If there is no map information for an entry in the + // use_device_ptr list, we create one with map type 'alloc' and zero size + // section. It is the user fault if that was not mapped before. If there is + // no map information and the pointer is a struct member, then we defer the + // emission of that entry until the whole struct has been processed. + for (const auto *Cl : Clauses) + { + const auto *C = dyn_cast(Cl); + if (!C) continue; - MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), - /*IsDevAddr=*/false); + for (const auto L : C->component_lists()) + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); + assert(!Components.empty() && + "Not expecting empty list of components!"); + const ValueDecl *VD = Components.back().getAssociatedDeclaration(); + VD = cast(VD->getCanonicalDecl()); + const Expr *IE = Components.back().getAssociatedExpression(); + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/false); + } } - } - llvm::SmallDenseSet, 4> Processed; - for (const auto *Cl : Clauses) { - const auto *C = dyn_cast(Cl); - if (!C) - continue; - for (const auto L : C->component_lists()) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components = - std::get<1>(L); - assert(!std::get<1>(L).empty() && - "Not expecting empty list of components!"); - const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); - if (!Processed.insert(VD).second) - continue; - VD = cast(VD->getCanonicalDecl()); - const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); - if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) + llvm::SmallDenseSet, 4> Processed; + for (const auto *Cl : Clauses) + { + const auto *C = dyn_cast(Cl); + if (!C) continue; - MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), - /*IsDevAddr=*/true); + for (const auto L : C->component_lists()) + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); + assert(!std::get<1>(L).empty() && + "Not expecting empty list of components!"); + const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); + if (!Processed.insert(VD).second) + continue; + VD = cast(VD->getCanonicalDecl()); + const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/true); + } } - } - - for (const auto &Data : Info) { - StructRangeInfoTy PartialStruct; - // Temporary generated information. - MapCombinedInfoTy CurInfo; - const Decl *D = Data.first; - const ValueDecl *VD = cast_or_null(D); - for (const auto &M : Data.second) { - for (const MapInfo &L : M) { - assert(!L.Components.empty() && - "Not expecting declaration with no component lists."); - // Remember the current base pointer index. - unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); - CurInfo.NonContigInfo.IsNonContiguous = - L.Components.back().isNonContiguous(); - generateInfoForComponentList( - L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, - CurInfo, PartialStruct, /*IsFirstComponentList=*/false, - L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); - - // If this entry relates with a device pointer, set the relevant - // declaration and add the 'return pointer' flag. - if (L.ReturnDevicePointer) { - assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && - "Unexpected number of mapped base pointers."); - - const ValueDecl *RelevantVD = - L.Components.back().getAssociatedDeclaration(); - assert(RelevantVD && - "No relevant declaration related with device pointer??"); - - CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( - RelevantVD); - CurInfo.Types[CurrentBasePointersIdx] |= - OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + for (const auto &Data : Info) + { + StructRangeInfoTy PartialStruct; + // Temporary generated information. + MapCombinedInfoTy CurInfo; + const Decl *D = Data.first; + const ValueDecl *VD = cast_or_null(D); + for (const auto &M : Data.second) + { + for (const MapInfo &L : M) + { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + + // Remember the current base pointer index. + unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); + CurInfo.NonContigInfo.IsNonContiguous = + L.Components.back().isNonContiguous(); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, + CurInfo, PartialStruct, /*IsFirstComponentList=*/false, + L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); + + // If this entry relates with a device pointer, set the relevant + // declaration and add the 'return pointer' flag. + if (L.ReturnDevicePointer) + { + assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && + "Unexpected number of mapped base pointers."); + + const ValueDecl *RelevantVD = + L.Components.back().getAssociatedDeclaration(); + assert(RelevantVD && + "No relevant declaration related with device pointer??"); + + CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( + RelevantVD); + CurInfo.Types[CurrentBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + } } } - } - // Append any pending zero-length pointers which are struct members and - // used with use_device_ptr or use_device_addr. - auto CI = DeferredInfo.find(Data.first); - if (CI != DeferredInfo.end()) { - for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr; - llvm::Value *Ptr; - if (L.ForDeviceAddr) { - if (L.IE->isGLValue()) - Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + // Append any pending zero-length pointers which are struct members and + // used with use_device_ptr or use_device_addr. + auto CI = DeferredInfo.find(Data.first); + if (CI != DeferredInfo.end()) + { + for (const DeferredDevicePtrEntryTy &L : CI->second) + { + llvm::Value *BasePtr; + llvm::Value *Ptr; + if (L.ForDeviceAddr) + { + if (L.IE->isGLValue()) + Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + else + Ptr = this->CGF.EmitScalarExpr(L.IE); + BasePtr = Ptr; + // Entry is RETURN_PARAM. Also, set the placeholder value + // MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + } else - Ptr = this->CGF.EmitScalarExpr(L.IE); - BasePtr = Ptr; - // Entry is RETURN_PARAM. Also, set the placeholder value - // MEMBER_OF=FFFF so that the entry is later updated with the - // correct value of MEMBER_OF. - CurInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); - } else { - BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); - Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), - L.IE->getExprLoc()); - // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the - // placeholder value MEMBER_OF=FFFF so that the entry is later - // updated with the correct value of MEMBER_OF. - CurInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | - OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + { + BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), + L.IE->getExprLoc()); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the + // placeholder value MEMBER_OF=FFFF so that the entry is later + // updated with the correct value of MEMBER_OF. + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + } + CurInfo.Exprs.push_back(L.VD); + CurInfo.BasePointers.emplace_back(BasePtr, L.VD); + CurInfo.Pointers.push_back(Ptr); + CurInfo.Sizes.push_back( + llvm::Constant::getNullValue(this->CGF.Int64Ty)); + CurInfo.Mappers.push_back(nullptr); } - CurInfo.Exprs.push_back(L.VD); - CurInfo.BasePointers.emplace_back(BasePtr, L.VD); - CurInfo.Pointers.push_back(Ptr); - CurInfo.Sizes.push_back( - llvm::Constant::getNullValue(this->CGF.Int64Ty)); - CurInfo.Mappers.push_back(nullptr); } - } - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) { - CurInfo.NonContigInfo.Dims.push_back(0); - emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); - } + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + { + CurInfo.NonContigInfo.Dims.push_back(0); + emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); + } - // We need to append the results of this capture to what we already - // have. - CombinedInfo.append(CurInfo); - } - // Append data for use_device_ptr clauses. - CombinedInfo.append(UseDeviceDataCombinedInfo); - } - -public: - MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) { - // Extract firstprivate clause information. - for (const auto *C : Dir.getClausesOfKind()) - for (const auto *D : C->varlists()) - FirstPrivateDecls.try_emplace( - cast(cast(D)->getDecl()), C->isImplicit()); - // Extract implicit firstprivates from uses_allocators clauses. - for (const auto *C : Dir.getClausesOfKind()) { - for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { - OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); - if (const auto *DRE = dyn_cast_or_null(D.AllocatorTraits)) - FirstPrivateDecls.try_emplace(cast(DRE->getDecl()), - /*Implicit=*/true); - else if (const auto *VD = dyn_cast( - cast(D.Allocator->IgnoreParenImpCasts()) - ->getDecl())) - FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); + // We need to append the results of this capture to what we already + // have. + CombinedInfo.append(CurInfo); } + // Append data for use_device_ptr clauses. + CombinedInfo.append(UseDeviceDataCombinedInfo); } - // Extract device pointer clause information. - for (const auto *C : Dir.getClausesOfKind()) - for (auto L : C->component_lists()) - DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); - // Extract device addr clause information. - for (const auto *C : Dir.getClausesOfKind()) - for (auto L : C->component_lists()) - HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); - // Extract map information. - for (const auto *C : Dir.getClausesOfKind()) { - if (C->getMapType() != OMPC_MAP_to) - continue; - for (auto L : C->component_lists()) { - const ValueDecl *VD = std::get<0>(L); - const auto *RD = VD ? VD->getType() - .getCanonicalType() - .getNonReferenceType() - ->getAsCXXRecordDecl() - : nullptr; - if (RD && RD->isLambda()) - LambdasMap.try_emplace(std::get<0>(L), C); + + public: + MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) + { + // Extract firstprivate clause information. + for (const auto *C : Dir.getClausesOfKind()) + for (const auto *D : C->varlists()) + FirstPrivateDecls.try_emplace( + cast(cast(D)->getDecl()), C->isImplicit()); + // Extract implicit firstprivates from uses_allocators clauses. + for (const auto *C : Dir.getClausesOfKind()) + { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) + { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (const auto *DRE = dyn_cast_or_null(D.AllocatorTraits)) + FirstPrivateDecls.try_emplace(cast(DRE->getDecl()), + /*Implicit=*/true); + else if (const auto *VD = dyn_cast( + cast(D.Allocator->IgnoreParenImpCasts()) + ->getDecl())) + FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); + } + } + // Extract device pointer clause information. + for (const auto *C : Dir.getClausesOfKind()) + for (auto L : C->component_lists()) + DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract device addr clause information. + for (const auto *C : Dir.getClausesOfKind()) + for (auto L : C->component_lists()) + HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract map information. + for (const auto *C : Dir.getClausesOfKind()) + { + if (C->getMapType() != OMPC_MAP_to) + continue; + for (auto L : C->component_lists()) + { + const ValueDecl *VD = std::get<0>(L); + const auto *RD = VD ? VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl() + : nullptr; + if (RD && RD->isLambda()) + LambdasMap.try_emplace(std::get<0>(L), C); + } } } - } - /// Constructor for the declare mapper directive. - MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) {} + /// Constructor for the declare mapper directive. + MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} - /// Generate code for the combined entry if we have a partially mapped struct - /// and take care of the mapping flags of the arguments corresponding to - /// individual struct members. - void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, - MapFlagsArrayTy &CurTypes, - const StructRangeInfoTy &PartialStruct, - const ValueDecl *VD = nullptr, - bool NotTargetParams = true) const { - if (CurTypes.size() == 1 && - ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && - !PartialStruct.IsArraySection) - return; - Address LBAddr = PartialStruct.LowestElem.second; - Address HBAddr = PartialStruct.HighestElem.second; - if (PartialStruct.HasCompleteRecord) { - LBAddr = PartialStruct.LB; - HBAddr = PartialStruct.LB; - } - CombinedInfo.Exprs.push_back(VD); - // Base is the base of the struct - CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); - // Pointer is the address of the lowest element - llvm::Value *LB = LBAddr.getPointer(); - const CXXMethodDecl *MD = - CGF.CurFuncDecl ? dyn_cast(CGF.CurFuncDecl) : nullptr; - const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; - // When VD is not null, it is not field of class, skip generating this[:1]. - bool HasBaseClass = RD && !VD ? RD->getNumBases() > 0 : false; - // There should not be a mapper for a combined entry. - if (HasBaseClass) { - // OpenMP 5.2 148:21: - // If the target construct is within a class non-static member function, - // and a variable is an accessible data member of the object for which the - // non-static data member function is invoked, the variable is treated as - // if the this[:1] expression had appeared in a map clause with a map-type - // of tofrom. - // Emit this[:1] - CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); - QualType Ty = MD->getThisType()->getPointeeType(); - llvm::Value *Size = - CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, - /*isSigned=*/true); - CombinedInfo.Sizes.push_back(Size); - } else { - CombinedInfo.Pointers.push_back(LB); - // Size is (addr of {highest+1} element) - (addr of lowest element) - llvm::Value *HB = HBAddr.getPointer(); - llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( - HBAddr.getElementType(), HB, /*Idx0=*/1); - llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); - llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); - llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); - llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, - /*isSigned=*/false); - CombinedInfo.Sizes.push_back(Size); - } - CombinedInfo.Mappers.push_back(nullptr); - // Map type is always TARGET_PARAM, if generate info for captures. - CombinedInfo.Types.push_back( - NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE - : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); - // If any element has the present modifier, then make sure the runtime - // doesn't attempt to allocate the struct. - if (CurTypes.end() != - llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { - return static_cast>( - Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); - })) - CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; - // Remove TARGET_PARAM flag from the first element - (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; - // If any element has the ompx_hold modifier, then make sure the runtime - // uses the hold reference count for the struct as a whole so that it won't - // be unmapped by an extra dynamic reference count decrement. Add it to all - // elements as well so the runtime knows which reference count to check - // when determining whether it's time for device-to-host transfers of - // individual elements. - if (CurTypes.end() != - llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { - return static_cast>( - Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); - })) { - CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; - for (auto &M : CurTypes) - M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; - } - - // All other current entries will be MEMBER_OF the combined entry - // (except for PTR_AND_OBJ entries which do not have a placeholder value - // 0xFFFF in the MEMBER_OF field). - OpenMPOffloadMappingFlags MemberOfFlag = - getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); - for (auto &M : CurTypes) - setCorrectMemberOfFlag(M, MemberOfFlag); - } - - /// Generate all the base pointers, section pointers, sizes, map types, and - /// mappers for the extracted mappable expressions (all included in \a - /// CombinedInfo). Also, for each item that relates with a device pointer, a - /// pair of the relevant declaration and index where it occurs is appended to - /// the device pointers info array. - void generateAllInfo( - MapCombinedInfoTy &CombinedInfo, - const llvm::DenseSet> &SkipVarSet = - llvm::DenseSet>()) const { - assert(CurDir.is() && - "Expect a executable directive"); - const auto *CurExecDir = CurDir.get(); - generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); - } - - /// Generate all the base pointers, section pointers, sizes, map types, and - /// mappers for the extracted map clauses of user-defined mapper (all included - /// in \a CombinedInfo). - void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { - assert(CurDir.is() && - "Expect a declare mapper directive"); - const auto *CurMapperDir = CurDir.get(); - generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); - } - - /// Emit capture info for lambdas for variables captured by reference. - void generateInfoForLambdaCaptures( - const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, - llvm::DenseMap &LambdaPointers) const { - QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); - const auto *RD = VDType->getAsCXXRecordDecl(); - if (!RD || !RD->isLambda()) - return; - Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), - CGF.getContext().getDeclAlign(VD)); - LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); - llvm::DenseMap Captures; - FieldDecl *ThisCapture = nullptr; - RD->getCaptureFields(Captures, ThisCapture); - if (ThisCapture) { - LValue ThisLVal = - CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); - LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); - LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), - VDLVal.getPointer(CGF)); + /// Generate code for the combined entry if we have a partially mapped struct + /// and take care of the mapping flags of the arguments corresponding to + /// individual struct members. + void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, + MapFlagsArrayTy &CurTypes, + const StructRangeInfoTy &PartialStruct, + const ValueDecl *VD = nullptr, + bool NotTargetParams = true) const + { + if (CurTypes.size() == 1 && + ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && + !PartialStruct.IsArraySection) + return; + Address LBAddr = PartialStruct.LowestElem.second; + Address HBAddr = PartialStruct.HighestElem.second; + if (PartialStruct.HasCompleteRecord) + { + LBAddr = PartialStruct.LB; + HBAddr = PartialStruct.LB; + } CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); - CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), - CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | - OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + // Base is the base of the struct + CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); + // Pointer is the address of the lowest element + llvm::Value *LB = LBAddr.getPointer(); + const CXXMethodDecl *MD = + CGF.CurFuncDecl ? dyn_cast(CGF.CurFuncDecl) : nullptr; + const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; + // When VD is not null, it is not field of class, skip generating this[:1]. + bool HasBaseClass = RD && !VD ? RD->getNumBases() > 0 : false; + // There should not be a mapper for a combined entry. + if (HasBaseClass) + { + // OpenMP 5.2 148:21: + // If the target construct is within a class non-static member function, + // and a variable is an accessible data member of the object for which the + // non-static data member function is invoked, the variable is treated as + // if the this[:1] expression had appeared in a map clause with a map-type + // of tofrom. + // Emit this[:1] + CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); + QualType Ty = MD->getThisType()->getPointeeType(); + llvm::Value *Size = + CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, + /*isSigned=*/true); + CombinedInfo.Sizes.push_back(Size); + } + else + { + CombinedInfo.Pointers.push_back(LB); + // Size is (addr of {highest+1} element) - (addr of lowest element) + llvm::Value *HB = HBAddr.getPointer(); + llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( + HBAddr.getElementType(), HB, /*Idx0=*/1); + llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); + llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); + llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, + /*isSigned=*/false); + CombinedInfo.Sizes.push_back(Size); + } CombinedInfo.Mappers.push_back(nullptr); + // Map type is always TARGET_PARAM, if generate info for captures. + CombinedInfo.Types.push_back( + NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE + : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); + // If any element has the present modifier, then make sure the runtime + // doesn't attempt to allocate the struct. + if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) + { return static_cast>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); })) + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + // Remove TARGET_PARAM flag from the first element + (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + // If any element has the ompx_hold modifier, then make sure the runtime + // uses the hold reference count for the struct as a whole so that it won't + // be unmapped by an extra dynamic reference count decrement. Add it to all + // elements as well so the runtime knows which reference count to check + // when determining whether it's time for device-to-host transfers of + // individual elements. + if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) + { return static_cast>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); })) + { + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + for (auto &M : CurTypes) + M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + } + + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = + getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); + for (auto &M : CurTypes) + setCorrectMemberOfFlag(M, MemberOfFlag); + } + + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted mappable expressions (all included in \a + /// CombinedInfo). Also, for each item that relates with a device pointer, a + /// pair of the relevant declaration and index where it occurs is appended to + /// the device pointers info array. + void generateAllInfo( + MapCombinedInfoTy &CombinedInfo, + const llvm::DenseSet> &SkipVarSet = + llvm::DenseSet>()) const + { + assert(CurDir.is() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get(); + generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); } - for (const LambdaCapture &LC : RD->captures()) { - if (!LC.capturesVariable()) - continue; - const VarDecl *VD = cast(LC.getCapturedVar()); - if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) - continue; - auto It = Captures.find(VD); - assert(It != Captures.end() && "Found lambda capture without field."); - LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); - if (LC.getCaptureKind() == LCK_ByRef) { - LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(CGF), - VDLVal.getPointer(CGF)); - CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); - CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize( - VD->getType().getCanonicalType().getNonReferenceType()), - CGF.Int64Ty, /*isSigned=*/true)); - } else { - RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); - LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted map clauses of user-defined mapper (all included + /// in \a CombinedInfo). + void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const + { + assert(CurDir.is() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get(); + generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); + } + + /// Emit capture info for lambdas for variables captured by reference. + void generateInfoForLambdaCaptures( + const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, + llvm::DenseMap &LambdaPointers) const + { + QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); + const auto *RD = VDType->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + return; + Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), + CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); + llvm::DenseMap Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture) + { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); + LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); - CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); - CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); + CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), + CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CombinedInfo.Mappers.push_back(nullptr); + } + for (const LambdaCapture &LC : RD->captures()) + { + if (!LC.capturesVariable()) + continue; + const VarDecl *VD = cast(LC.getCapturedVar()); + if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) + continue; + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + if (LC.getCaptureKind() == LCK_ByRef) + { + LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType()), + CGF.Int64Ty, /*isSigned=*/true)); + } + else + { + RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); + CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); + } + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CombinedInfo.Mappers.push_back(nullptr); } - CombinedInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | - OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); - CombinedInfo.Mappers.push_back(nullptr); } - } - /// Set correct indices for lambdas captures. - void adjustMemberOfForLambdaCaptures( - const llvm::DenseMap &LambdaPointers, - MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapFlagsArrayTy &Types) const { - for (unsigned I = 0, E = Types.size(); I < E; ++I) { - // Set correct member_of idx for all implicit lambda captures. - if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | - OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) - continue; - llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); - assert(BasePtr && "Unable to find base lambda address."); - int TgtIdx = -1; - for (unsigned J = I; J > 0; --J) { - unsigned Idx = J - 1; - if (Pointers[Idx] != BasePtr) + /// Set correct indices for lambdas captures. + void adjustMemberOfForLambdaCaptures( + const llvm::DenseMap &LambdaPointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapFlagsArrayTy &Types) const + { + for (unsigned I = 0, E = Types.size(); I < E; ++I) + { + // Set correct member_of idx for all implicit lambda captures. + if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) continue; - TgtIdx = Idx; - break; + llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + assert(BasePtr && "Unable to find base lambda address."); + int TgtIdx = -1; + for (unsigned J = I; J > 0; --J) + { + unsigned Idx = J - 1; + if (Pointers[Idx] != BasePtr) + continue; + TgtIdx = Idx; + break; + } + assert(TgtIdx != -1 && "Unable to find parent lambda."); + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); + setCorrectMemberOfFlag(Types[I], MemberOfFlag); } - assert(TgtIdx != -1 && "Unable to find parent lambda."); - // All other current entries will be MEMBER_OF the combined entry - // (except for PTR_AND_OBJ entries which do not have a placeholder value - // 0xFFFF in the MEMBER_OF field). - OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); - setCorrectMemberOfFlag(Types[I], MemberOfFlag); } - } - /// Generate the base pointers, section pointers, sizes, map types, and - /// mappers associated to a given capture (all included in \a CombinedInfo). - void generateInfoForCapture(const CapturedStmt::Capture *Cap, - llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, - StructRangeInfoTy &PartialStruct) const { - assert(!Cap->capturesVariableArrayType() && - "Not expecting to generate map info for a variable array type!"); + /// Generate the base pointers, section pointers, sizes, map types, and + /// mappers associated to a given capture (all included in \a CombinedInfo). + void generateInfoForCapture(const CapturedStmt::Capture *Cap, + llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, + StructRangeInfoTy &PartialStruct) const + { + assert(!Cap->capturesVariableArrayType() && + "Not expecting to generate map info for a variable array type!"); - // We need to know when we generating information for the first component - const ValueDecl *VD = Cap->capturesThis() - ? nullptr - : Cap->getCapturedVar()->getCanonicalDecl(); + // We need to know when we generating information for the first component + const ValueDecl *VD = Cap->capturesThis() + ? nullptr + : Cap->getCapturedVar()->getCanonicalDecl(); - // for map(to: lambda): skip here, processing it in - // generateDefaultMapInfo - if (LambdasMap.count(VD)) - return; + // for map(to: lambda): skip here, processing it in + // generateDefaultMapInfo + if (LambdasMap.count(VD)) + return; - // If this declaration appears in a is_device_ptr clause we just have to - // pass the pointer by value. If it is a reference to a declaration, we just - // pass its value. - if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { - CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.emplace_back(Arg, VD); - CombinedInfo.Pointers.push_back(Arg); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, - /*isSigned=*/true)); - CombinedInfo.Types.push_back( - (Cap->capturesVariable() - ? OpenMPOffloadMappingFlags::OMP_MAP_TO - : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) | - OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); - CombinedInfo.Mappers.push_back(nullptr); - return; - } + // If this declaration appears in a is_device_ptr clause we just have to + // pass the pointer by value. If it is a reference to a declaration, we just + // pass its value. + if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) + { + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.emplace_back(Arg, VD); + CombinedInfo.Pointers.push_back(Arg); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, + /*isSigned=*/true)); + CombinedInfo.Types.push_back( + (Cap->capturesVariable() + ? OpenMPOffloadMappingFlags::OMP_MAP_TO + : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); + CombinedInfo.Mappers.push_back(nullptr); + return; + } - using MapData = - std::tuple, bool, - const ValueDecl *, const Expr *>; - SmallVector DeclComponentLists; - // For member fields list in is_device_ptr, store it in - // DeclComponentLists for generating components info. - static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown; - auto It = DevPointersMap.find(VD); - if (It != DevPointersMap.end()) - for (const auto &MCL : It->second) - DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown, - /*IsImpicit = */ true, nullptr, - nullptr); - auto I = HasDevAddrsMap.find(VD); - if (I != HasDevAddrsMap.end()) - for (const auto &MCL : I->second) - DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown, - /*IsImpicit = */ true, nullptr, - nullptr); - assert(CurDir.is() && - "Expect a executable directive"); - const auto *CurExecDir = CurDir.get(); - for (const auto *C : CurExecDir->getClausesOfKind()) { - const auto *EI = C->getVarRefs().begin(); - for (const auto L : C->decl_component_lists(VD)) { - const ValueDecl *VDecl, *Mapper; - // The Expression is not correct if the mapping is implicit - const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - std::tie(VDecl, Components, Mapper) = L; - assert(VDecl == VD && "We got information for the wrong declaration??"); - assert(!Components.empty() && - "Not expecting declaration with no component lists."); - DeclComponentLists.emplace_back(Components, C->getMapType(), - C->getMapTypeModifiers(), - C->isImplicit(), Mapper, E); - ++EI; + using MapData = + std::tuple, bool, + const ValueDecl *, const Expr *>; + SmallVector DeclComponentLists; + // For member fields list in is_device_ptr, store it in + // DeclComponentLists for generating components info. + static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown; + auto It = DevPointersMap.find(VD); + if (It != DevPointersMap.end()) + for (const auto &MCL : It->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown, + /*IsImpicit = */ true, nullptr, + nullptr); + auto I = HasDevAddrsMap.find(VD); + if (I != HasDevAddrsMap.end()) + for (const auto &MCL : I->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown, + /*IsImpicit = */ true, nullptr, + nullptr); + assert(CurDir.is() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get(); + for (const auto *C : CurExecDir->getClausesOfKind()) + { + const auto *EI = C->getVarRefs().begin(); + for (const auto L : C->decl_component_lists(VD)) + { + const ValueDecl *VDecl, *Mapper; + // The Expression is not correct if the mapping is implicit + const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + std::tie(VDecl, Components, Mapper) = L; + assert(VDecl == VD && "We got information for the wrong declaration??"); + assert(!Components.empty() && + "Not expecting declaration with no component lists."); + DeclComponentLists.emplace_back(Components, C->getMapType(), + C->getMapTypeModifiers(), + C->isImplicit(), Mapper, E); + ++EI; + } } - } - llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, - const MapData &RHS) { + llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, + const MapData &RHS) + { ArrayRef MapModifiers = std::get<2>(LHS); OpenMPMapClauseKind MapType = std::get<1>(RHS); bool HasPresent = @@ -8752,253 +9569,276 @@ bool HasPresentR = llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); bool HasAllocsR = MapType == OMPC_MAP_alloc; - return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); - }); - - // Find overlapping elements (including the offset from the base element). - llvm::SmallDenseMap< - const MapData *, - llvm::SmallVector< - OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, - 4> - OverlappedData; - size_t Count = 0; - for (const MapData &L : DeclComponentLists) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - ArrayRef MapModifiers; - bool IsImplicit; - const ValueDecl *Mapper; - const Expr *VarRef; - std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = - L; - ++Count; - for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; - std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, - VarRef) = L1; - auto CI = Components.rbegin(); - auto CE = Components.rend(); - auto SI = Components1.rbegin(); - auto SE = Components1.rend(); - for (; CI != CE && SI != SE; ++CI, ++SI) { - if (CI->getAssociatedExpression()->getStmtClass() != - SI->getAssociatedExpression()->getStmtClass()) - break; - // Are we dealing with different variables/fields? - if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) - break; - } - // Found overlapping if, at least for one component, reached the head - // of the components list. - if (CI == CE || SI == SE) { - // Ignore it if it is the same component. - if (CI == CE && SI == SE) - continue; - const auto It = (SI == SE) ? CI : SI; - // If one component is a pointer and another one is a kind of - // dereference of this pointer (array subscript, section, dereference, - // etc.), it is not an overlapping. - // Same, if one component is a base and another component is a - // dereferenced pointer memberexpr with the same base. - if (!isa(It->getAssociatedExpression()) || - (std::prev(It)->getAssociatedDeclaration() && - std::prev(It) - ->getAssociatedDeclaration() - ->getType() - ->isPointerType()) || - (It->getAssociatedDeclaration() && - It->getAssociatedDeclaration()->getType()->isPointerType() && - std::next(It) != CE && std::next(It) != SE)) - continue; - const MapData &BaseData = CI == CE ? L : L1; - OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = - SI == SE ? Components : Components1; - auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); - OverlappedElements.getSecond().push_back(SubData); + return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); }); + + // Find overlapping elements (including the offset from the base element). + llvm::SmallDenseMap< + const MapData *, + llvm::SmallVector< + OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, + 4> + OverlappedData; + size_t Count = 0; + for (const MapData &L : DeclComponentLists) + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef MapModifiers; + bool IsImplicit; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; + ++Count; + for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; + std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, + VarRef) = L1; + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto SI = Components1.rbegin(); + auto SE = Components1.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) + { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) + break; + } + // Found overlapping if, at least for one component, reached the head + // of the components list. + if (CI == CE || SI == SE) + { + // Ignore it if it is the same component. + if (CI == CE && SI == SE) + continue; + const auto It = (SI == SE) ? CI : SI; + // If one component is a pointer and another one is a kind of + // dereference of this pointer (array subscript, section, dereference, + // etc.), it is not an overlapping. + // Same, if one component is a base and another component is a + // dereferenced pointer memberexpr with the same base. + if (!isa(It->getAssociatedExpression()) || + (std::prev(It)->getAssociatedDeclaration() && + std::prev(It) + ->getAssociatedDeclaration() + ->getType() + ->isPointerType()) || + (It->getAssociatedDeclaration() && + It->getAssociatedDeclaration()->getType()->isPointerType() && + std::next(It) != CE && std::next(It) != SE)) + continue; + const MapData &BaseData = CI == CE ? L : L1; + OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = + SI == SE ? Components : Components1; + auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); + OverlappedElements.getSecond().push_back(SubData); + } } } - } - // Sort the overlapped elements for each item. - llvm::SmallVector Layout; - if (!OverlappedData.empty()) { - const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); - const Type *OrigType = BaseType->getPointeeOrArrayElementType(); - while (BaseType != OrigType) { - BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); - OrigType = BaseType->getPointeeOrArrayElementType(); + // Sort the overlapped elements for each item. + llvm::SmallVector Layout; + if (!OverlappedData.empty()) + { + const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); + const Type *OrigType = BaseType->getPointeeOrArrayElementType(); + while (BaseType != OrigType) + { + BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); + OrigType = BaseType->getPointeeOrArrayElementType(); + } + + if (const auto *CRD = BaseType->getAsCXXRecordDecl()) + getPlainLayout(CRD, Layout, /*AsBase=*/false); + else + { + const auto *RD = BaseType->getAsRecordDecl(); + Layout.append(RD->field_begin(), RD->field_end()); + } } + for (auto &Pair : OverlappedData) + { + llvm::stable_sort( + Pair.getSecond(), + [&Layout]( + OMPClauseMappableExprCommon::MappableExprComponentListRef First, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Second) + { + auto CI = First.rbegin(); + auto CE = First.rend(); + auto SI = Second.rbegin(); + auto SE = Second.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) + { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != + SI->getAssociatedDeclaration()) + break; + } + + // Lists contain the same elements. + if (CI == CE && SI == SE) + return false; - if (const auto *CRD = BaseType->getAsCXXRecordDecl()) - getPlainLayout(CRD, Layout, /*AsBase=*/false); - else { - const auto *RD = BaseType->getAsRecordDecl(); - Layout.append(RD->field_begin(), RD->field_end()); + // List with less elements is less than list with more elements. + if (CI == CE || SI == SE) + return CI == CE; + + const auto *FD1 = cast(CI->getAssociatedDeclaration()); + const auto *FD2 = cast(SI->getAssociatedDeclaration()); + if (FD1->getParent() == FD2->getParent()) + return FD1->getFieldIndex() < FD2->getFieldIndex(); + const auto *It = + llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) + { return FD == FD1 || FD == FD2; }); + return *It == FD1; + }); } - } - for (auto &Pair : OverlappedData) { - llvm::stable_sort( - Pair.getSecond(), - [&Layout]( - OMPClauseMappableExprCommon::MappableExprComponentListRef First, - OMPClauseMappableExprCommon::MappableExprComponentListRef - Second) { - auto CI = First.rbegin(); - auto CE = First.rend(); - auto SI = Second.rbegin(); - auto SE = Second.rend(); - for (; CI != CE && SI != SE; ++CI, ++SI) { - if (CI->getAssociatedExpression()->getStmtClass() != - SI->getAssociatedExpression()->getStmtClass()) - break; - // Are we dealing with different variables/fields? - if (CI->getAssociatedDeclaration() != - SI->getAssociatedDeclaration()) - break; - } - // Lists contain the same elements. - if (CI == CE && SI == SE) - return false; - - // List with less elements is less than list with more elements. - if (CI == CE || SI == SE) - return CI == CE; - - const auto *FD1 = cast(CI->getAssociatedDeclaration()); - const auto *FD2 = cast(SI->getAssociatedDeclaration()); - if (FD1->getParent() == FD2->getParent()) - return FD1->getFieldIndex() < FD2->getFieldIndex(); - const auto *It = - llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { - return FD == FD1 || FD == FD2; - }); - return *It == FD1; - }); + // Associated with a capture, because the mapping flags depend on it. + // Go through all of the elements with the overlapped elements. + bool IsFirstComponentList = true; + for (const auto &Pair : OverlappedData) + { + const MapData &L = *Pair.getFirst(); + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef MapModifiers; + bool IsImplicit; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; + ArrayRef + OverlappedComponents = Pair.getSecond(); + generateInfoForComponentList( + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, + PartialStruct, IsFirstComponentList, IsImplicit, Mapper, + /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); + IsFirstComponentList = false; + } + // Go through other elements without overlapped elements. + for (const MapData &L : DeclComponentLists) + { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef MapModifiers; + bool IsImplicit; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; + auto It = OverlappedData.find(&L); + if (It == OverlappedData.end()) + generateInfoForComponentList(MapType, MapModifiers, std::nullopt, + Components, CombinedInfo, PartialStruct, + IsFirstComponentList, IsImplicit, Mapper, + /*ForDeviceAddr=*/false, VD, VarRef); + IsFirstComponentList = false; + } } - // Associated with a capture, because the mapping flags depend on it. - // Go through all of the elements with the overlapped elements. - bool IsFirstComponentList = true; - for (const auto &Pair : OverlappedData) { - const MapData &L = *Pair.getFirst(); - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - ArrayRef MapModifiers; - bool IsImplicit; - const ValueDecl *Mapper; - const Expr *VarRef; - std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = - L; - ArrayRef - OverlappedComponents = Pair.getSecond(); - generateInfoForComponentList( - MapType, MapModifiers, std::nullopt, Components, CombinedInfo, - PartialStruct, IsFirstComponentList, IsImplicit, Mapper, - /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); - IsFirstComponentList = false; - } - // Go through other elements without overlapped elements. - for (const MapData &L : DeclComponentLists) { - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - ArrayRef MapModifiers; - bool IsImplicit; - const ValueDecl *Mapper; - const Expr *VarRef; - std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = - L; - auto It = OverlappedData.find(&L); - if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, std::nullopt, - Components, CombinedInfo, PartialStruct, - IsFirstComponentList, IsImplicit, Mapper, - /*ForDeviceAddr=*/false, VD, VarRef); - IsFirstComponentList = false; - } - } - - /// Generate the default map information for a given capture \a CI, - /// record field declaration \a RI and captured value \a CV. - void generateDefaultMapInfo(const CapturedStmt::Capture &CI, - const FieldDecl &RI, llvm::Value *CV, - MapCombinedInfoTy &CombinedInfo) const { - bool IsImplicit = true; - // Do the default mapping. - if (CI.capturesThis()) { - CombinedInfo.Exprs.push_back(nullptr); - CombinedInfo.BasePointers.push_back(CV); - CombinedInfo.Pointers.push_back(CV); - const auto *PtrTy = cast(RI.getType().getTypePtr()); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), - CGF.Int64Ty, /*isSigned=*/true)); - // Default map type. - CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO | - OpenMPOffloadMappingFlags::OMP_MAP_FROM); - } else if (CI.capturesVariableByCopy()) { - const VarDecl *VD = CI.getCapturedVar(); - CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); - CombinedInfo.BasePointers.push_back(CV); - CombinedInfo.Pointers.push_back(CV); - if (!RI.getType()->isAnyPointerType()) { - // We have to signal to the runtime captures passed by value that are - // not pointers. - CombinedInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); - } else { - // Pointers are implicitly mapped with a zero size and no flags - // (other than first map that is added for all implicit maps). - CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE); - CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); + /// Generate the default map information for a given capture \a CI, + /// record field declaration \a RI and captured value \a CV. + void generateDefaultMapInfo(const CapturedStmt::Capture &CI, + const FieldDecl &RI, llvm::Value *CV, + MapCombinedInfoTy &CombinedInfo) const + { + bool IsImplicit = true; + // Do the default mapping. + if (CI.capturesThis()) + { + CombinedInfo.Exprs.push_back(nullptr); + CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.Pointers.push_back(CV); + const auto *PtrTy = cast(RI.getType().getTypePtr()); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), + CGF.Int64Ty, /*isSigned=*/true)); + // Default map type. + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM); } - auto I = FirstPrivateDecls.find(VD); - if (I != FirstPrivateDecls.end()) - IsImplicit = I->getSecond(); - } else { - assert(CI.capturesVariable() && "Expected captured reference."); - const auto *PtrTy = cast(RI.getType().getTypePtr()); - QualType ElementType = PtrTy->getPointeeType(); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); - // The default map type for a scalar/complex type is 'to' because by - // default the value doesn't have to be retrieved. For an aggregate - // type, the default is 'tofrom'. - CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); - const VarDecl *VD = CI.getCapturedVar(); - auto I = FirstPrivateDecls.find(VD); - CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); - CombinedInfo.BasePointers.push_back(CV); - if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { - Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( - CV, ElementType, CGF.getContext().getDeclAlign(VD), - AlignmentSource::Decl)); - CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); - } else { + else if (CI.capturesVariableByCopy()) + { + const VarDecl *VD = CI.getCapturedVar(); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(CV); CombinedInfo.Pointers.push_back(CV); + if (!RI.getType()->isAnyPointerType()) + { + // We have to signal to the runtime captures passed by value that are + // not pointers. + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); + } + else + { + // Pointers are implicitly mapped with a zero size and no flags + // (other than first map that is added for all implicit maps). + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE); + CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); + } + auto I = FirstPrivateDecls.find(VD); + if (I != FirstPrivateDecls.end()) + IsImplicit = I->getSecond(); } - if (I != FirstPrivateDecls.end()) - IsImplicit = I->getSecond(); - } - // Every default map produces a single argument which is a target parameter. - CombinedInfo.Types.back() |= - OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + else + { + assert(CI.capturesVariable() && "Expected captured reference."); + const auto *PtrTy = cast(RI.getType().getTypePtr()); + QualType ElementType = PtrTy->getPointeeType(); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); + // The default map type for a scalar/complex type is 'to' because by + // default the value doesn't have to be retrieved. For an aggregate + // type, the default is 'tofrom'. + CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); + const VarDecl *VD = CI.getCapturedVar(); + auto I = FirstPrivateDecls.find(VD); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(CV); + if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) + { + Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( + CV, ElementType, CGF.getContext().getDeclAlign(VD), + AlignmentSource::Decl)); + CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); + } + else + { + CombinedInfo.Pointers.push_back(CV); + } + if (I != FirstPrivateDecls.end()) + IsImplicit = I->getSecond(); + } + // Every default map produces a single argument which is a target parameter. + CombinedInfo.Types.back() |= + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; - // Add flag stating this is an implicit map. - if (IsImplicit) - CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + // Add flag stating this is an implicit map. + if (IsImplicit) + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; - // No user-defined mapper for default mapping. - CombinedInfo.Mappers.push_back(nullptr); - } -}; + // No user-defined mapper for default mapping. + CombinedInfo.Mappers.push_back(nullptr); + } + }; } // anonymous namespace static void emitNonContiguousDescriptor( CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, - CGOpenMPRuntime::TargetDataInfo &Info) { + CGOpenMPRuntime::TargetDataInfo &Info) +{ CodeGenModule &CGM = CGF.CGM; MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo &NonContigInfo = CombinedInfo.NonContigInfo; @@ -9022,11 +9862,17 @@ RD->completeDefinition(); QualType DimTy = C.getRecordType(RD); - enum { OffsetFD = 0, CountFD, StrideFD }; + enum + { + OffsetFD = 0, + CountFD, + StrideFD + }; // We need two index variable here since the size of "Dims" is the same as the // size of Components, however, the size of offset, count, and stride is equal // to the size of base declaration that is non-contiguous. - for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { + for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) + { // Skip emitting ir if dimension size is 1 since it cannot be // non-contiguous. if (NonContigInfo.Dims[I] == 1) @@ -9035,7 +9881,8 @@ QualType ArrayTy = C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); - for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { + for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) + { unsigned RevIdx = EE - II - 1; LValue DimsLVal = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); @@ -9065,7 +9912,8 @@ } // Try to extract the base declaration from a `this->x` expression if possible. -static ValueDecl *getDeclFromThisExpr(const Expr *E) { +static ValueDecl *getDeclFromThisExpr(const Expr *E) +{ if (!E) return nullptr; @@ -9080,29 +9928,36 @@ /// offloading runtime library. llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, - MappableExprsHandler::MappingExprInfo &MapExprs) { + MappableExprsHandler::MappingExprInfo &MapExprs) +{ uint32_t SrcLocStrSize; if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); SourceLocation Loc; - if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { + if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) + { if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) Loc = VD->getLocation(); else Loc = MapExprs.getMapExpr()->getExprLoc(); - } else { + } + else + { Loc = MapExprs.getMapDecl()->getLocation(); } std::string ExprName; - if (MapExprs.getMapExpr()) { + if (MapExprs.getMapExpr()) + { PrintingPolicy P(CGF.getContext().getLangOpts()); llvm::raw_string_ostream OS(ExprName); MapExprs.getMapExpr()->printPretty(OS, nullptr, P); OS.flush(); - } else { + } + else + { ExprName = MapExprs.getMapDecl()->getNameAsString(); } @@ -9118,7 +9973,8 @@ static void emitOffloadingArrays( CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, - bool IsNonContiguous = false) { + bool IsNonContiguous = false) +{ CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGF.getContext(); @@ -9126,7 +9982,8 @@ Info.clearArrayInfo(); Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); - if (Info.NumberOfPtrs) { + if (Info.NumberOfPtrs) + { // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. @@ -9151,9 +10008,12 @@ SmallVector ConstSizes( CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); - for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { - if (auto *CI = dyn_cast(CombinedInfo.Sizes[I])) { - if (!isa(CI) && !isa(CI)) { + for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) + { + if (auto *CI = dyn_cast(CombinedInfo.Sizes[I])) + { + if (!isa(CI) && !isa(CI)) + { if (IsNonContiguous && static_cast>( CombinedInfo.Types[I] & @@ -9168,13 +10028,16 @@ RuntimeSizes.set(I); } - if (RuntimeSizes.all()) { + if (RuntimeSizes.all()) + { QualType SizeArrayType = Ctx.getConstantArrayType( Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Info.RTArgs.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { + } + else + { auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); @@ -9182,7 +10045,8 @@ CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - if (RuntimeSizes.any()) { + if (RuntimeSizes.any()) + { QualType SizeArrayType = Ctx.getConstantArrayType( Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); @@ -9197,7 +10061,9 @@ /*DestWidth=*/64, /*Signed=*/false))), CGF.getTypeSize(SizeArrayType)); Info.RTArgs.SizesArray = Buffer.getPointer(); - } else { + } + else + { Info.RTArgs.SizesArray = SizesArrayGbl; } } @@ -9217,11 +10083,15 @@ // The information types are only built if there is debug information // requested. - if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) + { Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue( llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); - } else { - auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + } + else + { + auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) + { return emitMappingInformation(CGF, OMPBuilder, MapExpr); }; SmallVector InfoMap(CombinedInfo.Exprs.size()); @@ -9235,26 +10105,31 @@ // If there's a present map type modifier, it must not be applied to the end // of a region, so generate a separate map type array in that case. - if (Info.separateBeginEndCalls()) { + if (Info.separateBeginEndCalls()) + { bool EndMapTypesDiffer = false; - for (uint64_t &Type : Mapping) { + for (uint64_t &Type : Mapping) + { if (Type & static_cast>( - OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) { + OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) + { Type &= ~static_cast>( OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); EndMapTypesDiffer = true; } } - if (EndMapTypesDiffer) { + if (EndMapTypesDiffer) + { MapTypesArrayGbl = OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl; } } - for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { + for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) + { llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), @@ -9279,7 +10154,8 @@ Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); - if (RuntimeSizes.test(I)) { + if (RuntimeSizes.test(I)) + { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray, @@ -9294,7 +10170,8 @@ // Fill up the mapper array. llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); - if (CombinedInfo.Mappers[I]) { + if (CombinedInfo.Mappers[I]) + { MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( cast(CombinedInfo.Mappers[I])); MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); @@ -9314,7 +10191,8 @@ /// Check for inner distribute directive. static const OMPExecutableDirective * -getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { +getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) +{ const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); @@ -9322,20 +10200,24 @@ CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); if (const auto *NestedDir = - dyn_cast_or_null(ChildStmt)) { + dyn_cast_or_null(ChildStmt)) + { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); - switch (D.getDirectiveKind()) { + switch (D.getDirectiveKind()) + { case OMPD_target: if (isOpenMPDistributeDirective(DKind)) return NestedDir; - if (DKind == OMPD_teams) { + if (DKind == OMPD_teams) + { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return nullptr; ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); if (const auto *NND = - dyn_cast_or_null(ChildStmt)) { + dyn_cast_or_null(ChildStmt)) + { DKind = NND->getDirectiveKind(); if (isOpenMPDistributeDirective(DKind)) return NND; @@ -9455,7 +10337,8 @@ /// } /// \endcode void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, - CodeGenFunction *CGF) { + CodeGenFunction *CGF) +{ if (UDMMap.count(D) > 0) return; ASTContext &C = CGM.getContext(); @@ -9579,7 +10462,8 @@ MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); // Fill up the runtime mapper handle for all components. - for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { + for (unsigned I = 0; I < Info.BasePointers.size(); ++I) + { llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( @@ -9672,15 +10556,18 @@ CurMapType->addIncoming(FromMapType, FromBB); CurMapType->addIncoming(MemberMapType, ToElseBB); - llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, CurSizeArg, CurMapType, CurNameArg}; - if (Info.Mappers[I]) { + if (Info.Mappers[I]) + { // Call the corresponding mapper function. llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( cast(Info.Mappers[I])); assert(MapperFunc && "Expect a valid mapper function is available."); MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); - } else { + } + else + { // Call the runtime API __tgt_push_mapper_component to fill up the runtime // data structure. MapperCGF.EmitRuntimeCall( @@ -9710,7 +10597,8 @@ MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); MapperCGF.FinishFunction(); UDMMap.try_emplace(D, Fn); - if (CGF) { + if (CGF) + { auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); Decls.second.push_back(D); } @@ -9726,7 +10614,8 @@ CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, - bool IsInit) { + bool IsInit) +{ StringRef Prefix = IsInit ? ".init" : ".del"; // Evaluate if this is an array section. @@ -9741,7 +10630,8 @@ OpenMPOffloadMappingFlags::OMP_MAP_DELETE))); llvm::Value *DeleteCond; llvm::Value *Cond; - if (IsInit) { + if (IsInit) + { // base != begin? llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); // IsPtrAndObj? @@ -9755,7 +10645,9 @@ Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); DeleteCond = MapperCGF.Builder.CreateIsNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); - } else { + } + else + { Cond = IsArray; DeleteCond = MapperCGF.Builder.CreateIsNotNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); @@ -9784,7 +10676,7 @@ // Call the runtime API __tgt_push_mapper_component to fill up the runtime // data structure. - llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, + llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg, MapName}; MapperCGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), @@ -9793,7 +10685,8 @@ } llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( - const OMPDeclareMapperDecl *D) { + const OMPDeclareMapperDecl *D) +{ auto I = UDMMap.find(D); if (I != UDMMap.end()) return I->second; @@ -9805,7 +10698,8 @@ CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref - SizeEmitter) { + SizeEmitter) +{ OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -9826,7 +10720,8 @@ llvm::PointerIntPair Device, llvm::function_ref - SizeEmitter) { + SizeEmitter) +{ if (!CGF.HaveInsertPoint()) return; @@ -9841,7 +10736,8 @@ llvm::SmallVector CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &) + { CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); }; emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); @@ -9851,11 +10747,16 @@ llvm::Value *MapNamesArray = nullptr; // Generate code for the host fallback function. auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, - &CS, OffloadingMandatory](CodeGenFunction &CGF) { - if (OffloadingMandatory) { + &CS, OffloadingMandatory](CodeGenFunction &CGF) + { + if (OffloadingMandatory) + { CGF.Builder.CreateUnreachable(); - } else { - if (RequiresOuterTask) { + } + else + { + if (RequiresOuterTask) + { CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } @@ -9865,8 +10766,10 @@ // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter, - FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { - if (Device.getInt() == OMPC_DEVICE_ancestor) { + FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) + { + if (Device.getInt() == OMPC_DEVICE_ancestor) + { // Reverse offloading is not supported, so just execute on the host. FallbackGen(CGF); return; @@ -9887,14 +10790,17 @@ // Emit device ID if any. llvm::Value *DeviceID; - if (Device.getPointer()) { + if (Device.getPointer()) + { assert((Device.getInt() == OMPC_DEVICE_unknown || Device.getInt() == OMPC_DEVICE_device_num) && "Expected device_num modifier."); llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); DeviceID = CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); - } else { + } + else + { DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); } @@ -9916,7 +10822,8 @@ emitTargetNumIterationsCall(CGF, D, SizeEmitter); llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); - if (auto *DynMemClause = D.getSingleClause()) { + if (auto *DynMemClause = D.getSingleClause()) + { CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( DynMemClause->getSize(), /*IgnoreResultAssign=*/true); @@ -9985,13 +10892,15 @@ }; // Notify that the host version must be executed. - auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) + { FallbackGen(CGF); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, &MapNamesArray, &CapturedVars, RequiresOuterTask, - &CS](CodeGenFunction &CGF, PrePostActionTy &) { + &CS](CodeGenFunction &CGF, PrePostActionTy &) + { // Fill up the arrays with all the captured variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; @@ -10004,13 +10913,15 @@ auto *CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { + CI != CE; ++CI, ++RI, ++CV) + { MappableExprsHandler::MapCombinedInfoTy CurInfo; MappableExprsHandler::StructRangeInfoTy PartialStruct; // VLA sizes are passed to the outlined region by copy and do not have map // information associated. - if (CI->capturesVariableArrayType()) { + if (CI->capturesVariableArrayType()) + { CurInfo.Exprs.push_back(nullptr); CurInfo.BasePointers.push_back(*CV); CurInfo.Pointers.push_back(*CV); @@ -10022,7 +10933,9 @@ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CurInfo.Mappers.push_back(nullptr); - } else { + } + else + { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); @@ -10049,7 +10962,8 @@ // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) { + if (PartialStruct.Base.isValid()) + { CombinedInfo.append(PartialStruct.PreliminaryMapData); MEHandler.emitCombinedEntry( CombinedInfo, CurInfo.Types, PartialStruct, nullptr, @@ -10094,11 +11008,15 @@ }; auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( - CodeGenFunction &CGF, PrePostActionTy &) { - if (RequiresOuterTask) { + CodeGenFunction &CGF, PrePostActionTy &) + { + if (RequiresOuterTask) + { CodeGenFunction::OMPTargetDataInfo InputInfo; CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); - } else { + } + else + { emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); } }; @@ -10107,21 +11025,28 @@ // offloading, otherwise, just execute on the host. We need to execute on host // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. - if (OutlinedFnID) { - if (IfCond) { + if (OutlinedFnID) + { + if (IfCond) + { emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); - } else { + } + else + { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } - } else { + } + else + { RegionCodeGenTy ElseRCG(TargetElseGen); ElseRCG(CGF); } } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, - StringRef ParentName) { + StringRef ParentName) +{ if (!S) return; @@ -10131,7 +11056,8 @@ isOpenMPTargetExecutionDirective( cast(S)->getDirectiveKind()); - if (RequiresDeviceCodegen) { + if (RequiresDeviceCodegen) + { const auto &E = *cast(S); auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName); @@ -10141,7 +11067,8 @@ if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; - switch (E.getDirectiveKind()) { + switch (E.getDirectiveKind()) + { case OMPD_target: CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, cast(E)); @@ -10250,7 +11177,8 @@ return; } - if (const auto *E = dyn_cast(S)) { + if (const auto *E = dyn_cast(S)) + { if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) return; @@ -10267,7 +11195,8 @@ scanForTargetRegionsFunctions(II, ParentName); } -static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { +static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) +{ std::optional DevTy = OMPDeclareTargetDeclAttr::getDeviceType(VD); if (!DevTy) @@ -10281,10 +11210,12 @@ return false; } -bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { +bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) +{ // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) { + if (!CGM.getLangOpts().OpenMPIsDevice) + { if (const auto *FD = dyn_cast(GD.getDecl())) if (isAssumedToBeNotEmitted(cast(FD), CGM.getLangOpts().OpenMPIsDevice)) @@ -10294,7 +11225,8 @@ const ValueDecl *VD = cast(GD.getDecl()); // Try to detect target regions in the function. - if (const auto *FD = dyn_cast(VD)) { + if (const auto *FD = dyn_cast(VD)) + { StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); if (isAssumedToBeNotEmitted(cast(FD), @@ -10307,7 +11239,8 @@ AlreadyEmittedTargetDecls.count(VD) == 0; } -bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { +bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) +{ if (isAssumedToBeNotEmitted(cast(GD.getDecl()), CGM.getLangOpts().OpenMPIsDevice)) return true; @@ -10319,13 +11252,16 @@ // regions in it. We use the complete variant to produce the kernel name // mangling. QualType RDTy = cast(GD.getDecl())->getType(); - if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { - for (const CXXConstructorDecl *Ctor : RD->ctors()) { + if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) + { + for (const CXXConstructorDecl *Ctor : RD->ctors()) + { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); } - if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { + if (const CXXDestructorDecl *Dtor = RD->getDestructor()) + { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); @@ -10339,7 +11275,8 @@ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory)) { + HasRequiresUnifiedSharedMemory)) + { DeferredGlobalVariables.insert(cast(GD.getDecl())); return true; } @@ -10347,7 +11284,8 @@ } void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, - llvm::Constant *Addr) { + llvm::Constant *Addr) +{ if (CGM.getLangOpts().OMPTargetTriples.empty() && !CGM.getLangOpts().OpenMPIsDevice) return; @@ -10360,8 +11298,10 @@ std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res) { - if (CGM.getLangOpts().OpenMPIsDevice) { + if (!Res) + { + if (CGM.getLangOpts().OpenMPIsDevice) + { // Register non-target variables being emitted in device code (debug info // may cause this). StringRef VarName = CGM.getMangledName(VD); @@ -10377,25 +11317,31 @@ if ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - !HasRequiresUnifiedSharedMemory) { + !HasRequiresUnifiedSharedMemory) + { Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; VarName = CGM.getMangledName(VD); - if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { + if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) + { VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity(); assert(VarSize != 0 && "Expected non-zero size of the variable"); - } else { + } + else + { VarSize = 0; } Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { + if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) + { // Do not create a "ref-variable" if the original is not also available // on the host. if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) return; std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { + if (!CGM.GetGlobalValue(RefName)) + { llvm::Constant *AddrRef = OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName); auto *GVAddrRef = cast(AddrRef); @@ -10405,7 +11351,9 @@ CGM.addCompilerUsedGlobal(GVAddrRef); } } - } else { + } + else + { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && @@ -10416,10 +11364,13 @@ else Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsDevice) + { VarName = Addr->getName(); Addr = nullptr; - } else { + } + else + { VarName = getAddrOfDeclareTargetVar(VD).getName(); Addr = cast(getAddrOfDeclareTargetVar(VD).getPointer()); } @@ -10431,7 +11382,8 @@ VarName, Addr, VarSize, Flags, Linkage); } -bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { +bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) +{ if (isa(GD.getDecl()) || isa(GD.getDecl())) return emitTargetFunctions(GD); @@ -10439,17 +11391,22 @@ return emitTargetGlobalVariable(GD); } -void CGOpenMPRuntime::emitDeferredTargetDecls() const { - for (const VarDecl *VD : DeferredGlobalVariables) { +void CGOpenMPRuntime::emitDeferredTargetDecls() const +{ + for (const VarDecl *VD : DeferredGlobalVariables) + { std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; if ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - !HasRequiresUnifiedSharedMemory) { + !HasRequiresUnifiedSharedMemory) + { CGM.EmitGlobal(VD); - } else { + } + else + { assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && @@ -10461,19 +11418,26 @@ } void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( - CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + CodeGenFunction &CGF, const OMPExecutableDirective &D) const +{ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && " Expected target-based directive."); } -void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { - for (const OMPClause *Clause : D->clauselists()) { - if (Clause->getClauseKind() == OMPC_unified_shared_memory) { +void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) +{ + for (const OMPClause *Clause : D->clauselists()) + { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) + { HasRequiresUnifiedSharedMemory = true; OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); - } else if (const auto *AC = - dyn_cast(Clause)) { - switch (AC->getAtomicDefaultMemOrderKind()) { + } + else if (const auto *AC = + dyn_cast(Clause)) + { + switch (AC->getAtomicDefaultMemOrderKind()) + { case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; break; @@ -10490,16 +11454,19 @@ } } -llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { +llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const +{ return RequiresAtomicOrdering; } bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, - LangAS &AS) { + LangAS &AS) +{ if (!VD || !VD->hasAttr()) return false; const auto *A = VD->getAttr(); - switch(A->getAllocatorType()) { + switch (A->getAllocatorType()) + { case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: // Not supported, fallback to the default mem space. @@ -10519,33 +11486,40 @@ return false; } -bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { +bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const +{ return HasRequiresUnifiedSharedMemory; } CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) - : CGM(CGM) { - if (CGM.getLangOpts().OpenMPIsDevice) { + : CGM(CGM) +{ + if (CGM.getLangOpts().OpenMPIsDevice) + { SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; } } -CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { +CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() +{ if (CGM.getLangOpts().OpenMPIsDevice) CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; } -bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { +bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) +{ if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; const auto *D = cast(GD.getDecl()); // Do not to emit function if it is marked as declare target as it was already // emitted. - if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { + if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) + { + if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) + { if (auto *F = dyn_cast_or_null( CGM.GetGlobalValue(CGM.getMangledName(GD)))) return !F->isDeclaration(); @@ -10557,7 +11531,8 @@ return !AlreadyEmittedTargetDecls.insert(D).second; } -llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { +llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() +{ // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. if (CGM.getLangOpts().OMPTargetTriples.empty() || @@ -10603,7 +11578,8 @@ const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, - ArrayRef CapturedVars) { + ArrayRef CapturedVars) +{ if (!CGF.HaveInsertPoint()) return; @@ -10627,7 +11603,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, - SourceLocation Loc) { + SourceLocation Loc) +{ if (!CGF.HaveInsertPoint()) return; @@ -10656,7 +11633,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, - CGOpenMPRuntime::TargetDataInfo &Info) { + CGOpenMPRuntime::TargetDataInfo &Info) +{ if (!CGF.HaveInsertPoint()) return; @@ -10668,7 +11646,8 @@ // arguments of the runtime call by reference because they are used in the // closing of the region. auto &&BeginThenGen = [this, &D, Device, &Info, - &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { + &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) + { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; @@ -10688,10 +11667,13 @@ // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) { + if (Device) + { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), CGF.Int64Ty, /*isSigned=*/true); - } else { + } + else + { DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); } @@ -10723,7 +11705,8 @@ // Generate code for the closing of the data region. auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &) + { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs; @@ -10735,10 +11718,13 @@ // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) { + if (Device) + { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), CGF.Int64Ty, /*isSigned=*/true); - } else { + } + else + { DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); } @@ -10767,8 +11753,10 @@ // region with no privatization in the 'else' branch of the conditional. // Otherwise, we don't have to do anything. auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, - PrePostActionTy &) { - if (!Info.CaptureDeviceAddrMap.empty()) { + PrePostActionTy &) + { + if (!Info.CaptureDeviceAddrMap.empty()) + { CodeGen.setAction(NoPrivAction); CodeGen(CGF); } @@ -10778,23 +11766,30 @@ // to false. auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); - } else { + } + else + { RegionCodeGenTy RCG(BeginThenGen); RCG(CGF); } // If we don't require privatization of device pointers, we emit the body in // between the runtime calls. This avoids duplicating the body code. - if (Info.CaptureDeviceAddrMap.empty()) { + if (Info.CaptureDeviceAddrMap.empty()) + { CodeGen.setAction(NoPrivAction); CodeGen(CGF); } - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); - } else { + } + else + { RegionCodeGenTy RCG(EndThenGen); RCG(CGF); } @@ -10802,7 +11797,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device) { + const Expr *Device) +{ if (!CGF.HaveInsertPoint()) return; @@ -10816,13 +11812,17 @@ llvm::Value *MapNamesArray = nullptr; // Generate the code for the opening of the data environment. auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, - &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { + &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) + { // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) { + if (Device) + { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), CGF.Int64Ty, /*isSigned=*/true); - } else { + } + else + { DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); } @@ -10847,7 +11847,8 @@ // directive. const bool HasNowait = D.hasClausesOfKind(); RuntimeFunction RTLFn; - switch (D.getDirectiveKind()) { + switch (D.getDirectiveKind()) + { case OMPD_target_enter_data: RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper : OMPRTL___tgt_target_data_begin_mapper; @@ -10937,7 +11938,8 @@ auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, &MapNamesArray](CodeGenFunction &CGF, - PrePostActionTy &) { + PrePostActionTy &) + { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; @@ -10973,36 +11975,43 @@ emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); }; - if (IfCond) { + if (IfCond) + { emitIfClause(CGF, IfCond, TargetThenGen, [](CodeGenFunction &CGF, PrePostActionTy &) {}); - } else { + } + else + { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } } -namespace { +namespace +{ /// Kind of parameter in a function with 'declare simd' directive. -enum ParamKindTy { - Linear, - LinearRef, - LinearUVal, - LinearVal, - Uniform, - Vector, -}; -/// Attribute set of the parameter. -struct ParamAttrTy { - ParamKindTy Kind = Vector; - llvm::APSInt StrideOrArg; - llvm::APSInt Alignment; - bool HasVarStride = false; -}; + enum ParamKindTy + { + Linear, + LinearRef, + LinearUVal, + LinearVal, + Uniform, + Vector, + }; + /// Attribute set of the parameter. + struct ParamAttrTy + { + ParamKindTy Kind = Vector; + llvm::APSInt StrideOrArg; + llvm::APSInt Alignment; + bool HasVarStride = false; + }; } // namespace static unsigned evaluateCDTSize(const FunctionDecl *FD, - ArrayRef ParamAttrs) { + ArrayRef ParamAttrs) +{ // Every vector variant of a SIMD-enabled function has a vector length (VLEN). // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument // of that clause. The VLEN value must be power of 2. @@ -11027,18 +12036,25 @@ return 0; ASTContext &C = FD->getASTContext(); QualType CDT; - if (!RetType.isNull() && !RetType->isVoidType()) { + if (!RetType.isNull() && !RetType->isVoidType()) + { CDT = RetType; - } else { + } + else + { unsigned Offset = 0; - if (const auto *MD = dyn_cast(FD)) { + if (const auto *MD = dyn_cast(FD)) + { if (ParamAttrs[Offset].Kind == Vector) CDT = C.getPointerType(C.getRecordType(MD->getParent())); ++Offset; } - if (CDT.isNull()) { - for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { - if (ParamAttrs[I + Offset].Kind == Vector) { + if (CDT.isNull()) + { + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) + { + if (ParamAttrs[I + Offset].Kind == Vector) + { CDT = FD->getParamDecl(I)->getType(); break; } @@ -11056,11 +12072,14 @@ /// Mangle the parameter part of the vector function name according to /// their OpenMP classification. The mangling function is defined in /// section 4.5 of the AAVFABI(2021Q1). -static std::string mangleVectorParameters(ArrayRef ParamAttrs) { +static std::string mangleVectorParameters(ArrayRef ParamAttrs) +{ SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - for (const auto &ParamAttr : ParamAttrs) { - switch (ParamAttr.Kind) { + for (const auto &ParamAttr : ParamAttrs) + { + switch (ParamAttr.Kind) + { case Linear: Out << 'l'; break; @@ -11083,7 +12102,8 @@ if (ParamAttr.HasVarStride) Out << "s" << ParamAttr.StrideOrArg; else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || - ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { + ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) + { // Don't print the step value if it is not present or if it is // equal to 1. if (ParamAttr.StrideOrArg < 0) @@ -11103,27 +12123,25 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef ParamAttrs, - OMPDeclareSimdDeclAttr::BranchStateTy State) { - struct ISADataTy { + OMPDeclareSimdDeclAttr::BranchStateTy State) +{ + struct ISADataTy + { char ISA; unsigned VecRegSize; }; ISADataTy ISAData[] = { + {'b', 128}, // SSE { - 'b', 128 - }, // SSE + 'c', 256}, // AVX { - 'c', 256 - }, // AVX + 'd', 256}, // AVX2 { - 'd', 256 - }, // AVX2 - { - 'e', 512 - }, // AVX512 + 'e', 512}, // AVX512 }; llvm::SmallVector Masked; - switch (State) { + switch (State) + { case OMPDeclareSimdDeclAttr::BS_Undefined: Masked.push_back('N'); Masked.push_back('M'); @@ -11135,16 +12153,21 @@ Masked.push_back('M'); break; } - for (char Mask : Masked) { - for (const ISADataTy &Data : ISAData) { + for (char Mask : Masked) + { + for (const ISADataTy &Data : ISAData) + { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << "_ZGV" << Data.ISA << Mask; - if (!VLENVal) { + if (!VLENVal) + { unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); assert(NumElts && "Non-zero simdlen/cdtsize expected"); Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); - } else { + } + else + { Out << VLENVal; } Out << mangleVectorParameters(ParamAttrs); @@ -11161,7 +12184,8 @@ // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1). -static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { +static bool getAArch64MTV(QualType QT, ParamKindTy Kind) +{ QT = QT.getCanonicalType(); if (QT->isVoidType()) @@ -11170,7 +12194,7 @@ if (Kind == ParamKindTy::Uniform) return false; - if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef) + if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef) return false; if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) && @@ -11181,7 +12205,8 @@ } /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. -static bool getAArch64PBV(QualType QT, ASTContext &C) { +static bool getAArch64PBV(QualType QT, ASTContext &C) +{ QT = QT.getCanonicalType(); unsigned Size = C.getTypeSize(QT); @@ -11206,8 +12231,10 @@ /// Computes the lane size (LS) of a return type or of an input parameter, /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. /// TODO: Add support for references, section 3.2.1, item 1. -static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { - if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { +static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) +{ + if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) + { QualType PTy = QT.getCanonicalType()->getPointeeType(); if (getAArch64PBV(PTy, C)) return C.getTypeSize(PTy); @@ -11222,7 +12249,8 @@ // signature of the scalar function, as defined in 3.2.2 of the // AAVFABI. static std::tuple -getNDSWDS(const FunctionDecl *FD, ArrayRef ParamAttrs) { +getNDSWDS(const FunctionDecl *FD, ArrayRef ParamAttrs) +{ QualType RetType = FD->getReturnType().getCanonicalType(); ASTContext &C = FD->getASTContext(); @@ -11230,12 +12258,14 @@ bool OutputBecomesInput = false; llvm::SmallVector Sizes; - if (!RetType->isVoidType()) { + if (!RetType->isVoidType()) + { Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) OutputBecomesInput = true; } - for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) + { QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); } @@ -11244,7 +12274,8 @@ // The LS of a function parameter / return value can only be a power // of 2, starting from 8 bits, up to 128. assert(llvm::all_of(Sizes, - [](unsigned Size) { + [](unsigned Size) + { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && @@ -11262,7 +12293,8 @@ static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, - llvm::Function *Fn) { + llvm::Function *Fn) +{ SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Prefix << ISA << LMask << VLEN; @@ -11278,8 +12310,10 @@ StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, - llvm::Function *Fn) { - switch (NDS) { + llvm::Function *Fn) +{ + switch (NDS) + { case 8: addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, OutputBecomesInput, Fn); @@ -11313,7 +12347,8 @@ CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, - char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { + char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) +{ // Get basic data for building the vector signature. const auto Data = getNDSWDS(FD, ParamAttrs); @@ -11323,7 +12358,8 @@ // Check the values provided via `simdlen` by the user. // 1. A `simdlen(1)` doesn't produce vector signatures, - if (UserVLEN == 1) { + if (UserVLEN == 1) + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "The clause simdlen(1) has no effect when targeting aarch64."); @@ -11333,7 +12369,8 @@ // 2. Section 3.3.1, item 1: user input must be a power of 2 for // Advanced SIMD output. - if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { + if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "The value specified in simdlen must be a " "power of 2 when targeting Advanced SIMD."); @@ -11343,8 +12380,10 @@ // 3. Section 3.4.1. SVE fixed lengh must obey the architectural // limits. - if (ISA == 's' && UserVLEN != 0) { - if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { + if (ISA == 's' && UserVLEN != 0) + { + if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) + { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " "lanes in the architectural constraints " @@ -11359,16 +12398,21 @@ const std::string ParSeq = mangleVectorParameters(ParamAttrs); StringRef Prefix = "_ZGV"; // Generate simdlen from user input (if any). - if (UserVLEN) { - if (ISA == 's') { + if (UserVLEN) + { + if (ISA == 's') + { // SVE generates only a masked function. addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, OutputBecomesInput, Fn); - } else { + } + else + { assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); // Advanced SIMD generates one or two functions, depending on // the `[not]inbranch` clause. - switch (State) { + switch (State) + { case OMPDeclareSimdDeclAttr::BS_Undefined: addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, OutputBecomesInput, Fn); @@ -11385,19 +12429,25 @@ break; } } - } else { + } + else + { // If no user simdlen is provided, follow the AAVFABI rules for // generating the vector length. - if (ISA == 's') { + if (ISA == 's') + { // SVE, section 3.4.1, item 1. addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, OutputBecomesInput, Fn); - } else { + } + else + { assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or // two vector names depending on the use of the clause // `[not]inbranch`. - switch (State) { + switch (State) + { case OMPDeclareSimdDeclAttr::BS_Undefined: addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, OutputBecomesInput, Fn); @@ -11418,28 +12468,36 @@ } void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, - llvm::Function *Fn) { + llvm::Function *Fn) +{ ASTContext &C = CGM.getContext(); FD = FD->getMostRecentDecl(); - while (FD) { + while (FD) + { // Map params to their positions in function decl. llvm::DenseMap ParamPositions; if (isa(FD)) ParamPositions.try_emplace(FD, 0); unsigned ParamPos = ParamPositions.size(); - for (const ParmVarDecl *P : FD->parameters()) { + for (const ParmVarDecl *P : FD->parameters()) + { ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); ++ParamPos; } - for (const auto *Attr : FD->specific_attrs()) { + for (const auto *Attr : FD->specific_attrs()) + { llvm::SmallVector ParamAttrs(ParamPositions.size()); // Mark uniform parameters. - for (const Expr *E : Attr->uniforms()) { + for (const Expr *E : Attr->uniforms()) + { E = E->IgnoreParenImpCasts(); unsigned Pos; - if (isa(E)) { + if (isa(E)) + { Pos = ParamPositions[FD]; - } else { + } + else + { const auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); auto It = ParamPositions.find(PVD); @@ -11450,14 +12508,18 @@ } // Get alignment info. auto *NI = Attr->alignments_begin(); - for (const Expr *E : Attr->aligneds()) { + for (const Expr *E : Attr->aligneds()) + { E = E->IgnoreParenImpCasts(); unsigned Pos; QualType ParmTy; - if (isa(E)) { + if (isa(E)) + { Pos = ParamPositions[FD]; ParmTy = E->getType(); - } else { + } + else + { const auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); auto It = ParamPositions.find(PVD); @@ -11476,20 +12538,24 @@ // Mark linear parameters. auto *SI = Attr->steps_begin(); auto *MI = Attr->modifiers_begin(); - for (const Expr *E : Attr->linears()) { + for (const Expr *E : Attr->linears()) + { E = E->IgnoreParenImpCasts(); unsigned Pos; bool IsReferenceType = false; // Rescaling factor needed to compute the linear parameter // value in the mangled name. unsigned PtrRescalingFactor = 1; - if (isa(E)) { + if (isa(E)) + { Pos = ParamPositions[FD]; auto *P = cast(E->getType()); PtrRescalingFactor = CGM.getContext() .getTypeSizeInChars(P->getPointeeType()) .getQuantity(); - } else { + } + else + { const auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); auto It = ParamPositions.find(PVD); @@ -11499,7 +12565,8 @@ PtrRescalingFactor = CGM.getContext() .getTypeSizeInChars(P->getPointeeType()) .getQuantity(); - else if (PVD->getType()->isReferenceType()) { + else if (PVD->getType()->isReferenceType()) + { IsReferenceType = true; PtrRescalingFactor = CGM.getContext() @@ -11518,13 +12585,17 @@ ParamAttr.Kind = Linear; // Assuming a stride of 1, for `linear` without modifiers. ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); - if (*SI) { + if (*SI) + { Expr::EvalResult Result; - if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { + if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) + { if (const auto *DRE = - cast((*SI)->IgnoreParenImpCasts())) { + cast((*SI)->IgnoreParenImpCasts())) + { if (const auto *StridePVD = - dyn_cast(DRE->getDecl())) { + dyn_cast(DRE->getDecl())) + { ParamAttr.HasVarStride = true; auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); assert(It != ParamPositions.end() && @@ -11532,7 +12603,9 @@ ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); } } - } else { + } + else + { ParamAttr.StrideOrArg = Result.Val.getInt(); } } @@ -11548,14 +12621,18 @@ llvm::APSInt VLENVal; SourceLocation ExprLoc; const Expr *VLENExpr = Attr->getSimdlen(); - if (VLENExpr) { + if (VLENExpr) + { VLENVal = VLENExpr->EvaluateKnownConstInt(C); ExprLoc = VLENExpr->getExprLoc(); } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().isX86()) { + if (CGM.getTriple().isX86()) + { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); - } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + } + else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) + { unsigned VLEN = VLENVal.getExtValue(); StringRef MangledName = Fn->getName(); if (CGM.getTarget().hasFeature("sve")) @@ -11570,41 +12647,47 @@ } } -namespace { -/// Cleanup action for doacross support. -class DoacrossCleanupTy final : public EHScopeStack::Cleanup { -public: - static const int DoacrossFinArgs = 2; +namespace +{ + /// Cleanup action for doacross support. + class DoacrossCleanupTy final : public EHScopeStack::Cleanup + { + public: + static const int DoacrossFinArgs = 2; -private: - llvm::FunctionCallee RTLFn; - llvm::Value *Args[DoacrossFinArgs]; + private: + llvm::FunctionCallee RTLFn; + llvm::Value *Args[DoacrossFinArgs]; -public: - DoacrossCleanupTy(llvm::FunctionCallee RTLFn, - ArrayRef CallArgs) - : RTLFn(RTLFn) { - assert(CallArgs.size() == DoacrossFinArgs); - std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); - } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(RTLFn, Args); - } -}; + public: + DoacrossCleanupTy(llvm::FunctionCallee RTLFn, + ArrayRef CallArgs) + : RTLFn(RTLFn) + { + assert(CallArgs.size() == DoacrossFinArgs); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override + { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } + }; } // namespace void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, - ArrayRef NumIterations) { + ArrayRef NumIterations) +{ if (!CGF.HaveInsertPoint()) return; ASTContext &C = CGM.getContext(); QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); RecordDecl *RD; - if (KmpDimTy.isNull()) { + if (KmpDimTy.isNull()) + { // Build struct kmp_dim { // loop bounds info casted to kmp_int64 // kmp_int64 lo; // lower // kmp_int64 up; // upper @@ -11617,7 +12700,9 @@ addFieldToRecordDecl(C, RD, Int64Ty); RD->completeDefinition(); KmpDimTy = C.getRecordType(RD); - } else { + } + else + { RD = cast(KmpDimTy->getAsTagDecl()); } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); @@ -11626,9 +12711,15 @@ Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); - enum { LowerFD = 0, UpperFD, StrideFD }; + enum + { + LowerFD = 0, + UpperFD, + StrideFD + }; // Fill dims with data. - for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { + for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) + { LValue DimsLVal = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); // dims.upper = num_iterations; @@ -11667,14 +12758,16 @@ } void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, - const OMPDependClause *C) { + const OMPDependClause *C) +{ QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( Int64Ty, Size, nullptr, ArrayType::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); - for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { + for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) + { const Expr *CounterVal = C->getLoopData(I); assert(CounterVal); llvm::Value *CntVal = CGF.EmitScalarConversion( @@ -11688,10 +12781,13 @@ getThreadID(CGF, C->getBeginLoc()), CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; llvm::FunctionCallee RTLFn; - if (C->getDependencyKind() == OMPC_DEPEND_source) { + if (C->getDependencyKind() == OMPC_DEPEND_source) + { RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_post); - } else { + } + else + { assert(C->getDependencyKind() == OMPC_DEPEND_sink); RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_wait); @@ -11701,12 +12797,15 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, - ArrayRef Args) const { + ArrayRef Args) const +{ assert(Loc.isValid() && "Outlined function call location must be valid."); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - if (auto *Fn = dyn_cast(Callee.getCallee())) { - if (Fn->doesNotThrow()) { + if (auto *Fn = dyn_cast(Callee.getCallee())) + { + if (Fn->doesNotThrow()) + { CGF.EmitNounwindRuntimeCall(Fn, Args); return; } @@ -11716,11 +12815,13 @@ void CGOpenMPRuntime::emitOutlinedFunctionCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, - ArrayRef Args) const { + ArrayRef Args) const +{ emitCall(CGF, Loc, OutlinedFn, Args); } -void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { +void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) +{ if (const auto *FD = dyn_cast(D)) if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) HasEmittedDeclareTargetRegion = true; @@ -11728,23 +12829,28 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, - const VarDecl *TargetParam) const { + const VarDecl *TargetParam) const +{ return CGF.GetAddrOfLocalVar(NativeParam); } /// Return allocator value from expression, or return a null allocator (default /// when no allocator specified). static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, - const Expr *Allocator) { + const Expr *Allocator) +{ llvm::Value *AllocVal; - if (Allocator) { + if (Allocator) + { AllocVal = CGF.EmitScalarExpr(Allocator); // According to the standard, the original allocator type is a enum // (integer). Convert to pointer type, if required. AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), CGF.getContext().VoidPtrTy, Allocator->getExprLoc()); - } else { + } + else + { // If no allocator specified, it defaults to the null allocator. AllocVal = llvm::Constant::getNullValue( CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); @@ -11753,7 +12859,8 @@ } /// Return the alignment from an allocate directive if present. -static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { +static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) +{ std::optional AllocateAlignment = CGM.getOMPAllocateAlignment(VD); if (!AllocateAlignment) @@ -11763,36 +12870,43 @@ } Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, - const VarDecl *VD) { + const VarDecl *VD) +{ if (!VD) return Address::invalid(); Address UntiedAddr = Address::invalid(); Address UntiedRealAddr = Address::invalid(); auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); - if (It != FunctionToUntiedTaskStackMap.end()) { + if (It != FunctionToUntiedTaskStackMap.end()) + { const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack[It->second]; auto I = UntiedData.find(VD); - if (I != UntiedData.end()) { + if (I != UntiedData.end()) + { UntiedAddr = I->second.first; UntiedRealAddr = I->second.second; } } const VarDecl *CVD = VD->getCanonicalDecl(); - if (CVD->hasAttr()) { + if (CVD->hasAttr()) + { // Use the default allocation. if (!isAllocatableDecl(VD)) return UntiedAddr; llvm::Value *Size; CharUnits Align = CGM.getContext().getDeclAlign(CVD); - if (CVD->getType()->isVariablyModifiedType()) { + if (CVD->getType()->isVariablyModifiedType()) + { Size = CGF.getTypeSize(CVD->getType()); // Align the size: ((size + align - 1) / align) * align Size = CGF.Builder.CreateNUWAdd( Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); - } else { + } + else + { CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); Size = CGM.getSize(Sz.alignTo(Align)); } @@ -11821,7 +12935,8 @@ CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); // Cleanup action for allocate support. - class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { + class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup + { llvm::FunctionCallee RTLFn; SourceLocation::UIntTy LocEncoding; Address Addr; @@ -11833,7 +12948,8 @@ const Expr *AllocExpr) : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), AllocExpr(AllocExpr) {} - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override + { if (!CGF.HaveInsertPoint()) return; llvm::Value *Args[3]; @@ -11863,7 +12979,8 @@ } bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, - const VarDecl *VD) const { + const VarDecl *VD) const +{ auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); if (It == FunctionToUntiedTaskStackMap.end()) return false; @@ -11872,19 +12989,25 @@ CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( CodeGenModule &CGM, const OMPLoopDirective &S) - : CGM(CGM), NeedToPush(S.hasClausesOfKind()) { + : CGM(CGM), NeedToPush(S.hasClausesOfKind()) +{ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); if (!NeedToPush) return; NontemporalDeclsSet &DS = CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); - for (const auto *C : S.getClausesOfKind()) { - for (const Stmt *Ref : C->private_refs()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Stmt *Ref : C->private_refs()) + { const auto *SimpleRefExpr = cast(Ref)->IgnoreParenImpCasts(); const ValueDecl *VD; - if (const auto *DRE = dyn_cast(SimpleRefExpr)) { + if (const auto *DRE = dyn_cast(SimpleRefExpr)) + { VD = DRE->getDecl(); - } else { + } + else + { const auto *ME = cast(SimpleRefExpr); assert((ME->isImplicitCXXThis() || isa(ME->getBase()->IgnoreParenImpCasts())) && @@ -11896,7 +13019,8 @@ } } -CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { +CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() +{ if (!NeedToPush) return; CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); @@ -11906,7 +13030,8 @@ CodeGenFunction &CGF, const llvm::MapVector, std::pair> &LocalVars) - : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { + : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) +{ if (!NeedToPush) return; CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( @@ -11914,39 +13039,47 @@ CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); } -CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() +{ if (!NeedToPush) return; CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); } -bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { +bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const +{ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); return llvm::any_of( CGM.getOpenMPRuntime().NontemporalDeclsStack, - [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); + [VD](const NontemporalDeclsSet &Set) + { return Set.contains(VD); }); } void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( const OMPExecutableDirective &S, llvm::DenseSet> &NeedToAddForLPCsAsDisabled) - const { + const +{ llvm::DenseSet> NeedToCheckForLPCs; // Vars in target/task regions must be excluded completely. if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || - isOpenMPTaskingDirective(S.getDirectiveKind())) { + isOpenMPTaskingDirective(S.getDirectiveKind())) + { SmallVector CaptureRegions; getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); - for (const CapturedStmt::Capture &Cap : CS->captures()) { + for (const CapturedStmt::Capture &Cap : CS->captures()) + { if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) NeedToCheckForLPCs.insert(Cap.getCapturedVar()); } } // Exclude vars in private clauses. - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *Ref : C->varlists()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Expr *Ref : C->varlists()) + { if (!Ref->getType()->isScalarType()) continue; const auto *DRE = dyn_cast(Ref->IgnoreParenImpCasts()); @@ -11955,8 +13088,10 @@ NeedToCheckForLPCs.insert(DRE->getDecl()); } } - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *Ref : C->varlists()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Expr *Ref : C->varlists()) + { if (!Ref->getType()->isScalarType()) continue; const auto *DRE = dyn_cast(Ref->IgnoreParenImpCasts()); @@ -11965,8 +13100,10 @@ NeedToCheckForLPCs.insert(DRE->getDecl()); } } - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *Ref : C->varlists()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Expr *Ref : C->varlists()) + { if (!Ref->getType()->isScalarType()) continue; const auto *DRE = dyn_cast(Ref->IgnoreParenImpCasts()); @@ -11975,8 +13112,10 @@ NeedToCheckForLPCs.insert(DRE->getDecl()); } } - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *Ref : C->varlists()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Expr *Ref : C->varlists()) + { if (!Ref->getType()->isScalarType()) continue; const auto *DRE = dyn_cast(Ref->IgnoreParenImpCasts()); @@ -11985,8 +13124,10 @@ NeedToCheckForLPCs.insert(DRE->getDecl()); } } - for (const auto *C : S.getClausesOfKind()) { - for (const Expr *Ref : C->varlists()) { + for (const auto *C : S.getClausesOfKind()) + { + for (const Expr *Ref : C->varlists()) + { if (!Ref->getType()->isScalarType()) continue; const auto *DRE = dyn_cast(Ref->IgnoreParenImpCasts()); @@ -11995,10 +13136,13 @@ NeedToCheckForLPCs.insert(DRE->getDecl()); } } - for (const Decl *VD : NeedToCheckForLPCs) { + for (const Decl *VD : NeedToCheckForLPCs) + { for (const LastprivateConditionalData &Data : - llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { - if (Data.DeclToUniqueName.count(VD) > 0) { + llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) + { + if (Data.DeclToUniqueName.count(VD) > 0) + { if (!Data.Disabled) NeedToAddForLPCsAsDisabled.insert(VD); break; @@ -12012,12 +13156,14 @@ : CGM(CGF.CGM), Action((CGM.getLangOpts().OpenMP >= 50 && llvm::any_of(S.getClausesOfKind(), - [](const OMPLastprivateClause *C) { + [](const OMPLastprivateClause *C) + { return C->getKind() == OMPC_LASTPRIVATE_conditional; })) ? ActionToDo::PushAsLastprivateConditional - : ActionToDo::DoNotPush) { + : ActionToDo::DoNotPush) +{ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) return; @@ -12025,11 +13171,13 @@ "Expected a push action."); LastprivateConditionalData &Data = CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); - for (const auto *C : S.getClausesOfKind()) { + for (const auto *C : S.getClausesOfKind()) + { if (C->getKind() != OMPC_LASTPRIVATE_conditional) continue; - for (const Expr *Ref : C->varlists()) { + for (const Expr *Ref : C->varlists()) + { Data.DeclToUniqueName.insert(std::make_pair( cast(Ref->IgnoreParenImpCasts())->getDecl(), SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); @@ -12041,13 +13189,15 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( CodeGenFunction &CGF, const OMPExecutableDirective &S) - : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { + : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) +{ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); if (CGM.getLangOpts().OpenMP < 50) return; llvm::DenseSet> NeedToAddForLPCsAsDisabled; tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); - if (!NeedToAddForLPCsAsDisabled.empty()) { + if (!NeedToAddForLPCsAsDisabled.empty()) + { Action = ActionToDo::DisableLastprivateConditional; LastprivateConditionalData &Data = CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); @@ -12060,19 +13210,23 @@ CGOpenMPRuntime::LastprivateConditionalRAII CGOpenMPRuntime::LastprivateConditionalRAII::disable( - CodeGenFunction &CGF, const OMPExecutableDirective &S) { + CodeGenFunction &CGF, const OMPExecutableDirective &S) +{ return LastprivateConditionalRAII(CGF, S); } -CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { +CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() +{ if (CGM.getLangOpts().OpenMP < 50) return; - if (Action == ActionToDo::DisableLastprivateConditional) { + if (Action == ActionToDo::DisableLastprivateConditional) + { assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && "Expected list of disabled private vars."); CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); } - if (Action == ActionToDo::PushAsLastprivateConditional) { + if (Action == ActionToDo::PushAsLastprivateConditional) + { assert( !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && "Expected list of lastprivate conditional vars."); @@ -12081,7 +13235,8 @@ } Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, - const VarDecl *VD) { + const VarDecl *VD) +{ ASTContext &C = CGM.getContext(); auto I = LastprivateConditionalToTypes.find(CGF.CurFn); if (I == LastprivateConditionalToTypes.end()) @@ -12091,7 +13246,8 @@ const FieldDecl *FiredField; LValue BaseLVal; auto VI = I->getSecond().find(VD); - if (VI == I->getSecond().end()) { + if (VI == I->getSecond().end()) + { RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); RD->startDefinition(); VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); @@ -12101,7 +13257,9 @@ Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); - } else { + } + else + { NewType = std::get<0>(VI->getSecond()); VDField = std::get<1>(VI->getSecond()); FiredField = std::get<2>(VI->getSecond()); @@ -12115,82 +13273,92 @@ return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); } -namespace { -/// Checks if the lastprivate conditional variable is referenced in LHS. -class LastprivateConditionalRefChecker final - : public ConstStmtVisitor { - ArrayRef LPM; - const Expr *FoundE = nullptr; - const Decl *FoundD = nullptr; - StringRef UniqueDeclName; - LValue IVLVal; - llvm::Function *FoundFn = nullptr; - SourceLocation Loc; +namespace +{ + /// Checks if the lastprivate conditional variable is referenced in LHS. + class LastprivateConditionalRefChecker final + : public ConstStmtVisitor + { + ArrayRef LPM; + const Expr *FoundE = nullptr; + const Decl *FoundD = nullptr; + StringRef UniqueDeclName; + LValue IVLVal; + llvm::Function *FoundFn = nullptr; + SourceLocation Loc; -public: - bool VisitDeclRefExpr(const DeclRefExpr *E) { - for (const CGOpenMPRuntime::LastprivateConditionalData &D : - llvm::reverse(LPM)) { - auto It = D.DeclToUniqueName.find(E->getDecl()); - if (It == D.DeclToUniqueName.end()) - continue; - if (D.Disabled) - return false; - FoundE = E; - FoundD = E->getDecl()->getCanonicalDecl(); - UniqueDeclName = It->second; - IVLVal = D.IVLVal; - FoundFn = D.Fn; - break; + public: + bool VisitDeclRefExpr(const DeclRefExpr *E) + { + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) + { + auto It = D.DeclToUniqueName.find(E->getDecl()); + if (It == D.DeclToUniqueName.end()) + continue; + if (D.Disabled) + return false; + FoundE = E; + FoundD = E->getDecl()->getCanonicalDecl(); + UniqueDeclName = It->second; + IVLVal = D.IVLVal; + FoundFn = D.Fn; + break; + } + return FoundE == E; } - return FoundE == E; - } - bool VisitMemberExpr(const MemberExpr *E) { - if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) - return false; - for (const CGOpenMPRuntime::LastprivateConditionalData &D : - llvm::reverse(LPM)) { - auto It = D.DeclToUniqueName.find(E->getMemberDecl()); - if (It == D.DeclToUniqueName.end()) - continue; - if (D.Disabled) + bool VisitMemberExpr(const MemberExpr *E) + { + if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) return false; - FoundE = E; - FoundD = E->getMemberDecl()->getCanonicalDecl(); - UniqueDeclName = It->second; - IVLVal = D.IVLVal; - FoundFn = D.Fn; - break; + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) + { + auto It = D.DeclToUniqueName.find(E->getMemberDecl()); + if (It == D.DeclToUniqueName.end()) + continue; + if (D.Disabled) + return false; + FoundE = E; + FoundD = E->getMemberDecl()->getCanonicalDecl(); + UniqueDeclName = It->second; + IVLVal = D.IVLVal; + FoundFn = D.Fn; + break; + } + return FoundE == E; } - return FoundE == E; - } - bool VisitStmt(const Stmt *S) { - for (const Stmt *Child : S->children()) { - if (!Child) - continue; - if (const auto *E = dyn_cast(Child)) - if (!E->isGLValue()) + bool VisitStmt(const Stmt *S) + { + for (const Stmt *Child : S->children()) + { + if (!Child) continue; - if (Visit(Child)) - return true; + if (const auto *E = dyn_cast(Child)) + if (!E->isGLValue()) + continue; + if (Visit(Child)) + return true; + } + return false; } - return false; - } - explicit LastprivateConditionalRefChecker( - ArrayRef LPM) - : LPM(LPM) {} - std::tuple - getFoundData() const { - return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); - } -}; + explicit LastprivateConditionalRefChecker( + ArrayRef LPM) + : LPM(LPM) {} + std::tuple + getFoundData() const + { + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); + } + }; } // namespace void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, - SourceLocation Loc) { + SourceLocation Loc) +{ // Last updated loop counter for the lastprivate conditional var. // int last_iv = 0; llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); @@ -12218,15 +13386,19 @@ // last_a = priv_a; // } auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, - Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) + { Action.Enter(CGF); llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); // (last_iv <= iv) ? Check if the variable is updated and store new // value in global var. llvm::Value *CmpRes; - if (IVLVal.getType()->isSignedIntegerType()) { + if (IVLVal.getType()->isSignedIntegerType()) + { CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); - } else { + } + else + { assert(IVLVal.getType()->isUnsignedIntegerType() && "Loop iteration variable must be integer."); CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); @@ -12241,13 +13413,16 @@ CGF.EmitStoreOfScalar(IVVal, LastIVLVal); // last_a = priv_a; - switch (CGF.getEvaluationKind(LVal.getType())) { - case TEK_Scalar: { + switch (CGF.getEvaluationKind(LVal.getType())) + { + case TEK_Scalar: + { llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); CGF.EmitStoreOfScalar(PrivVal, LastLVal); break; } - case TEK_Complex: { + case TEK_Complex: + { CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); break; @@ -12263,17 +13438,21 @@ CGF.EmitBlock(ExitBB, /*IsFinished=*/true); }; - if (CGM.getLangOpts().OpenMPSimd) { + if (CGM.getLangOpts().OpenMPSimd) + { // Do not emit as a critical region as no parallel region could be emitted. RegionCodeGenTy ThenRCG(CodeGen); ThenRCG(CGF); - } else { + } + else + { emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); } } void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, - const Expr *LHS) { + const Expr *LHS) +{ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) return; LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); @@ -12286,14 +13465,15 @@ llvm::Function *FoundFn; std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = Checker.getFoundData(); - if (FoundFn != CGF.CurFn) { + if (FoundFn != CGF.CurFn) + { // Special codegen for inner parallel regions. // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); assert(It != LastprivateConditionalToTypes[FoundFn].end() && "Lastprivate conditional is not found in outer region."); QualType StructTy = std::get<0>(It->getSecond()); - const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); + const FieldDecl *FiredDecl = std::get<2>(It->getSecond()); LValue PrivLVal = CGF.EmitLValue(FoundE); Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivLVal.getAddress(CGF), @@ -12318,12 +13498,14 @@ void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( CodeGenFunction &CGF, const OMPExecutableDirective &D, - const llvm::DenseSet> &IgnoredDecls) { + const llvm::DenseSet> &IgnoredDecls) +{ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) return; auto Range = llvm::reverse(LastprivateConditionalStack); auto It = llvm::find_if( - Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); + Range, [](const LastprivateConditionalData &D) + { return !D.Disabled; }); if (It == Range.end() || It->Fn != CGF.CurFn) return; auto LPCI = LastprivateConditionalToTypes.find(It->Fn); @@ -12332,7 +13514,8 @@ SmallVector CaptureRegions; getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); - for (const auto &Pair : It->DeclToUniqueName) { + for (const auto &Pair : It->DeclToUniqueName) + { const auto *VD = cast(Pair.first->getCanonicalDecl()); if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) continue; @@ -12368,7 +13551,8 @@ void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, - SourceLocation Loc) { + SourceLocation Loc) +{ if (CGF.getLangOpts().OpenMP < 50) return; auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); @@ -12388,13 +13572,15 @@ llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) +{ llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12402,7 +13588,8 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { + bool Tied, unsigned &NumberOfParts) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12411,38 +13598,44 @@ llvm::Function *OutlinedFn, ArrayRef CapturedVars, const Expr *IfCond, - llvm::Value *NumThreads) { + llvm::Value *NumThreads) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitCriticalRegion( CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, - const Expr *Hint) { + const Expr *Hint) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc, - const Expr *Filter) { + const Expr *Filter) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskgroupRegion( CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12450,14 +13643,16 @@ CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef CopyprivateVars, ArrayRef DestExprs, ArrayRef SrcExprs, - ArrayRef AssignmentOps) { + ArrayRef AssignmentOps) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, - bool IsThreads) { + bool IsThreads) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12465,39 +13660,45 @@ SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { + bool ForceSimpleCall) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitForDispatchInit( CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, - bool Ordered, const DispatchRTInput &DispatchValues) { + bool Ordered, const DispatchRTInput &DispatchValues) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitForStaticInit( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, - const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { + const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { + OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, - bool IVSigned) { + bool IVSigned) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind DKind) { + OpenMPDirectiveKind DKind) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12505,44 +13706,51 @@ SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, - Address UB, Address ST) { + Address UB, Address ST) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, ProcBindKind ProcBind, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, - CodeGenFunction *CGF) { + CodeGenFunction *CGF) +{ llvm_unreachable("Not supported in SIMD-only mode"); } Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( - CodeGenFunction &CGF, QualType VarType, StringRef Name) { + CodeGenFunction &CGF, QualType VarType, StringRef Name) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef Vars, SourceLocation Loc, - llvm::AtomicOrdering AO) { + llvm::AtomicOrdering AO) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12551,21 +13759,24 @@ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, const OMPTaskDataTy &Data) { + const Expr *IfCond, const OMPTaskDataTy &Data) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitReduction( CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, - ArrayRef ReductionOps, ReductionOptionsTy Options) { + ArrayRef ReductionOps, ReductionOptionsTy Options) +{ assert(Options.SimpleReduction && "Only simple reduction is expected."); CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps, Options); @@ -12573,52 +13784,60 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( CodeGenFunction &CGF, SourceLocation Loc, ArrayRef LHSExprs, - ArrayRef RHSExprs, const OMPTaskDataTy &Data) { + ArrayRef RHSExprs, const OMPTaskDataTy &Data) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, - bool IsWorksharingReduction) { + bool IsWorksharingReduction) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, - unsigned N) { + unsigned N) +{ llvm_unreachable("Not supported in SIMD-only mode"); } Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, - LValue SharedLVal) { + LValue SharedLVal) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitCancellationPointCall( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind CancelRegion) { + OpenMPDirectiveKind CancelRegion) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, - OpenMPDirectiveKind CancelRegion) { + OpenMPDirectiveKind CancelRegion) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) +{ llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12628,19 +13847,23 @@ llvm::PointerIntPair Device, llvm::function_ref - SizeEmitter) { + SizeEmitter) +{ llvm_unreachable("Not supported in SIMD-only mode"); } -bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { +bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) +{ llvm_unreachable("Not supported in SIMD-only mode"); } -bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { +bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) +{ llvm_unreachable("Not supported in SIMD-only mode"); } -bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { +bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) +{ return false; } @@ -12648,50 +13871,58 @@ const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, - ArrayRef CapturedVars) { + ArrayRef CapturedVars) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, - SourceLocation Loc) { + SourceLocation Loc) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, - CGOpenMPRuntime::TargetDataInfo &Info) { + CGOpenMPRuntime::TargetDataInfo &Info) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device) { + const Expr *Device) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, - ArrayRef NumIterations) { + ArrayRef NumIterations) +{ llvm_unreachable("Not supported in SIMD-only mode"); } void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, - const OMPDependClause *C) { + const OMPDependClause *C) +{ llvm_unreachable("Not supported in SIMD-only mode"); } const VarDecl * CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const +{ llvm_unreachable("Not supported in SIMD-only mode"); } Address CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, - const VarDecl *TargetParam) const { + const VarDecl *TargetParam) const +{ llvm_unreachable("Not supported in SIMD-only mode"); }