Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" @@ -92,6 +93,13 @@ OMPRTL__kmpc_single, // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_end_single, + // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * + // new_task); + OMPRTL__kmpc_omp_task, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -190,6 +198,12 @@ /// variables. llvm::StringMap, llvm::BumpPtrAllocator> InternalVars; + /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); + llvm::Type *KmpRoutineEntryPtrTy; + QualType KmpRoutineEntryPtrQTy; + + /// \brief Build type kmp_routine_entry_t (if not built yet). + void emitKmpRoutineEntryT(QualType KmpInt32Ty); /// \brief Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. @@ -257,16 +271,26 @@ explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} - /// \brief Emits outlined function for the specified OpenMP directive \a D - /// (required for parallel and task directives). This outlined function has - /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct - /// context_vars*). + /// \brief Emits outlined function for the specified OpenMP directive \a D. 
+ /// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32 + /// BoundID, struct context_vars*). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. /// virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar); + /// \brief Emits outlined function for the OpenMP task directive \a D. This + /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 + /// PartID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param PartIDVar If not nullptr - variable used for part id in tasks. + /// + virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar); + /// \brief Cleans up references to the objects in finished function. /// void functionFinished(CodeGenFunction &CGF); @@ -274,7 +298,7 @@ /// \brief Emits code for parallel call of the \a OutlinedFn with variables /// captured in a record which address is stored in \a CapturedStruct. /// \param OutlinedFn Outlined function to be run in parallel threads. Type of - /// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*). + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). /// \param CapturedStruct A pointer to the record with the references to /// variables used in \a OutlinedFn function. /// @@ -412,6 +436,39 @@ /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef Vars, SourceLocation Loc); + + /// \brief Emit task region for the task directive. The task region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). 
Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to the destructors function to field destructors of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, + /// kmp_task_t *new_task), where new_task is a resulting structure from + /// previous items. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). + /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references to the shared variables. + /// \param Shareds Context with the list of shared variables from the \a + /// TaskFunction. + virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied, + llvm::PointerIntPair Final, + llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Value *Shareds); }; /// \brief RAII for emitting code of CapturedStmt without function outlining. Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp @@ -42,7 +42,8 @@ virtual const VarDecl *getThreadIDVariable() const = 0; /// \brief Get an LValue for the current ThreadID variable. 
- LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + /// \return LValue for thread id variable. This LValue always has type int32*. + virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); /// \brief Emit the captured statement body. virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; @@ -77,6 +78,41 @@ const VarDecl *ThreadIDVar; }; +/// \brief API for captured statement code generation in OpenMP constructs. +class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +public: + CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D, + const CapturedStmt &CS, + const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar) + : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar), + PartIDVar(PartIDVar) { + assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); + } + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + virtual const VarDecl *getThreadIDVariable() const override { + return ThreadIDVar; + } + + /// \brief Get an LValue for the current ThreadID variable. + virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; + + /// \brief Emit the captured statement body. + virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { return ".omp_outlined."; } + +private: + /// \brief A variable or parameter storing global thread id for OpenMP + /// constructs. + const VarDecl *ThreadIDVar; + /// \brief A variable or parameter storing part id for OpenMP tasking + /// constructs. + const VarDecl *PartIDVar; +}; + /// \brief API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { @@ -110,6 +146,7 @@ return OuterRegionInfo->getThreadIDVariable(); return nullptr; } + /// \brief Get the name of the capture helper. 
virtual StringRef getHelperName() const override { llvm_unreachable("No helper name for inlined OpenMP construct"); @@ -126,8 +163,13 @@ LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.MakeNaturalAlignAddrLValue( - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - CGF.getContext().getPointerType(getThreadIDVariable()->getType())); + CGF.Builder.CreateAlignedLoad( + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + CGF.PointerAlignInBytes), + getThreadIDVariable() + ->getType() + ->castAs() + ->getPointeeType()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { @@ -141,8 +183,23 @@ CGCapturedStmtInfo::EmitBody(CGF, S); } +LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( + CodeGenFunction &CGF) { + return CGF.MakeNaturalAlignAddrLValue( + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType()); +} + +void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF, + const Stmt *S) { + if (PartIDVar) { + // TODO: emit code for untied tasks. 
+ } + CGCapturedStmtInfo::EmitBody(CGF, S); +} + CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), DefaultOpenMPPSource(nullptr) { + : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, @@ -157,6 +214,8 @@ llvm::Value * CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar) { + assert(ThreadIDVar->getType()->isPointerType() && + "thread id variable must be of type kmp_int32 *"); const CapturedStmt *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); @@ -165,6 +224,19 @@ } llvm::Value * +CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar) { + assert(!ThreadIDVar->getType()->isPointerType() && + "thread id variable must be of type kmp_int32 for tasks"); + auto *CS = cast(D.getAssociatedStmt()); + CodeGenFunction CGF(CGM, true); + CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar); + CGF.CapturedStmtInfo = &CGInfo; + return CGF.GenerateCapturedStmtFunction(*CS); +} + +llvm::Value * CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { @@ -266,12 +338,9 @@ } if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { - if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) { + if (OMPRegionInfo->getThreadIDVariable()) { // Check if this an outlined function with thread id passed as argument. 
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); - LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), - ThreadIDVar->getType()); ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); // If value loaded in entry block, cache it and use it everywhere in // function. @@ -564,6 +633,30 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); break; } + case OMPRTL__kmpc_omp_task_alloc: { + // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + assert(KmpRoutineEntryPtrTy != nullptr && + "Type kmp_routine_entry_t must be created."); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; + // Return void * and then cast to particular kmp_task_t type. + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); + break; + } + case OMPRTL__kmpc_omp_task: { + // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); + break; + } } return RTLFn; } @@ -767,8 +860,7 @@ if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), - Loc).getScalarVal(); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); auto ThreadID = getThreadID(CGF, Loc); auto Int32Ty = @@ -1044,6 +1136,200 @@ emitUpdateLocation(CGF, Loc)); } +namespace { +/// \brief Indexes of fields for 
type kmp_task_t. +enum KmpTaskTFields { + /// \brief List of shared variables. + KmpTaskTShareds, + /// \brief Task routine. + KmpTaskTRoutine, + /// \brief Partition id for the untied tasks. + KmpTaskTPartId, + /// \brief Function with call of destructors for private variables. + KmpTaskTDestructors, +}; +} // namespace + +void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { + if (!KmpRoutineEntryPtrTy) { + // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. + auto &C = CGM.getContext(); + QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; + FunctionProtoType::ExtProtoInfo EPI; + KmpRoutineEntryPtrQTy = C.getPointerType( + C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); + KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); + } +} + +static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); +} + +static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM, + QualType KmpInt32Ty, + QualType KmpRoutineEntryPointerQTy) { + auto &C = CGM.getContext(); + // Build struct kmp_task_t { + // void * shareds; + // kmp_routine_entry_t routine; + // kmp_int32 part_id; + // kmp_routine_entry_t destructors; + // /* private vars */ + // }; + auto *RD = C.buildImplicitRecord("kmp_task_t"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + // TODO: add private fields. + RD->completeDefinition(); + return C.getRecordType(RD); +} + +/// \brief Emit a proxy function which accepts kmp_task_t as the second +/// argument. 
+/// \code +/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { +/// TaskFunction(gtid, tt->part_id, tt->shareds); +/// return 0; +/// } +/// \endcode +static llvm::Value * +emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, + QualType KmpInt32Ty, QualType KmpTaskTPtrQTy, + QualType SharedsPtrTy, llvm::Value *TaskFunction) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTPtrQTy); + Args.push_back(&GtidArg); + Args.push_back(&TaskTypeArg); + FunctionType::ExtInfo Info; + auto &TaskEntryFnInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, + /*isVariadic=*/false); + auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + auto *TaskEntry = + llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, + ".omp_task_entry.", &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); + + // TaskFunction(gtid, tt->part_id, tt->shareds); + auto *GtidParam = CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, + C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); + auto TaskTypeArgAddr = CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, + CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc); + auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr, + /*Idx=*/KmpTaskTPartId); + auto *PartidParam = CGF.EmitLoadOfScalar( + PartidPtr, /*Volatile=*/false, + C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); + auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr, + /*Idx=*/KmpTaskTShareds); + auto *SharedsParam = + CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false, + 
CGM.PointerAlignInBytes, C.VoidPtrTy, Loc); + llvm::Value *CallArgs[] = { + GtidParam, PartidParam, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))}; + CGF.EmitCallOrInvoke(TaskFunction, CallArgs); + CGF.EmitStoreThroughLValue( + RValue::get(CGF.Builder.getInt32(/*C=*/0)), + CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); + CGF.FinishFunction(); + return TaskEntry; +} + +void CGOpenMPRuntime::emitTaskCall( + CodeGenFunction &CGF, SourceLocation Loc, bool Tied, + llvm::PointerIntPair Final, + llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) { + auto &C = CGM.getContext(); + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + // Build type kmp_routine_entry_t (if not built yet). + emitKmpRoutineEntryT(KmpInt32Ty); + // Build particular struct kmp_task_t for the given task. + auto KmpTaskQTy = + createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy); + QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy); + auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo(); + auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy)); + QualType SharedsPtrTy = C.getPointerType(SharedsTy); + + // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, + // kmp_task_t *tt); + auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, + SharedsPtrTy, TaskFunction); + + // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + // Task flags. Format is taken from + // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, + // description of kmp_tasking_flags struct. + const unsigned TiedFlag = 0x1; + const unsigned FinalFlag = 0x2; + unsigned Flags = Tied ? TiedFlag : 0; + auto *TaskFlags = + Final.getPointer() + ? 
CGF.Builder.CreateSelect(Final.getPointer(), + CGF.Builder.getInt32(FinalFlag), + CGF.Builder.getInt32(/*C=*/0)) + : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); + TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); + auto SharedsSize = C.getTypeSizeInChars(SharedsTy); + llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize, + CGM.getSize(SharedsSize), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskEntry, KmpRoutineEntryPtrTy)}; + auto *NewTask = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); + auto *NewTaskNewTaskTTy = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy); + // Fill the data in the resulting kmp_task_t record. + // Copy shareds if there are any. + if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) + CGF.EmitAggregateCopy( + CGF.EmitLoadOfScalar( + CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, + /*Idx=*/KmpTaskTShareds), + /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc), + Shareds, SharedsTy); + // TODO: generate function with destructors for privates. + // Provide pointer to function with destructors for privates. + CGF.Builder.CreateAlignedStore( + llvm::ConstantPointerNull::get( + cast(KmpRoutineEntryPtrTy)), + CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, + /*Idx=*/KmpTaskTDestructors), + CGM.PointerAlignInBytes); + + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. + // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), NewTask}; + // TODO: add check for untied tasks. 
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); +} + InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( CodeGenFunction &CGF, const OMPExecutableDirective &D) : CGF(CGF) { Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp @@ -717,8 +717,35 @@ llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) { - llvm_unreachable("CodeGen for 'omp task' is not supported yet."); +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. + auto CS = cast(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto *I = CS->getCapturedDecl()->param_begin(); + // The first function argument for tasks is a thread id, the second one is a + // part id (0 for tied tasks, >=0 for untied task). + auto OutlinedFn = + CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I)); + // Check if we should emit tied or untied task. + bool Tied = !S.getSingleClause(OMPC_untied); + // Check if the task is final + llvm::PointerIntPair Final; + if (auto *Clause = S.getSingleClause(OMPC_final)) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = cast(Clause)->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Final.setInt(CondConstant); + else + Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. 
+ Final.setInt(/*IntVal=*/false); + } + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final, + OutlinedFn, SharedsTy, CapturedStruct); } void CodeGenFunction::EmitOMPTaskyieldDirective( Index: cfe/trunk/lib/Sema/SemaOpenMP.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaOpenMP.cpp +++ cfe/trunk/lib/Sema/SemaOpenMP.cpp @@ -308,8 +308,7 @@ // bound to the current team is shared. if (DVar.DKind == OMPD_task) { DSAVarData DVarTemp; - for (StackTy::reverse_iterator I = std::next(Iter), - EE = std::prev(Stack.rend()); + for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend(); I != EE; ++I) { // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables // Referenced @@ -1122,11 +1121,19 @@ break; } case OMPD_task: { + QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", KmpInt32Ty), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + // Mark this captured region as inlined, because we don't use outlined + // function directly. 
+ getCurCapturedRegion()->TheCapturedDecl->addAttr( + AlwaysInlineAttr::CreateImplicit( + Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange())); break; } case OMPD_ordered: { Index: cfe/trunk/test/OpenMP/task_codegen.cpp =================================================================== --- cfe/trunk/test/OpenMP/task_codegen.cpp +++ cfe/trunk/test/OpenMP/task_codegen.cpp @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* } +// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* } +struct S { + int a; + S() : a(0) {} + S(const S &s) : a(s.a) {} + ~S() {} +}; +int a; +// CHECK-LABEL: @main +int main() { +// CHECK: [[B:%.+]] = alloca i8 +// CHECK: [[S:%.+]] = alloca [[STRUCT_S]] + char b; + S s; +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}}) +// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0 +// CHECK: store i8* [[B]], i8** [[B_REF]] +// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1 +// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]] +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* 
[[TASK_PTR:%.+]], i32 0, i32 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]] +// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task shared(a, b, s) + { + a = 15; + b = a; + s.a = 10; + } +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task untied + { + a = 1; + } +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task final(true) + { + a = 2; + } +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) +// 
CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.*}}, i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) + const bool flag = false; +#pragma omp task final(flag) + { + a = 3; + } +// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]] +// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0 +// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0 +// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1 +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task final(b) + { + a = 4; + } + return a; +} +// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}*) +// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] +// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 +// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} +// CHECK: store i32 10, i32* %{{.+}} + +// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}*) +// CHECK: store i32 1, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}*) +// CHECK: store i32 2, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}*) +// CHECK: store i32 3, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}*) +// CHECK: store i32 4, i32* [[A_PTR:@.+]] +#endif +