Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" @@ -35,6 +36,7 @@ namespace clang { class Expr; class OMPExecutableDirective; +class RecordDecl; class VarDecl; namespace CodeGen { @@ -43,8 +45,6 @@ class CodeGenModule; class CGOpenMPRuntime { -public: - private: enum OpenMPRTLFunction { /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, @@ -88,6 +88,13 @@ OMPRTL__kmpc_master, // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_end_master, + // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * + // new_task); + OMPRTL__kmpc_omp_task, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -186,6 +193,25 @@ /// variables. llvm::StringMap, llvm::BumpPtrAllocator> InternalVars; + /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); + llvm::Type *KmpRoutineEntryPtrTy; + /// \brief This type is used by the tasking constructs. It stores a list of + /// shared variables (field shareds), pointer to the outlined task function + /// (field routine), current partition id for untied tasks (field part_id), + /// pointer to the function with destructors for the private variables(field + /// destructors) and a list of private variables itself (if any). 
+ /// \code + /// struct kmp_task_t { + /// void * shareds; + /// kmp_routine_entry_t routine; + /// kmp_int32 part_id; + /// kmp_routine_entry_t destructors; + /// /* private vars */ + /// }; + /// \endcode + llvm::Type *KmpTaskTTy; + /// \brief Original RecordDecl for kmp_task_t type. + const RecordDecl *KmpTaskTRD; /// \brief Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. @@ -257,13 +283,16 @@ /// \brief Emits outlined function for the specified OpenMP directive \a D /// (required for parallel and task directives). This outlined function has /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct - /// context_vars*). + /// context_vars*) (if PartIDVar == nullptr) or void(*)(kmp_int32 + /// /*ThreadID*/, kmp_int32 /*PartID*/, struct context_vars*) (if PartIDVar != + /// nullptr). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. - /// + /// \param PartIDVar If not nullptr - variable used for part id in tasks. virtual llvm::Value * EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar); + const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar = nullptr); /// \brief Cleans up references to the objects in finished function. /// @@ -401,6 +430,40 @@ /// \param Vars List of variables to flush. virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef Vars, SourceLocation Loc); + + /// \brief Emit task region for the task directive. The task region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. 
Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructors function to field destructors of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, + /// kmp_task_t *new_task), where new_task is a resulting structure from + /// previous items. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). + /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references to the shared variables. + /// \param Shareds Context with the list of shared variables from the \a + /// TaskFunction. 
+ virtual void + EmitOMPTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied, + llvm::PointerIntPair Final, + llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Value *Shareds); }; } // namespace CodeGen } // namespace clang Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -31,9 +31,9 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS, - const VarDecl *ThreadIDVar) + const VarDecl *ThreadIDVar, const VarDecl *PartIDVar) : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar), - Directive(D) { + PartIDVar(PartIDVar), Directive(D) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } @@ -58,6 +58,9 @@ /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// \brief A variable or parameter storing part id for OpenMP tasking + /// constructs. + const VarDecl *PartIDVar; /// \brief OpenMP executable directive associated with the region. const OMPExecutableDirective &Directive; }; @@ -73,6 +76,9 @@ CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPPrivateClause(Directive, PrivateScope); CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); + // TODO: add support for privates in tasks. + assert((!PartIDVar || !PrivateScope.Privatize()) && + "Private clauses for tasks are not supported yet."); if (PrivateScope.Privatize()) // Emit implicit barrier to synchronize threads and avoid data races. 
CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(), @@ -80,6 +86,16 @@ CGCapturedStmtInfo::EmitBody(CGF, S); } +static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); +} + CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), DefaultOpenMPPSource(nullptr) { IdentTy = llvm::StructType::create( @@ -91,14 +107,39 @@ llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); + // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. + auto &C = CGM.getContext(); + auto Int32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpRoutineEntryTyArgs[] = {Int32Ty, C.VoidPtrTy}; + FunctionProtoType::ExtProtoInfo EPI; + auto KmpRoutineEntryTy = + C.getFunctionType(Int32Ty, KmpRoutineEntryTyArgs, EPI); + auto KmpRoutineEntryPointerQTy = C.getPointerType(KmpRoutineEntryTy); + KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPointerQTy); + // Build struct kmp_task_t. 
+ auto *RD = C.buildImplicitRecord("kmp_task_t"); + RD->startDefinition(); + // Build void *shareds; + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + // Build kmp_routine_entry_t routine; + addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + // Build kmp_int32 part_id; + addFieldToRecordDecl(C, RD, Int32Ty); + // Build kmp_routine_entry_t destructors; + addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + RD->completeDefinition(); + KmpTaskTRD = RD; + auto KmpTaskQTy = C.getRecordType(KmpTaskTRD); + KmpTaskTTy = CGM.getTypes().ConvertType(KmpTaskQTy); } llvm::Value * CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar) { + const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar) { const CapturedStmt *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar); + CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar); CGF.CapturedStmtInfo = &CGInfo; return CGF.GenerateCapturedStmtFunction(*CS); } @@ -208,9 +249,14 @@ auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable(); auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); - LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), - ThreadIDVar->getType()); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + if (ThreadIDVar->getType()->isPointerType()) { + // Thread id is passed as a pointer + LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), + ThreadIDVar->getType()); + ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + } else + // Thread id is passed as a value (in tasks). + ThreadID = RVal.getScalarVal(); // If value loaded in entry block, cache it and use it everywhere in // function. 
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { @@ -475,6 +521,27 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); break; } + case OMPRTL__kmpc_omp_task_alloc: { + // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; + llvm::FunctionType *FnTy = llvm::FunctionType::get( + KmpTaskTTy->getPointerTo(), TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); + break; + } + case OMPRTL__kmpc_omp_task: { + // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + KmpTaskTTy->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); + break; + } } return RTLFn; } @@ -926,3 +993,154 @@ auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush); CGF.EmitRuntimeCall(RTLFn, Args); } + +namespace { +/// \brief Fields for type kmp_task_t. +enum KmpTaskTFields { + /// \brief List of shared variables. + KmpTaskTShareds, + /// \brief Task routine. + KmpTaskTRoutine, + /// \brief Partition id for the untied tasks. + KmpTaskTPartId, + /// \brief Function with call of destructors for private variables. + KmpTaskTDestructors, +}; +} // namespace + +static RecordDecl *createNewKmpTaskTRecordDecl(ASTContext &C, + const RecordDecl *RD) { + auto *NewRD = C.buildImplicitRecord("kmp_task_t"); + NewRD->startDefinition(); + for (auto *FD : RD->fields()) { + // Add new field to record. 
+ addFieldToRecordDecl(C, NewRD, FD->getType()); + } + return NewRD; +} + +void CGOpenMPRuntime::EmitOMPTaskCall( + CodeGenFunction &CGF, SourceLocation Loc, bool Tied, + llvm::PointerIntPair Final, + llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) { + auto &C = CGM.getContext(); + // Build particular struct kmp_task_t for the given task. + auto *KmpTaskRD = createNewKmpTaskTRecordDecl(C, KmpTaskTRD); + // TODO: add private fields. + KmpTaskRD->completeDefinition(); + auto KmpTaskQTy = C.getRecordType(KmpTaskRD); + auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy)); + QualType SharedsPtrTy = C.getPointerType(SharedsTy); + // Build proxy function which accepts kmp_task_t as the second argument. + // kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + // TaskFunction(gtid, tt->part_id, tt->shareds); + // return 0; + // } + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); + QualType KmpTaskPtrQTy = C.getPointerType(KmpTaskQTy); + FunctionArgList Args; + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, KmpInt32Ty); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, KmpTaskPtrQTy); + Args.push_back(&GtidArg); + Args.push_back(&TaskTypeArg); + FunctionType::ExtInfo Info; + auto &TaskEntryFnInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, + /*isVariadic=*/false); + auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + auto *TaskEntry = llvm::Function::Create( + TaskEntryTy, /*Linkage=*/llvm::GlobalValue::InternalLinkage, + ".omp_task_entry.", &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); + CodeGenFunction TaskEntryCGF(CGM); + TaskEntryCGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, + TaskEntryFnInfo, Args); + // TaskFunction(gtid, tt->part_id, tt->shareds); + auto *GtidParam = TaskEntryCGF.EmitLoadOfScalar( + 
TaskEntryCGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, + C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, + SourceLocation()); + auto TaskTypeArgAddr = TaskEntryCGF.EmitLoadOfScalar( + TaskEntryCGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, + CGM.PointerAlignInBytes, KmpTaskPtrQTy, SourceLocation()); + auto *PartidPtr = + TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr, + /*Idx=*/KmpTaskTPartId); + auto *PartidParam = TaskEntryCGF.EmitLoadOfScalar( + PartidPtr, /*Volatile=*/false, + C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, + SourceLocation()); + auto *SharedsPtr = + TaskEntryCGF.Builder.CreateStructGEP(TaskTypeArgAddr, + /*Idx=*/KmpTaskTShareds); + auto *SharedsParam = TaskEntryCGF.EmitLoadOfScalar( + SharedsPtr, /*Volatile=*/false, CGM.PointerAlignInBytes, C.VoidPtrTy, + SourceLocation()); + llvm::Value *CallArgs[] = { + GtidParam, PartidParam, + TaskEntryCGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedsParam, TaskEntryCGF.ConvertTypeForMem(SharedsPtrTy))}; + TaskEntryCGF.EmitCallOrInvoke(TaskFunction, CallArgs); + TaskEntryCGF.EmitStoreThroughLValue( + RValue::get(TaskEntryCGF.Builder.getInt32(/*C=*/0)), + TaskEntryCGF.MakeNaturalAlignAddrLValue(TaskEntryCGF.ReturnValue, + KmpInt32Ty)); + TaskEntryCGF.FinishFunction(SourceLocation()); + // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + // Task flags. Format is taken from + // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, + // description of kmp_tasking_flags struct. + const unsigned TiedFlag = 0x1; + const unsigned FinalFlag = 0x2; + unsigned Flags = Tied ? TiedFlag : 0; + auto *TaskFlags = + Final.getPointer() + ? CGF.Builder.CreateSelect(Final.getPointer(), + CGF.Builder.getInt32(FinalFlag), + CGF.Builder.getInt32(/*C=*/0)) + : CGF.Builder.getInt32(Final.getInt() ? 
FinalFlag : 0); + TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); + auto SharedsSize = C.getTypeSizeInChars(SharedsTy); + llvm::Value *AllocArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPThreadID(CGF, Loc), TaskFlags, + KmpTaskTySize, CGM.getSize(SharedsSize), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskEntry, KmpRoutineEntryPtrTy)}; + auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task_alloc); + auto *NewTask = CGF.EmitRuntimeCall(RTLFn, AllocArgs); + auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + NewTask, KmpTaskTTy->getPointerTo()); + // Fill the data in the resulting kmp_task_t record. + // Copy shareds if there are any. + if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) + CGF.EmitAggregateCopy( + CGF.EmitLoadOfScalar( + CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, + /*Idx=*/KmpTaskTShareds), + /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, + SourceLocation()), + Shareds, SharedsTy); + // TODO: generate function with destructors for privates. + // Provide pointer to function with destructors for privates. + auto DestructorsType = + std::next(KmpTaskRD->field_begin(), KmpTaskTDestructors)->getType(); + CGF.EmitStoreOfScalar( + llvm::ConstantPointerNull::get( + cast(CGF.ConvertType(DestructorsType))), + CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, + /*Idx=*/KmpTaskTDestructors), + /*Volatile=*/false, CGM.PointerAlignInBytes, DestructorsType); + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. 
+ // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Value *TaskArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPThreadID(CGF, Loc), NewTask}; + RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_task); + CGF.EmitRuntimeCall(RTLFn, TaskArgs); +} + Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -746,8 +746,34 @@ llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) { - llvm_unreachable("CodeGen for 'omp task' is not supported yet."); +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. + auto CS = cast(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto *I = CS->getCapturedDecl()->param_begin(); + // The first function argument for tasks is a thread id, the second one is a + // part id (0 for tied tasks, >=0 for untied task). + auto OutlinedFn = + CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(S, *I, *std::next(I)); + // Check if we should emit tied or untied task. + bool Tied = !S.getSingleClause(/*K=*/OMPC_untied); + // Check if the task is final + llvm::PointerIntPair Final; + if (auto *Clause = S.getSingleClause(/*K=*/OMPC_final)) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = cast(Clause)->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Final.setInt(CondConstant); + else + Final.setPointer(EvaluateExprAsBool(Cond)); + } else + // By default the task is not final. 
+ Final.setInt(/*IntVal=*/false); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + CGM.getOpenMPRuntime().EmitOMPTaskCall(*this, S.getLocStart(), Tied, Final, + OutlinedFn, SharedsTy, CapturedStruct); } void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) { Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -308,8 +308,7 @@ // bound to the current team is shared. if (DVar.DKind == OMPD_task) { DSAVarData DVarTemp; - for (StackTy::reverse_iterator I = std::next(Iter), - EE = std::prev(Stack.rend()); + for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend(); I != EE; ++I) { // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables // Referenced @@ -1122,11 +1121,19 @@ break; } case OMPD_task: { + QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", KmpInt32Ty), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + // Mark this captured region as inlined, because we don't use outlined + // function directly. 
+ getCurCapturedRegion()->TheCapturedDecl->addAttr( + AlwaysInlineAttr::CreateImplicit( + Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange())); break; } case OMPD_ordered: { Index: test/OpenMP/task_codegen.cpp =================================================================== --- test/OpenMP/task_codegen.cpp +++ test/OpenMP/task_codegen.cpp @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* } +// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* } +struct S { + int a; + S() : a(0) {} + S(const S &s) : a(s.a) {} + ~S() {} +}; +int a; +// CHECK-LABEL : @main +int main() { +// CHECK: [[B:%.+]] = alloca i8 +// CHECK: [[S:%.+]] = alloca [[STRUCT_S]] + char b; + S s; +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}}) +// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0 +// CHECK: store i8* [[B]], i8** [[B_REF]] +// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1 +// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]] +// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8** 
[[SHAREDS_REF_PTR]] +// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]]) +#pragma omp task shared(a, b, s) + { + a = 15; + b = a; + s.a = 10; + } +// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]]) +#pragma omp task untied + { + a = 1; + } +// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]]) +#pragma omp task final(true) + { + a = 2; + } +// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds 
[[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]]) + const bool flag = false; +#pragma omp task final(flag) + { + a = 3; + } +// CHECK: [[B_VAL:%.+]] = load i8* [[B]] +// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0 +// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0 +// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1 +// CHECK: [[TASK_PTR:%.+]] = call [[KMP_TASK_T]]* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.+}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], [[KMP_TASK_T]]* [[TASK_PTR]]) +#pragma omp task final(b) + { + a = 4; + } + return a; +} +// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.+}}*) +// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: [[A_VAL:%.+]] = load i32* [[A_PTR]] +// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 +// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} +// CHECK: store i32 10, i32* %{{.+}} + +// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.+}}*) +// CHECK: store i32 1, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.+}}*) +// CHECK: store i32 2, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.+}}*) +// CHECK: store i32 3, i32* [[A_PTR:@.+]] + +// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.+}}*) +// CHECK: store i32 4, i32* [[A_PTR:@.+]] +#endif +