diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -105,6 +105,7 @@ SmallVector ReductionOrigs; SmallVector ReductionCopies; SmallVector ReductionOps; + SmallVector, 4> PrivateLocals; struct DependData { OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; const Expr *IteratorExpr = nullptr; @@ -245,6 +246,19 @@ ~NontemporalDeclsRAII(); }; + /// Manages the stack of local variable address maps for untied tasks. + class UntiedTaskLocalDeclsRAII { + CodeGenModule &CGM; + const bool NeedToPush; + + public: + UntiedTaskLocalDeclsRAII( + CodeGenModule &CGM, + const llvm::DenseMap, Address> + &LocalVars); + ~UntiedTaskLocalDeclsRAII(); + }; + /// Maps the expression for the lastprivate variable to the global copy used /// to store new value because original variables are not mapped in inner /// parallel regions. Only private copies are captured but we need also to @@ -715,6 +729,10 @@ /// The set is the union of all current stack elements. llvm::SmallVector NontemporalDeclsStack; + using UntiedLocalVarsAddressesMap = + llvm::DenseMap, Address>; + llvm::SmallVector UntiedLocalVarsStack; + /// Stack for list of addresses of declarations in current context marked as /// lastprivate conditional. The set is the union of all current stack /// elements. 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -181,7 +181,7 @@ UntiedCodeGen(CGF); CodeGenFunction::JumpDest CurPoint = CGF.getJumpDestInCurrentScope(".untied.next."); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBranch(CGF.ReturnBlock.getBlock()); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), CGF.Builder.GetInsertBlock()); @@ -3436,6 +3436,7 @@ const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), PrivateElemInit(PrivateElemInit) {} + PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} const Expr *OriginalRef = nullptr; const VarDecl *Original = nullptr; const VarDecl *PrivateCopy = nullptr; @@ -3456,6 +3457,10 @@ for (const auto &Pair : Privates) { const VarDecl *VD = Pair.second.Original; QualType Type = VD->getType().getNonReferenceType(); + // If the private variable is a local variable with lvalue ref type, + // allocate the pointer instead of the pointee type. 
+ if (!Pair.second.OriginalRef && VD->getType()->isLValueReferenceType()) + Type = C.getPointerType(Type); FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator I(VD->getAttrs().begin()), @@ -3709,10 +3714,7 @@ /// \endcode static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, - ArrayRef PrivateVars, - ArrayRef FirstprivateVars, - ArrayRef LastprivateVars, - QualType PrivatesQTy, + const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef Privates) { ASTContext &C = CGM.getContext(); FunctionArgList Args; @@ -3721,9 +3723,9 @@ C.getPointerType(PrivatesQTy).withConst().withRestrict(), ImplicitParamDecl::Other); Args.push_back(&TaskPrivatesArg); - llvm::DenseMap PrivateVarsPos; + llvm::DenseMap, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (const Expr *E : PrivateVars) { + for (const Expr *E : Data.PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3734,7 +3736,7 @@ PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : FirstprivateVars) { + for (const Expr *E : Data.FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3745,7 +3747,7 @@ PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : LastprivateVars) { + for (const Expr *E : Data.LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3756,6 +3758,17 @@ PrivateVarsPos[VD] = Counter; ++Counter; } + for (const VarDecl *VD : Data.PrivateLocals) { + QualType Ty = VD->getType().getNonReferenceType(); + if (VD->getType()->isLValueReferenceType()) + Ty = C.getPointerType(Ty); + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + 
C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), + ImplicitParamDecl::Other)); + PrivateVarsPos[VD] = Counter; + ++Counter; + } const auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); llvm::FunctionType *TaskPrivatesMapTy = @@ -4011,16 +4024,16 @@ /// Checks if destructor function is required to be generated. /// \return true if cleanups are required, false otherwise. static bool -checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { - bool NeedsCleanup = false; - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); - const auto *PrivateRD = cast(FI->getType()->getAsTagDecl()); - for (const FieldDecl *FD : PrivateRD->fields()) { - NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); - if (NeedsCleanup) - break; +checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, + ArrayRef Privates) { + for (const PrivateDataTy &P : Privates) { + if (!P.second.OriginalRef) + continue; + QualType Ty = P.second.Original->getType().getNonReferenceType(); + if (Ty.isDestructedType()) + return true; } - return NeedsCleanup; + return false; } namespace { @@ -4190,9 +4203,12 @@ /*PrivateElemInit=*/nullptr)); ++I; } - llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { - return L.first > R.first; - }); + for (const VarDecl *VD : Data.PrivateLocals) + Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + llvm::stable_sort(Privates, + [](const PrivateDataTy &L, const PrivateDataTy &R) { + return L.first > R.first; + }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). 
emitKmpRoutineEntryT(KmpInt32Ty); @@ -4234,9 +4250,8 @@ std::next(TaskFunction->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - TaskPrivatesMap = emitTaskPrivateMappingFunction( - CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, - FI->getType(), Privates); + TaskPrivatesMap = + emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -4266,7 +4281,8 @@ unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; if (!Privates.empty()) { - NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); + NeedsCleanup = + checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); if (NeedsCleanup) Flags = Flags | DestructorsFlag; } @@ -11218,56 +11234,64 @@ if (!VD) return Address::invalid(); const VarDecl *CVD = VD->getCanonicalDecl(); - if (!CVD->hasAttr()) + if (CVD->hasAttr()) { + const auto *AA = CVD->getAttr(); + // Use the default allocation. 
+ if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is an enum + // (integer). Convert to pointer type, if required. 
+ if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Allocator, CGM.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc), + Args, getName({CVD->getName(), ".void.addr"})); + llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, + Allocator}; + llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free); + + CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + getName({CVD->getName(), ".addr"})); + return Address(Addr, Align); + } + if (UntiedLocalVarsStack.empty()) return Address::invalid(); - const auto *AA = CVD->getAttr(); - // Use the default allocation. 
- if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && - !AA->getAllocator()) + const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); + auto It = UntiedData.find(VD); + if (It == UntiedData.end()) return Address::invalid(); - llvm::Value *Size; - CharUnits Align = CGM.getContext().getDeclAlign(CVD); - if (CVD->getType()->isVariablyModifiedType()) { - Size = CGF.getTypeSize(CVD->getType()); - // Align the size: ((size + align - 1) / align) * align - Size = CGF.Builder.CreateNUWAdd( - Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); - Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); - Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); - } else { - CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); - Size = CGM.getSize(Sz.alignTo(Align)); - } - llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); - assert(AA->getAllocator() && - "Expected allocator expression for non-default allocator."); - llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); - // According to the standard, the original allocator type is a enum (integer). - // Convert to pointer type, if required. 
- if (Allocator->getType()->isIntegerTy()) - Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); - else if (Allocator->getType()->isPointerTy()) - Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, - CGM.VoidPtrTy); - llvm::Value *Args[] = {ThreadID, Size, Allocator}; - - llvm::Value *Addr = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_alloc), - Args, getName({CVD->getName(), ".void.addr"})); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, - Allocator}; - llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_free); - CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - getName({CVD->getName(), ".addr"})); - return Address(Addr, Align); + return It->second; } CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( @@ -11302,6 +11326,21 @@ CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); } +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( + CodeGenModule &CGM, + const llvm::DenseMap, Address> &LocalVars) + : CGM(CGM), NeedToPush(!LocalVars.empty()) { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); +} + +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); +} + bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -21,6 +21,7 @@ #include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" #include 
"clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -3785,6 +3786,42 @@ checkForLastprivateConditionalUpdate(*this, S); } +namespace { +/// Get the list of variables declared in the context of the untied tasks. +class CheckVarsEscapingUntiedTaskDeclContext final + : public ConstStmtVisitor { + llvm::SmallVector PrivateDecls; + +public: + explicit CheckVarsEscapingUntiedTaskDeclContext() = default; + virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; + void VisitDeclStmt(const DeclStmt *S) { + if (!S) + return; + // Need to privatize only local vars, static locals can be processed as is. + for (const Decl *D : S->decls()) { + if (const auto *VD = dyn_cast_or_null(D)) + if (VD->hasLocalStorage()) + PrivateDecls.push_back(VD); + } + } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } + void VisitCapturedStmt(const CapturedStmt *) { return; } + void VisitLambdaExpr(const LambdaExpr *) { return; } + void VisitBlockExpr(const BlockExpr *) { return; } + void VisitStmt(const Stmt *S) { + if (!S) + return; + for (const Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + + /// Returns the list of local variables collected by the visitor. + ArrayRef getPrivateDecls() const { return PrivateDecls; } +}; +} // anonymous namespace + void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, @@ -3885,14 +3922,22 @@ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); } + // Get list of local vars for untied tasks. 
+ if (!Data.Tied) { + CheckVarsEscapingUntiedTaskDeclContext Checker; + Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); + Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), + Checker.getPrivateDecls().end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { + llvm::DenseMap, Address> UntiedLocalVars; // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); llvm::SmallVector, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || - !Data.LastprivateVars.empty()) { + !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; @@ -3928,6 +3973,15 @@ PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } + for (const VarDecl *VD : Data.PrivateLocals) { + QualType Ty = VD->getType().getNonReferenceType(); + if (VD->getType()->isLValueReferenceType()) + Ty = CGF.getContext().getPointerType(Ty); + Address PrivatePtr = CGF.CreateMemTemp( + CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); + UntiedLocalVars.try_emplace(VD, PrivatePtr); + CallArgs.push_back(PrivatePtr.getPointer()); + } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { @@ -3946,6 +4000,13 @@ CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } + // Adjust mapping for internal locals by mapping actual memory instead of + // a pointer to this memory. 
+ for (auto &Pair : UntiedLocalVars) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond() = Replacement; + } } if (Data.Reductions) { OMPPrivateScope FirstprivateScope(CGF); @@ -4040,6 +4101,8 @@ } (void)InRedScope.Privatize(); + CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM, + UntiedLocalVars); Action.Enter(CGF); BodyGen(CGF); }; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // @@ -259,7 +259,7 @@ a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) #pragma omp task untied { @@ -296,26 +296,54 @@ // CHECK: store i32 4, i32* [[A_PTR]] // CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: switch i32 %{{.+}}, label +// UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, +// UNTIEDRT: call void (i8*, ...) 
%{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) +// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ + +// CHECK: [[DONE]]: +// CHECK: br label %[[CLEANUP:[^,]+]] + // CHECK: load i32*, i32** % // CHECK: store i32 1, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT:[^,]+]] +// UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]]) // CHECK: call i8* @__kmpc_omp_task_alloc( // CHECK: call i32 @__kmpc_omp_task(% // CHECK: load i32*, i32** % // CHECK: store i32 2, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] // CHECK: call i32 @__kmpc_omp_taskyield(% // CHECK: load i32*, i32** % // CHECK: store i32 3, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// s1 = S(); +// UNTIEDRT: call void [[CONSTR]](%struct.S* [[TMP:%.+]]) +// UNTIEDRT: [[DST:%.+]] = bitcast %struct.S* [[S1_ADDR]] to i8* +// UNTIEDRT: [[SRC:%.+]] = bitcast %struct.S* [[TMP]] to i8* +// UNTIEDRT: call void @llvm.memcpy.{{.+}}(i8* {{.*}}[[DST]], i8* {{.*}}[[SRC]], i64 4, i1 false) +// UNTIEDRT: call void [[DESTR:@.+]](%struct.S* [[TMP]]) // CHECK: call i32 @__kmpc_omp_taskwait(% // CHECK: load i32*, i32** % // CHECK: store i32 4, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]]) +// CHECK: br label %[[CLEANUP]] + +// CHECK: [[CLEANUP]]: +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: [[EXIT]]: +// UNTIEDRT-NEXT: ret i32 0 struct S1 { int a;