diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -253,9 +253,9 @@ public: UntiedTaskLocalDeclsRAII( - CodeGenModule &CGM, - const llvm::DenseMap, Address> - &LocalVars); + CodeGenFunction &CGF, + const llvm::DenseMap, + std::pair> &LocalVars); ~UntiedTaskLocalDeclsRAII(); }; @@ -432,6 +432,8 @@ std::tuple>> LastprivateConditionalToTypes; + /// Maps function to the position of the untied task locals stack. + llvm::DenseMap FunctionToUntiedTaskStackMap; /// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -720,7 +722,8 @@ llvm::SmallVector NontemporalDeclsStack; using UntiedLocalVarsAddressesMap = - llvm::DenseMap, Address>; + llvm::DenseMap, + std::pair>; llvm::SmallVector UntiedLocalVarsStack; /// Stack for list of addresses of declarations in current context marked as @@ -1882,6 +1885,9 @@ /// Destroys user defined allocators specified in the uses_allocators clause. void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator); + + /// Returns true if the variable is a local variable in untied task. + bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const; }; /// Class supports emissionof SIMD-only code. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1526,6 +1526,7 @@ FunctionUDMMap.erase(I); } LastprivateConditionalToTypes.erase(CGF.CurFn); + FunctionToUntiedTaskStackMap.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -3382,6 +3383,17 @@ typedef std::pair PrivateDataTy; } // anonymous namespace +static bool isAllocatableDecl(const VarDecl *VD) { + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr()) + return false; + const auto *AA = CVD->getAttr(); + // Use the default allocation. + return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()); +} + static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) { if (!Privates.empty()) { @@ -3396,9 +3408,12 @@ QualType Type = VD->getType().getNonReferenceType(); // If the private variable is a local variable with lvalue ref type, // allocate the pointer instead of the pointee type. - if (Pair.second.isLocalPrivate() && - VD->getType()->isLValueReferenceType()) - Type = C.getPointerType(Type); + if (Pair.second.isLocalPrivate()) { + if (VD->getType()->isLValueReferenceType()) + Type = C.getPointerType(Type); + if (isAllocatableDecl(VD)) + Type = C.getPointerType(Type); + } FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator I(VD->getAttrs().begin()), @@ -3700,6 +3715,8 @@ QualType Ty = VD->getType().getNonReferenceType(); if (VD->getType()->isLValueReferenceType()) Ty = C.getPointerType(Ty); + if (isAllocatableDecl(VD)) + Ty = C.getPointerType(Ty); Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), @@ -3780,8 +3797,10 @@ FI = cast(FI->getType()->getAsTagDecl())->field_begin(); for (const PrivateDataTy &Pair : Privates) { // Do not initialize private locals. - if (Pair.second.isLocalPrivate()) + if (Pair.second.isLocalPrivate()) { + ++FI; continue; + } const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa(Init) && @@ -4146,8 +4165,12 @@ /*PrivateElemInit=*/nullptr)); ++I; } - for (const VarDecl *VD : Data.PrivateLocals) - Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + for (const VarDecl *VD : Data.PrivateLocals) { + if (isAllocatableDecl(VD)) + Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); + else + Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + } llvm::stable_sort(Privates, [](const PrivateDataTy &L, const PrivateDataTy &R) { return L.first > R.first; @@ -11224,44 +11247,27 @@ return CGF.GetAddrOfLocalVar(NativeParam); } -namespace { -/// Cleanup action for allocate support. -class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { -public: - static const int CleanupArgs = 3; - -private: - llvm::FunctionCallee RTLFn; - llvm::Value *Args[CleanupArgs]; - -public: - OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, - ArrayRef CallArgs) - : RTLFn(RTLFn) { - assert(CallArgs.size() == CleanupArgs && - "Size of arguments does not match."); - std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); - } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(RTLFn, Args); - } -}; -} // namespace - Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { if (!VD) return Address::invalid(); + Address UntiedAddr = Address::invalid(); + Address UntiedRealAddr = Address::invalid(); + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It != FunctionToUntiedTaskStackMap.end()) { + const UntiedLocalVarsAddressesMap &UntiedData = + UntiedLocalVarsStack[It->second]; + auto I = UntiedData.find(VD); + if (I != UntiedData.end()) { + UntiedAddr = I->second.first; + UntiedRealAddr = I->second.second; + } + } const VarDecl *CVD = VD->getCanonicalDecl(); if (CVD->hasAttr()) { - const auto *AA = CVD->getAttr(); // Use the default allocation. - if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && - !AA->getAllocator()) - return Address::invalid(); + if (!isAllocatableDecl(VD)) + return UntiedAddr; llvm::Value *Size; CharUnits Align = CGM.getContext().getDeclAlign(CVD); if (CVD->getType()->isVariablyModifiedType()) { @@ -11276,43 +11282,80 @@ Size = CGM.getSize(Sz.alignTo(Align)); } llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + const auto *AA = CVD->getAttr(); assert(AA->getAllocator() && "Expected allocator expression for non-default allocator."); llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); // According to the standard, the original allocator type is a enum // (integer). Convert to pointer type, if required. - if (Allocator->getType()->isIntegerTy()) - Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); - else if (Allocator->getType()->isPointerTy()) - Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Allocator, CGM.VoidPtrTy); + Allocator = CGF.EmitScalarConversion( + Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, + AA->getAllocator()->getExprLoc()); llvm::Value *Args[] = {ThreadID, Size, Allocator}; llvm::Value *Addr = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_alloc), Args, getName({CVD->getName(), ".void.addr"})); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, - Allocator}; llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free); - - CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); + QualType Ty = CGM.getContext().getPointerType(CVD->getType()); Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - getName({CVD->getName(), ".addr"})); - return Address(Addr, Align); + Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); + if (UntiedAddr.isValid()) + CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); + + // Cleanup action for allocate support. + class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { + llvm::FunctionCallee RTLFn; + unsigned LocEncoding; + Address Addr; + const Expr *Allocator; + + public: + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, + Address Addr, const Expr *Allocator) + : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), + Allocator(Allocator) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + llvm::Value *Args[3]; + Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( + CGF, SourceLocation::getFromRawEncoding(LocEncoding)); + Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr.getPointer(), CGF.VoidPtrTy); + llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. + AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + Args[2] = AllocVal; + + CGF.EmitRuntimeCall(RTLFn, Args); + } + }; + Address VDAddr = + UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), + VDAddr, AA->getAllocator()); + if (UntiedRealAddr.isValid()) + if (auto *Region = + dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); + return VDAddr; } - if (UntiedLocalVarsStack.empty()) - return Address::invalid(); - const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); - auto It = UntiedData.find(VD); - if (It == UntiedData.end()) - return Address::invalid(); + return UntiedAddr; +} - return It->second; +bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, + const VarDecl *VD) const { + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It == FunctionToUntiedTaskStackMap.end()) + return false; + return UntiedLocalVarsStack[It->second].count(VD) > 0; } CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( @@ -11348,11 +11391,14 @@ } CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( - CodeGenModule &CGM, - const llvm::DenseMap, Address> &LocalVars) - : CGM(CGM), NeedToPush(!LocalVars.empty()) { + CodeGenFunction &CGF, + const llvm::DenseMap, + std::pair> &LocalVars) + : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { if (!NeedToPush) return; + CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( + CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1563,6 +1563,17 @@ CapturedVars, IfCond); } +static bool isAllocatableDecl(const VarDecl *VD) { + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr()) + return false; + const auto *AA = CVD->getAttr(); + // Use the default allocation. + return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()); +} + static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl &) {} @@ -1575,12 +1586,7 @@ if (!VD) return Address::invalid(); const VarDecl *CVD = VD->getCanonicalDecl(); - if (!CVD->hasAttr()) - return Address::invalid(); - const auto *AA = CVD->getAttr(); - // Use the default allocation. - if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && - !AA->getAllocator()) + if (!isAllocatableDecl(CVD)) return Address::invalid(); llvm::Value *Size; CharUnits Align = CGM.getContext().getDeclAlign(CVD); @@ -1596,6 +1602,7 @@ Size = CGM.getSize(Sz.alignTo(Align)); } + const auto *AA = CVD->getAttr(); assert(AA->getAllocator() && "Expected allocator expression for non-default allocator."); llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); @@ -3931,7 +3938,8 @@ auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { - llvm::DenseMap, Address> UntiedLocalVars; + llvm::DenseMap, std::pair> + UntiedLocalVars; // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); llvm::SmallVector, 16> FirstprivatePtrs; @@ -3976,9 +3984,11 @@ QualType Ty = VD->getType().getNonReferenceType(); if (VD->getType()->isLValueReferenceType()) Ty = CGF.getContext().getPointerType(Ty); + if (isAllocatableDecl(VD)) + Ty = CGF.getContext().getPointerType(Ty); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); - UntiedLocalVars.try_emplace(VD, PrivatePtr); + UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid()); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( @@ -4002,9 +4012,18 @@ // Adjust mapping for internal locals by mapping actual memory instead of // a pointer to this memory. for (auto &Pair : UntiedLocalVars) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Pair.getSecond() = Replacement; + if (isAllocatableDecl(Pair.first)) { + llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); + Address Replacement(Ptr, CGF.getPointerAlign()); + Pair.getSecond().first = Replacement; + Ptr = CGF.Builder.CreateLoad(Replacement); + Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond().second = Replacement; + } else { + llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); + Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond().first = Replacement; + } } } if (Data.Reductions) { @@ -4100,7 +4119,7 @@ } (void)InRedScope.Privatize(); - CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM, + CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, UntiedLocalVars); Action.Enter(CGF); BodyGen(CGF); diff --git a/clang/test/OpenMP/allocate_codegen.cpp b/clang/test/OpenMP/allocate_codegen.cpp --- a/clang/test/OpenMP/allocate_codegen.cpp +++ b/clang/test/OpenMP/allocate_codegen.cpp @@ -85,6 +85,7 @@ // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} // CHECK: store i32 %{{.+}}, i32* [[V_ADDR]], // CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]], +// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = bitcast i32* [[V_ADDR]] to i8* // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8* inttoptr (i64 6 to i8*)) // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} // CHECK: ret i32 [[V_VAL]] @@ -101,7 +102,9 @@ // CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float** // CHECK: store float* %{{.+}}, float** [[Z_ADDR]], #pragma omp allocate(a,z) allocator(omp_default_mem_alloc) +// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*)) +// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*)) // CHECK: ret void } diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -654,7 +654,8 @@ // CHECK-NEXT: br label %[[LAST_DONE]] // CHECK: [[LAST_DONE]] -// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*)) +// CHECK: [[F_VOID_PTR:%.+]] = bitcast float* [[F_PRIV]] to i8* +// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*)) // CHECK-NEXT: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -414,6 +414,7 @@ // CHECK: [[ADD:%.+]] = add nsw i64 [[LVAR_VAL]], 3 // CHECK: store i64 [[ADD]], i64* [[LVAR_PRIV]], // CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}}) +// CHECK: [[LVAR_VOID_PTR:%.+]] = bitcast i64* [[LVAR_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[LVAR_VOID_PTR]], i8* inttoptr (i64 5 to i8*)) // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK: ret void diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -876,6 +876,7 @@ // CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % +// CHECK: [[VAR3_VOID_PTR:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[VAR3_VOID_PTR]], i8* inttoptr (i64 6 to i8*)) // CHECK: ret void diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -423,6 +423,7 @@ // CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]], // CHECK: store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]], // CHECK: store i32 0, i32* [[T_VAR_PRIV]], +// CHECK: [[T_VAR_VOID_PTR:%.+]] = bitcast i32* [[T_VAR_PRIV]] to i8* // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) // CHECK: ret void @@ -584,6 +585,7 @@ // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8 // ARRAY: [[BC:%.+]] = bitcast double* [[VLA2_PTR]] to i8* // ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC]], i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false) +// ARRAY: [[VLA2_VOID_PTR:%.+]] = bitcast double* [[VLA2_PTR]] to i8* // ARRAY: call void @__kmpc_free(i32 [[GTID]], i8* [[VLA2_VOID_PTR]], i8* inttoptr (i64 8 to i8*)) // ARRAY-NEXT: ret void #endif diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -361,12 +361,13 @@ // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_PTR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], // CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*)) -// CHECK: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* -// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]], +// CHECK: [[A_PRIV_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* +// CHECK: store i{{[0-9]+}}* [[A_PRIV_ADDR]], i{{[0-9]+}}** [[REF:%.+]], // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]], // CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], // CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 // CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_PRIV_ADDR]] to i8* // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 2 to i8*)) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s @@ -14,6 +14,19 @@ #ifndef HEADER #define HEADER +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* } // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* } @@ -258,21 +271,26 @@ a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied +#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c) { - S s1; + S s1, s2; +#ifdef UNTIEDRT +#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc) +#endif + s2.a = 0; #pragma omp task - a = 4; + a = c = 4; #pragma omp taskyield s1 = S(); + s2.a = 10; #pragma omp taskwait } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} @@ -294,10 +312,13 @@ // CHECK: define internal i32 // CHECK: store i32 4, i32* [[A_PTR]] -// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) +// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}}) // UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, -// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) -// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**, +// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]]) +// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]], +// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]], // CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ // CHECK: [[DONE]]: @@ -309,16 +330,25 @@ // UNTIEDRT: br label %[[EXIT:[^,]+]] // UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]]) +// UNTIEDRT: [[S2_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 %{{.+}}, i64 4, i8* inttoptr (i64 7 to i8*)) +// UNTIEDRT: [[S2_PTR:%.+]] = bitcast i8* [[S2_VOID_PTR]] to %struct.S* +// UNTIEDRT: store %struct.S* [[S2_PTR]], %struct.S** [[S2_ADDR_PTR]], +// UNTIEDRT: load i32*, i32** % +// UNTIEDRT: store i32 2, i32* % +// UNTIEDRT: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: call void [[CONSTR]](%struct.S* [[S2_ADDR]]) // CHECK: call i8* @__kmpc_omp_task_alloc( // CHECK: call i32 @__kmpc_omp_task(% // CHECK: load i32*, i32** % -// CHECK: store i32 2, i32* % +// CHECK: store i32 {{2|3}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] // CHECK: call i32 @__kmpc_omp_taskyield(% // CHECK: load i32*, i32** % -// CHECK: store i32 3, i32* % +// CHECK: store i32 {{3|4}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] @@ -331,10 +361,13 @@ // CHECK: call i32 @__kmpc_omp_taskwait(% // CHECK: load i32*, i32** % -// CHECK: store i32 4, i32* % +// CHECK: store i32 {{4|5}}, i32* % // CHECK: call i32 @__kmpc_omp_task(% // UNTIEDRT: br label %[[EXIT]] +// UNTIEDRT: call void [[DESTR]](%struct.S* [[S2_ADDR]]) +// UNTIEDRT: [[S2_VOID_PTR:%.+]] = bitcast %struct.S* [[S2_ADDR]] to i8* +// UNTIEDRT: call void @__kmpc_free(i32 %{{.+}}, i8* [[S2_VOID_PTR]], i8* inttoptr (i64 7 to i8*)) // UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]]) // CHECK: br label %[[CLEANUP]]