diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -458,6 +458,16 @@ /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// Type typedef struct kmp_task_affinity_info { + /// kmp_intptr_t base_addr; + /// size_t len; + /// struct { + /// bool flag1 : 1; + /// bool flag2 : 1; + /// kmp_int32 reserved : 30; + /// } flags; + /// } kmp_task_affinity_info_t; + QualType KmpTaskAffinityInfoTy; /// struct kmp_dim { // loop bounds info casted to kmp_int64 /// kmp_int64 lo; // lower /// kmp_int64 up; // upper diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4024,6 +4024,135 @@ return NeedsCleanup; } +namespace { +/// Loop generator for OpenMP iterator expression. +class OMPIteratorGeneratorScope final + : public CodeGenFunction::OMPPrivateScope { + CodeGenFunction &CGF; + const OMPIteratorExpr *E = nullptr; + SmallVector<CodeGenFunction::JumpDest, 4> ContDests; + SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; + OMPIteratorGeneratorScope() = delete; + OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; + +public: + OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) + : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { + if (!E) + return; + SmallVector<llvm::Value *, 4> Uppers; + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { + Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); + const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); + addPrivate(VD, [&CGF, VD]() { + return CGF.CreateMemTemp(VD->getType(), VD->getName()); + }); + const OMPIteratorHelperData &HelperData = E->getHelper(I); + addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { + return CGF.CreateMemTemp(HelperData.CounterVD->getType(), + "counter.addr"); + }); + } + Privatize(); + + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { + const
OMPIteratorHelperData &HelperData = E->getHelper(I); + LValue CLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), + HelperData.CounterVD->getType()); + // Counter = 0; + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), + CLVal); + CodeGenFunction::JumpDest &ContDest = + ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); + CodeGenFunction::JumpDest &ExitDest = + ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); + // N = <number-of_iterations>; + llvm::Value *N = Uppers[I]; + // cont: + // if (Counter < N) goto body; else goto exit; + CGF.EmitBlock(ContDest.getBlock()); + auto *CVal = + CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); + llvm::Value *Cmp = + HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() + ? CGF.Builder.CreateICmpSLT(CVal, N) + : CGF.Builder.CreateICmpULT(CVal, N); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); + CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); + // body: + CGF.EmitBlock(BodyBB); + // Iteri = Begini + Counter * Stepi; + CGF.EmitIgnoredExpr(HelperData.Update); + } + } + ~OMPIteratorGeneratorScope() { + if (!E) + return; + for (unsigned I = E->numOfIterators(); I > 0; --I) { + // Counter = Counter + 1; + const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); + CGF.EmitIgnoredExpr(HelperData.CounterUpdate); + // goto cont; + CGF.EmitBranchThroughCleanup(ContDests[I - 1]); + // exit: + CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); + } + } +}; +} // namespace + +static std::pair<llvm::Value *, llvm::Value *> +getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { + const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); + llvm::Value *Addr; + if (OASE) { + const Expr *Base = OASE->getBase(); + Addr = CGF.EmitScalarExpr(Base); + } else { + Addr = CGF.EmitLValue(E).getPointer(CGF); + } + llvm::Value *SizeVal; + QualType Ty = E->getType(); + if (OASE) { + SizeVal =
CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); + for (const Expr *SE : OASE->getDimensions()) { + llvm::Value *Sz = CGF.EmitScalarExpr(SE); + Sz = CGF.EmitScalarConversion( + Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); + SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); + } + } else if (const auto *ASE = + dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { + LValue UpAddrLVal = + CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); + llvm::Value *UpAddr = + CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); + llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); + llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); + SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); + } else { + SizeVal = CGF.getTypeSize(Ty); + } + return std::make_pair(Addr, SizeVal); +} + +/// Builds kmp_task_affinity_info_t, if it is not built yet. +static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { + QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); + if (KmpTaskAffinityInfoTy.isNull()) { + RecordDecl *KmpAffinityInfoRD = + C.buildImplicitRecord("kmp_task_affinity_info_t"); + KmpAffinityInfoRD->startDefinition(); + addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); + addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); + addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); + KmpAffinityInfoRD->completeDefinition(); + KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); + } +} + CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, @@ -4202,6 +4331,142 @@ Evt->getExprLoc()); CGF.EmitStoreOfScalar(EvtVal, EvtLVal); } + // Process affinity clauses. + if (D.hasClausesOfKind<OMPAffinityClause>()) { + // Process list of affinity data.
+ ASTContext &C = CGM.getContext(); + Address AffinitiesArray = Address::invalid(); + // Calculate number of elements to form the array of affinity data. + llvm::Value *NumOfElements = nullptr; + unsigned NumAffinities = 0; + for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + if (const Expr *Modifier = C->getModifier()) { + const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); + Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); + NumOfElements = + NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; + } + } else { + NumAffinities += C->varlist_size(); + } + } + getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); + // Fields ids in kmp_task_affinity_info record. + enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; + + QualType KmpTaskAffinityInfoArrayTy; + if (NumOfElements) { + NumOfElements = CGF.Builder.CreateNUWAdd( + llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); + OpaqueValueExpr OVE( + Loc, + C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), + VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + RValue::get(NumOfElements)); + KmpTaskAffinityInfoArrayTy = + C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, + /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); + // Properly emit variable-sized array.
+ auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, + ImplicitParamDecl::Other); + CGF.EmitVarDecl(*PD); + AffinitiesArray = CGF.GetAddrOfLocalVar(PD); + NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, + /*isSigned=*/false); + } else { + KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( + KmpTaskAffinityInfoTy, + llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, + ArrayType::Normal, /*IndexTypeQuals=*/0); + AffinitiesArray = + CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); + AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); + NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, + /*isSigned=*/false); + } + + const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); + // Fill array by elements without iterators. + unsigned Pos = 0; + bool HasIterator = false; + for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + if (C->getModifier()) { + HasIterator = true; + continue; + } + for (const Expr *E : C->varlists()) { + llvm::Value *Addr; + llvm::Value *Size; + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + LValue Base = + CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), + KmpTaskAffinityInfoTy); + // affs[i].base_addr = &<Affinities[i].second>; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), + BaseAddrLVal); + // affs[i].len = sizeof(<Affinities[i].second>); + LValue LenLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); + CGF.EmitStoreOfScalar(Size, LenLVal); + ++Pos; + } + } + LValue PosLVal; + if (HasIterator) { + PosLVal = CGF.MakeAddrLValue( + CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), + C.getSizeType()); + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); + } + // Process elements with iterators.
+ for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + const Expr *Modifier = C->getModifier(); + if (!Modifier) + continue; + OMPIteratorGeneratorScope IteratorScope( + CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); + for (const Expr *E : C->varlists()) { + llvm::Value *Addr; + llvm::Value *Size; + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); + LValue Base = CGF.MakeAddrLValue( + Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), + AffinitiesArray.getAlignment()), + KmpTaskAffinityInfoTy); + // affs[i].base_addr = &<Affinities[i].second>; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), + BaseAddrLVal); + // affs[i].len = sizeof(<Affinities[i].second>); + LValue LenLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); + CGF.EmitStoreOfScalar(Size, LenLVal); + Idx = CGF.Builder.CreateNUWAdd( + Idx, llvm::ConstantInt::get(Idx->getType(), 1)); + CGF.EmitStoreOfScalar(Idx, PosLVal); + } + } + // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, + // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 + // naffins, kmp_task_affinity_info_t *affin_list); + llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = getThreadID(CGF, Loc); + llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + AffinitiesArray.getPointer(), CGM.VoidPtrTy); + // FIXME: Emit the function and ignore its result for now unless the + // runtime function is properly implemented.
+ (void)CGF.EmitRuntimeCall( + llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), + {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); + } llvm::Value *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( NewTask, KmpTaskTWithPrivatesPtrTy); @@ -4350,85 +4615,6 @@ return std::make_pair(NumDeps, Base); } -namespace { -/// Loop generator for OpenMP iterator expression. -class OMPIteratorGeneratorScope final - : public CodeGenFunction::OMPPrivateScope { - CodeGenFunction &CGF; - const OMPIteratorExpr *E = nullptr; - SmallVector<CodeGenFunction::JumpDest, 4> ContDests; - SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; - OMPIteratorGeneratorScope() = delete; - OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; - -public: - OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) - : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { - if (!E) - return; - SmallVector<llvm::Value *, 4> Uppers; - for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { - Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); - const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); - addPrivate(VD, [&CGF, VD]() { - return CGF.CreateMemTemp(VD->getType(), VD->getName()); - }); - const OMPIteratorHelperData &HelperData = E->getHelper(I); - addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { - return CGF.CreateMemTemp(HelperData.CounterVD->getType(), - "counter.addr"); - }); - } - Privatize(); - - for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { - const OMPIteratorHelperData &HelperData = E->getHelper(I); - LValue CLVal = - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), - HelperData.CounterVD->getType()); - // Counter = 0; - CGF.EmitStoreOfScalar( - llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), - CLVal); - CodeGenFunction::JumpDest &ContDest = - ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); - CodeGenFunction::JumpDest &ExitDest =
ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); - // N = <number-of_iterations>; - llvm::Value *N = Uppers[I]; - // cont: - // if (Counter < N) goto body; else goto exit; - CGF.EmitBlock(ContDest.getBlock()); - auto *CVal = - CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); - llvm::Value *Cmp = - HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() - ? CGF.Builder.CreateICmpSLT(CVal, N) - : CGF.Builder.CreateICmpULT(CVal, N); - llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); - CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); - // body: - CGF.EmitBlock(BodyBB); - // Iteri = Begini + Counter * Stepi; - CGF.EmitIgnoredExpr(HelperData.Update); - } - } - ~OMPIteratorGeneratorScope() { - if (!E) - return; - for (unsigned I = E->numOfIterators(); I > 0; --I) { - // Counter = Counter + 1; - const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); - CGF.EmitIgnoredExpr(HelperData.CounterUpdate); - // goto cont; - CGF.EmitBranchThroughCleanup(ContDests[I - 1]); - // exit: - CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); - } - } -}; -} // namespace - static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion<unsigned *, LValue *> Pos, const OMPTaskDataTy::DependData &Data, @@ -4446,37 +4632,9 @@ Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts() : nullptr)); for (const Expr *E : Data.DepExprs) { - const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); llvm::Value *Addr; - if (OASE) { - const Expr *Base = OASE->getBase(); - Addr = CGF.EmitScalarExpr(Base); - } else { - Addr = CGF.EmitLValue(E).getPointer(CGF); - } llvm::Value *Size; - QualType Ty = E->getType(); - if (OASE) { - Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); - for (const Expr *SE : OASE->getDimensions()) { - llvm::Value *Sz = CGF.EmitScalarExpr(SE); - Sz = CGF.EmitScalarConversion(Sz, SE->getType(), - CGF.getContext().getSizeType(), - SE->getExprLoc()); - Size = CGF.Builder.CreateNUWMul(Size, Sz); - } - } else if (const auto *ASE = - dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { - LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( - UpAddrLVal.getPointer(CGF), /*Idx0=*/1); - llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy); - llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); - Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else { - Size = CGF.getTypeSize(Ty); - } + std::tie(Addr, Size) = getPointerAndSize(CGF, E); LValue Base; if (unsigned *P = Pos.dyn_cast<unsigned *>()) { Base = CGF.MakeAddrLValue( diff --git a/clang/test/OpenMP/task_affinity_codegen.cpp b/clang/test/OpenMP/task_affinity_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/task_affinity_codegen.cpp @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm
%s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: @main +int main() { + float *p; + int a = 10; + // kmp_task_affinity_info_t affs[1]; + // CHECK: [[AFFS_ADDR:%.+]] = alloca [1 x %struct.kmp_task_affinity_info_t], + // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID:%.+]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* @{{.+}} to i32 (i32, i8*)*)) + // CHECK: [[AFFINE_LST_ADDR:%.+]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], [1 x %struct.kmp_task_affinity_info_t]* [[AFFS_ADDR]], i64 0, i64 0 + // CHECK: [[P:%.+]] = load float*, float** [[P_ADDR:%.+]], + // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR:%.+]], + // CHECK: [[A_SZ:%.+]] = sext i32 [[A_VAL]] to i64 + // CHECK: [[BYTES:%.+]] = mul nuw i64 4, [[A_SZ]] + // CHECK: [[SZ:%.+]] = mul nuw i64 [[BYTES]], 10 + // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: [[A_SZ1:%.+]] = sext i32 [[A_VAL]] to i64 + // CHECK: [[SIZE:%.+]] = mul nuw i64 [[SZ]], [[A_SZ1]] + // CHECK: [[AFFS_0_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFINE_LST_ADDR]], i64 0 + + // affs[0].base = p; + // CHECK: [[AFFS_0_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 0 + // CHECK: [[P_INTPTR:%.+]] = ptrtoint float* [[P]] to i64 + // CHECK: store i64 [[P_INTPTR]], i64* [[AFFS_0_BASE_ADDR]], + + // affs[0].size = sizeof(*p) * a * 10 * a; + // CHECK: [[AFFS_0_SIZE_ADDR:%.+]] = getelementptr inbounds
%struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 1 + // CHECK: store i64 [[SIZE]], i64* [[AFFS_0_SIZE_ADDR]], + // CHECK: [[BC:%.+]] = bitcast %struct.kmp_task_affinity_info_t* [[AFFINE_LST_ADDR]] to i8* + // CHECK: call i32 @__kmpc_omp_reg_task_with_affinity(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 1, i8* [[BC]]) +#pragma omp task affinity(([a][10][a])p) + ; + // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* @{{.+}} to i32 (i32, i8*)*)) + // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: [[SUB:%.+]] = sub nsw i32 [[A_VAL]], 0 + // CHECK: [[CONV:%.+]] = zext i32 [[SUB]] to i64 + + // <num_elem> = <number-of-iterations> + 1 constant affinity for affinity(a) + // CHECK: [[NUM_ELEMS:%.+]] = add nuw i64 1, [[CONV]] + // CHECK: [[SV:%.+]] = call i8* @llvm.stacksave() + // CHECK: store i8* [[SV]], i8** [[SV_ADDR:%.+]], + + // kmp_task_affinity_info_t affs[<num_elem>]; + // CHECK: [[AFFS_ADDR:%.+]] = alloca %struct.kmp_task_affinity_info_t, i64 [[NUM_ELEMS]], + // store i64 %21, i64* %__vla_expr0, align 8 + // CHECK: [[NAFFS:%.+]] = trunc i64 [[NUM_ELEMS]] to i32 + // CHECK: [[AFFS_0_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]], i64 0 + + // affs[0].base = &a; + // CHECK: [[AFFS_0_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 0 + // CHECK: [[A_INTPTR:%.+]] = ptrtoint i32* [[A_ADDR]] to i64 + // CHECK: store i64 [[A_INTPTR]], i64* [[AFFS_0_BASE_ADDR]], + + // affs[0].size = sizeof(a); + // CHECK: [[AFFS_0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_0_ADDR]], i32 0, i32 1 + // CHECK: store i64 4, i64* [[AFFS_0_SIZE_ADDR]], + + // affs_cnt = 1; + // CHECK: store i64 1, i64* [[AFFS_CNT_ADDR:%.+]], + // CHECK:
[[A_VAL:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: [[NITERS:%.+]] = sub nsw i32 [[A_VAL]], 0 + // CHECK: store i32 0, i32* [[CNT_ADDR:%.+]], + // CHECK: br label %[[CONT:[^,]+]] + + //for (int cnt = 0; cnt < (a-0); ++cnt) { + // int i = cnt + 0; + // affs[affs_cnt].base = &p[i]; + // affs[affs_cnt].size = sizeof(p[i]); + // ++affs_cnt; + // } + + // CHECK: [[CONT]]: + // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT]], [[NITERS]] + // CHECK: br i1 [[CMP]], label %[[BODY:[^,]+]], label %[[DONE:[^,]+]] + + // CHECK: [[BODY]]: + // i = cnt + 0; + // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], + // CHECK: [[VAL:%.+]] = add nsw i32 0, [[CNT]] + // CHECK: store i32 [[VAL]], i32* [[I_ADDR:%.+]], + + // &p[i] + // CHECK: [[P:%.+]] = load float*, float** [[P_ADDR]], + // CHECK: [[I:%.+]] = load i32, i32* [[I_ADDR]], + // CHECK: [[IDX:%.+]] = sext i32 [[I]] to i64 + // CHECK: [[P_I_ADDR:%.+]] = getelementptr inbounds float, float* [[P]], i64 [[IDX]] + + // affs[affs_cnt] + // CHECK: [[AFFS_CNT:%.+]] = load i64, i64* [[AFFS_CNT_ADDR]], + // CHECK: [[AFFS_ELEM_ADDR:%.+]] = getelementptr %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]], i64 [[AFFS_CNT]] + + // affs[affs_cnt].base = &p[i]; + // CHECK: [[AFFS_ELEM_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ELEM_ADDR]], i32 0, i32 0 + // CHECK: [[CAST:%.+]] = ptrtoint float* [[P_I_ADDR]] to i64 + // CHECK: store i64 [[CAST]], i64* [[AFFS_ELEM_BASE_ADDR]], + + // affs[affs_cnt].size = sizeof(p[i]); + // CHECK: [[AFFS_ELEM_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_task_affinity_info_t, %struct.kmp_task_affinity_info_t* [[AFFS_ELEM_ADDR]], i32 0, i32 1 + // CHECK: store i64 4, i64* [[AFFS_ELEM_SIZE_ADDR]], + + // ++affs_cnt; + // CHECK: [[AFFS_CNT_NEXT:%.+]] = add nuw i64 [[AFFS_CNT]], 1 + // CHECK: store i64 [[AFFS_CNT_NEXT]], i64* [[AFFS_CNT_ADDR]], + + //
++cnt; + // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], + // CHECK: [[CNT_NEXT:%.+]] = add nsw i32 [[CNT]], 1 + // CHECK: store i32 [[CNT_NEXT]], i32* [[CNT_ADDR]], + // CHECK: br label %[[CONT]] + + // CHECK: [[DONE]]: + // CHECK: [[BC:%.+]] = bitcast %struct.kmp_task_affinity_info_t* [[AFFS_ADDR]] to i8* + // CHECK: call i32 @__kmpc_omp_reg_task_with_affinity(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 [[NAFFS]], i8* [[BC]]) + // CHECK: [[SV:%.+]] = load i8*, i8** [[SV_ADDR]], + // CHECK: call void @llvm.stackrestore(i8* [[SV]]) +#pragma omp task affinity(iterator(i=0:a): p[i]) affinity(a) + ; + return 0; +} + +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -336,6 +336,8 @@ __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, + Int8Ptr, Int32, Int8Ptr) __OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, )