Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1039,6 +1039,18 @@ /// \param ThreadLimit An integer expression of threads. virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc); + + /// \brief Emit the target data mapping code associated with \a D. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + /// \param CodeGen, Function that emits the enclosed region. + virtual void emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, const Expr *Device, + const RegionCodeGenTy &CodeGen); }; } // namespace CodeGen Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -638,6 +638,12 @@ OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); OMPRTL__tgt_unregister_lib, + // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_begin, + // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_end, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -1519,6 +1525,34 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; } + case OMPRTL__tgt_target_data_begin: { + // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); + break; + } + case OMPRTL__tgt_target_data_end: { + // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -5110,6 +5144,160 @@ return; } }; + +enum OpenMPOffloadingReservedDeviceIDs { + /// \brief Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. + OMP_DEVICEID_UNDEF = -1, +}; +} // anonymous namespace + +/// \brief Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void +emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, + llvm::Value *&PointersArray, llvm::Value *&SizesArray, + llvm::Value *&MapTypesArray, + MappableExprsHandler::MapValuesArrayTy &BasePointers, + MappableExprsHandler::MapValuesArrayTy &Pointers, + MappableExprsHandler::MapValuesArrayTy &Sizes, + MappableExprsHandler::MapFlagsArrayTy &MapTypes) { + auto &CGM = CGF.CGM; + auto &Ctx = CGF.getContext(); + + BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; + + if (unsigned PointerNumVal = BasePointers.size()) { + // Detect if we have any capture size requiring runtime evaluation of the + // size so that a constant array could be eventually used. + bool hasRuntimeEvaluationCaptureSize = false; + for (auto *S : Sizes) + if (!isa(S)) { + hasRuntimeEvaluationCaptureSize = true; + break; + } + + llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); + QualType PointerArrayType = + Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + + BasePointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); + PointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + + // If we don't have any VLA types or other types that require runtime + // evaluation, we can use a constant array for the map sizes, otherwise we + // need to fill up the arrays as we do for the pointers. + if (hasRuntimeEvaluationCaptureSize) { + QualType SizeArrayType = Ctx.getConstantArrayType( + Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SizesArray = + CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); + } else { + // We expect all the sizes to be constant, so we collect them to create + // a constant array. + SmallVector ConstSizes; + for (auto S : Sizes) + ConstSizes.push_back(cast(S)); + + auto *SizesArrayInit = llvm::ConstantArray::get( + llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + auto *SizesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), SizesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + SizesArrayInit, ".offload_sizes"); + SizesArrayGbl->setUnnamedAddr(true); + SizesArray = SizesArrayGbl; + } + + // The map types are always constant so we don't need to generate code to + // fill arrays. Instead, we create an array constant. + llvm::Constant *MapTypesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + auto *MapTypesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapTypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayGbl->setUnnamedAddr(true); + MapTypesArray = MapTypesArrayGbl; + + for (unsigned i = 0; i < PointerNumVal; ++i) { + llvm::Value *BPVal = BasePointers[i]; + if (BPVal->getType()->isPointerTy()) + BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); + else { + assert(BPVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); + } + llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + 0, i); + Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(BPVal, BPAddr); + + llvm::Value *PVal = Pointers[i]; + if (PVal->getType()->isPointerTy()) + PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); + else { + assert(PVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); + } + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, + i); + Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(PVal, PAddr); + + if (hasRuntimeEvaluationCaptureSize) { + llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + /*Idx0=*/0, + /*Idx1=*/i); + Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); + CGF.Builder.CreateStore( + CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), + SAddr); + } + } + } +} +/// \brief Emit the arguments to be passed to the runtime library based on the +/// arrays of pointers, sizes and map types. +static void emitOffloadingArraysArgument( + CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, + llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, + llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, + llvm::Value *PointersArray, llvm::Value *SizesArray, + llvm::Value *MapTypesArray, unsigned NumElems) { + auto &CGM = CGF.CGM; + if (NumElems) { + BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, + /*Idx0=*/0, /*Idx1=*/0); + PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, + /*Idx0=*/0, + /*Idx1=*/0); + SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, + /*Idx0=*/0, /*Idx1=*/0); + MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, + /*Idx0=*/0, + /*Idx1=*/0); + } else { + BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); + MapTypesArrayArg = + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + } } void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, @@ -5121,12 +5309,6 @@ if (!CGF.HaveInsertPoint()) return; - enum OpenMPOffloadingReservedDeviceIDs { - /// \brief Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, - }; - assert(OutlinedFn && "Invalid outlined function!"); auto &Ctx = CGF.getContext(); @@ -5251,15 +5433,6 @@ MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } - // Detect if we have any capture size requiring runtime evaluation of the size - // so that a constant array could be eventually used. - bool hasRuntimeEvaluationCaptureSize = false; - for (auto *S : Sizes) - if (!isa(S)) { - hasRuntimeEvaluationCaptureSize = true; - break; - } - // Keep track on whether the host function has to be executed. auto OffloadErrorQType = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); @@ -5270,130 +5443,22 @@ OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasRuntimeEvaluationCaptureSize, Device, OutlinedFnID, - OffloadError, OffloadErrorQType, + auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFnID, OffloadError, OffloadErrorQType, &D](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); - unsigned PointerNumVal = BasePointers.size(); - llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + // Emit the offloading arrays. llvm::Value *BasePointersArray; llvm::Value *PointersArray; llvm::Value *SizesArray; llvm::Value *MapTypesArray; - - if (PointerNumVal) { - llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); - QualType PointerArrayType = Ctx.getConstantArrayType( - Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - - BasePointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - PointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); - - // If we don't have any VLA types, we can use a constant array for the map - // sizes, otherwise we need to fill up the arrays as we do for the - // pointers. - if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - SizesArray = - CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { - // We expect all the sizes to be constant, so we collect them to create - // a constant array. - SmallVector ConstSizes; - for (auto S : Sizes) - ConstSizes.push_back(cast(S)); - - auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGF.CGM.SizeTy, ConstSizes.size()), - ConstSizes); - auto *SizesArrayGbl = new llvm::GlobalVariable( - CGF.CGM.getModule(), SizesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, ".offload_sizes"); - SizesArrayGbl->setUnnamedAddr(true); - SizesArray = SizesArrayGbl; - } - - // The map types are always constant so we don't need to generate code to - // fill arrays. Instead, we create an array constant. - llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); - auto *MapTypesArrayGbl = new llvm::GlobalVariable( - CGF.CGM.getModule(), MapTypesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, ".offload_maptypes"); - MapTypesArrayGbl->setUnnamedAddr(true); - MapTypesArray = MapTypesArrayGbl; - - for (unsigned i = 0; i < PointerNumVal; ++i) { - llvm::Value *BPVal = BasePointers[i]; - if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGF.VoidPtrTy); - else { - assert(BPVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGF.VoidPtrTy); - } - llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), - BasePointersArray, 0, i); - Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(BPVal, BPAddr); - - llvm::Value *PVal = Pointers[i]; - if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGF.VoidPtrTy); - else { - assert(PVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGF.VoidPtrTy); - } - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray, - 0, i); - Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(PVal, PAddr); - - if (hasRuntimeEvaluationCaptureSize) { - llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, - /*Idx1=*/i); - Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); - CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( - Sizes[i], CGF.SizeTy, /*isSigned=*/true), - SAddr); - } - } - - BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), BasePointersArray, - /*Idx0=*/0, /*Idx1=*/0); - PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray, - /*Idx0=*/0, - /*Idx1=*/0); - SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, /*Idx1=*/0); - MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGF.Int32Ty, PointerNumVal), MapTypesArray, - /*Idx0=*/0, - /*Idx1=*/0); - - } else { - BasePointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy); - PointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy); - SizesArray = llvm::ConstantPointerNull::get(CGF.SizeTy->getPointerTo()); - MapTypesArray = - llvm::ConstantPointerNull::get(CGF.Int32Ty->getPointerTo()); - } + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray, + SizesArray, MapTypesArray, BasePointersArray, + PointersArray, SizesArray, MapTypesArray, + BasePointers.size()); // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host @@ -5415,6 +5480,9 @@ else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + // Emit the number of elements in the offloading arrays. + llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + // Return value of the runtime offloading call. llvm::Value *Return; @@ -5650,3 +5718,124 @@ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), PushNumTeamsArgs); } + +void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device, + const RegionCodeGenTy &CodeGen) { + + if (!CGF.HaveInsertPoint()) + return; + + llvm::Value *BasePointersArray = nullptr; + llvm::Value *PointersArray = nullptr; + llvm::Value *SizesArray = nullptr; + llvm::Value *MapTypesArray = nullptr; + unsigned NumOfPtrs = 0; + + // Generate the code for the opening of the data environment. Capture all the + // arguments of the runtime call by reference because they are used in the + // closing of the region. + auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, + &SizesArray, &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get map clause information. + MappableExprsHandler MCHandler(D, CGF); + MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + NumOfPtrs = BasePointers.size(); + + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + OffloadingArgs); + }; + + // Generate code for the closing of the data region. + auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, + &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray && + NumOfPtrs && "Invalid data environment closing arguments."); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + OffloadingArgs); + }; + + // In the event we get an if clause, we don't have to take any action on the + // else side. + auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); + } else { + RegionCodeGenTy BeginThenRCG(BeginThenGen); + BeginThenRCG(CGF); + } + + CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); + } else { + RegionCodeGenTy EndThenRCG(EndThenGen); + EndThenRCG(CGF); + } +} Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -3241,13 +3241,33 @@ // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { - // emit the code inside the construct for now - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast(S.getAssociatedStmt())->getCapturedStmt()); - }); + // The target data enclosed region is implemented just by emitting the + // statement. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + }; + + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data, + CodeGen); + return; + } + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen); } void CodeGenFunction::EmitOMPTargetEnterDataDirective( Index: test/OpenMP/target_data_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/target_data_codegen.cpp @@ -0,0 +1,248 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +///==========================================================================/// +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +// CK1: [[ST:%.+]] = type { i32, double* } +template +struct ST { + T a; + double *b; +}; + +ST gb; +double gc[100]; + +// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 2] + +// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 1] + +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 5] + +// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 1, i32 97] + +// CK1-LABEL: _Z3fooi +void foo(int arg) { + int la; + float lb[arg]; + + // Region 00 + // CK1-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + + // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: store i8* bitcast ([100 x double]* @gc to i8*), i8** [[BP0]] + // CK1-DAG: store i8* bitcast ([100 x double]* @gc to i8*), i8** [[P0]] + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + + // CK1-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] + #pragma omp target data if(1+3-5) device(arg) map(from: gc) + {++arg;} + + // Region 01 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target data map(la) if(1+3-4) + {++arg;} + + // Region 02 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CK1: [[IFTHEN]] + // CK1-DAG: call void @__tgt_target_data_begin(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + + // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: store i8* [[CBPVAL0:%[^,]+]], i8** [[BP0]] + // CK1-DAG: store i8* [[CPVAL0:%[^,]+]], i8** [[P0]] + // CK1-DAG: [[CBPVAL0]] = bitcast i32* [[VAR0:%.+]] to i8* + // CK1-DAG: [[CPVAL0]] = bitcast i32* [[VAR0]] to i8* + // CK1: br label %[[IFEND:[^,]+]] + + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + + // CK1: [[IFTHEN]] + // CK1-DAG: call void @__tgt_target_data_end(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] + // CK1: br label %[[IFEND:[^,]+]] + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + #pragma omp target data map(to: arg) if(arg) device(4) + {++arg;} + + // Region 03 + // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + + // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: store i8* [[CBPVAL0:%[^,]+]], i8** [[BP0]] + // CK1-DAG: store i8* [[CPVAL0:%[^,]+]], i8** [[P0]] + // CK1-DAG: store i[[sz]] [[CSVAL0:%[^,]+]], i[[sz]]* [[S0]] + // CK1-DAG: [[CBPVAL0]] = bitcast float* [[VAR0:%.+]] to i8* + // CK1-DAG: [[CPVAL0]] = bitcast float* [[VAR0]] to i8* + // CK1-DAG: [[CSVAL0]] = mul nuw i[[sz]] %{{[^,]+}}, 4 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + + // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] + // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]] + #pragma omp target data map(always, to: lb) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 04 + // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + + // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK1-DAG: store i8* bitcast ([[ST]]* @gb to i8*), i8** [[BP0]] + // CK1-DAG: store i8* bitcast (double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1) to i8*), i8** [[P0]] + + + // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK1-DAG: store i8* bitcast (double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1) to i8*), i8** [[BP1]] + // CK1-DAG: store i8* [[CPVAL1:%[^,]+]], i8** [[P1]] + // CK1-DAG: [[CPVAL1]] = bitcast double* [[SEC1:%.+]] to i8* + // CK1-DAG: [[SEC1]] = getelementptr inbounds {{.+}}double* [[SEC11:%[^,]+]], i{{.+}} 0 + // CK1-DAG: [[SEC11]] = load double*, double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1), + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + + // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] + // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] + #pragma omp target data map(to: gb.b[:3]) + {++arg;} +} +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fomptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fomptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: [[ST:%.+]] = type { i32, double* } +template +struct ST { + T a; + double *b; + + T foo(T arg) { + // Region 00 + #pragma omp target data map(always, to: b[1:3]) if(a>123) device(arg) + {arg++;} + return arg; + } +}; + +// CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 5, i32 101] + +// CK2-LABEL: _Z3bari +int bar(int arg){ + ST A; + return A.foo(arg); +} + +// Region 00 +// CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CK2: [[IFTHEN]] +// CK2-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + +// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 +// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 +// CK2-DAG: store i8* [[CBPVAL0:%[^,]+]], i8** [[BP0]] +// CK2-DAG: store i8* [[CPVAL0:%[^,]+]], i8** [[P0]] +// CK2-DAG: [[CBPVAL0]] = bitcast [[ST]]* [[VAR0:%.+]] to i8* +// CK2-DAG: [[CPVAL0]] = bitcast double** [[SEC0:%[^,]+]] to i8* +// CK2-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1 + + +// CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 +// CK2-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 +// CK2-DAG: store i8* [[CBPVAL1:%[^,]+]], i8** [[BP1]] +// CK2-DAG: store i8* [[CPVAL1:%[^,]+]], i8** [[P1]] +// CK2-DAG: [[CBPVAL1]] = bitcast double** [[SEC0]] to i8* +// CK2-DAG: [[CPVAL1]] = bitcast double* [[SEC1:%[^,]+]] to i8* +// CK2-DAG: [[SEC1]] = getelementptr inbounds {{.*}}double* [[SEC11:%[^,]+]], i{{.+}} 1 +// CK2-DAG: [[SEC11]] = load double*, double** [[SEC111:%[^,]+]], +// CK2-DAG: [[SEC111]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1 + +// CK2: br label %[[IFEND:[^,]+]] + +// CK2: [[IFELSE]] +// CK2: br label %[[IFEND]] +// CK2: [[IFEND]] +// CK2: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 +// CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + +// CK2: [[IFTHEN]] +// CK2-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] +// CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] +// CK2: br label %[[IFEND:[^,]+]] +// CK2: [[IFELSE]] +// CK2: br label %[[IFEND]] +// CK2: [[IFEND]] +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3-LABEL: no_target_devices +void no_target_devices(int arg) { + // CK3-NOT: tgt_target_data_begin + // CK3: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + // CK3-NOT: tgt_target_data_end + // CK3: ret + #pragma omp target data map(to: arg) if(arg) device(4) + {++arg;} +} +#endif +#endif