diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -34,6 +34,10 @@ struct OmpClauseList; } // namespace parser +namespace semantics { +class SemanticsContext; +} // namespace semantics + namespace lower { class AbstractConverter; @@ -47,8 +51,8 @@ void genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *, mlir::Location); -void genOpenMPConstruct(AbstractConverter &, pft::Evaluation &, - const parser::OpenMPConstruct &); +void genOpenMPConstruct(AbstractConverter &, semantics::SemanticsContext &, + pft::Evaluation &, const parser::OpenMPConstruct &); void genOpenMPDeclarativeConstruct(AbstractConverter &, pft::Evaluation &, const parser::OpenMPDeclarativeConstruct &); int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2258,7 +2258,7 @@ void genFIR(const Fortran::parser::OpenMPConstruct &omp) { mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); localSymbols.pushScope(); - genOpenMPConstruct(*this, getEval(), omp); + genOpenMPConstruct(*this, bridge.getSemanticsContext(), getEval(), omp); const Fortran::parser::OpenMPLoopConstruct *ompLoop = std::get_if(&omp.u); diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -39,16 +39,21 @@ static Fortran::semantics::Symbol * getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject) { Fortran::semantics::Symbol *sym = nullptr; - std::visit(Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { - if (const Fortran::parser::Name *name = + std::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + if (auto *arrayEle = + Fortran::parser::Unwrap( + designator)) { + sym = 
GetFirstName(arrayEle->base).symbol; + } else if (const Fortran::parser::Name *name = Fortran::semantics::getDesignatorNameIfDataRef( designator)) { - sym = name->symbol; - } - }, - [&](const Fortran::parser::Name &name) { sym = name.symbol; }}, - ompObject.u); + sym = name->symbol; + } + }, + [&](const Fortran::parser::Name &name) { sym = name.symbol; }}, + ompObject.u); return sym; } @@ -529,8 +534,11 @@ mlir::Value &result) const; bool processLink(llvm::SmallVectorImpl &result) const; - bool processMap(llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl &mapTypes) const; + bool processMap(mlir::Location currentLocation, + const llvm::omp::Directive &directive, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + llvm::SmallVectorImpl &mapOperands) const; bool processReduction( mlir::Location currentLocation, llvm::SmallVectorImpl &reductionVars, @@ -1648,80 +1656,602 @@ }); } -bool ClauseProcessor::processMap( - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl &mapTypes) const { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); +// Ref pointers are used rather than direct access when we +// map a declare target link variable or declare target to +// with USM mode. 
+static bool mapRequiresReference(fir::FirOpBuilder &firOpBuilder, + const mlir::Value &mapOp) { + auto *op = mapOp.getDefiningOp(); + // It's a BlockArgument, which has no defining operation, it cannot be + // declare target as its origination must be a global value or a SAVE + // variable + if (!op) + return false; - return findRepeatableClause< - ClauseTy::Map>([&](const ClauseTy::Map *mapClause, - const Fortran::parser::CharBlock &source) { - mlir::Location clauseLocation = converter.genLocation(source); - const auto &oMapType = - std::get>(mapClause->v.t); - llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; - // If the map type is specified, then process it else Tofrom is the default. 
- break; - case Fortran::parser::OmpMapType::Type::Delete: - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; - } + if (auto addrOp = mlir::dyn_cast(op)) { + op = firOpBuilder.getModule().lookupSymbol(addrOp.getSymbol()); + } - if (std::get>( - oMapType->t)) - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; - } else { - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + // TODO: Add To+USM mode case when we have some method of + // enabling USM in the frontend and getting this information + if (auto declareTargetGlobal = + mlir::dyn_cast(op)) { + if (declareTargetGlobal.isDeclareTarget() && + ((declareTargetGlobal.getDeclareTargetCaptureClause() == + mlir::omp::DeclareTargetCaptureClause::link) /*|| + (declareTargetGlobal.getDeclareTargetCaptureClause() == + mlir::omp::DeclareTargetCaptureClause::to && + hasRequiresUnifiedSharedMemory)*/)) { + return true; } + } + + return false; +} + +/// \param [in] converter - the abstract converter for the current invocation +/// of the lowering process +/// \param [in] capturedSym - The symbol of the variable being captured +/// \param [in] capturedByDirective - The directive the variable is part of +/// either by implicit or explicit map +/// \param [in] isVariableUsedInMapClause - is the variable used in a map +/// clause (explicitly mapped) +/// \param [in] isVariableAssociatedWithSection - is the variable related to +/// an array subscript operator, an OpenMP array section or shaping +/// expression or otherwise dereferenced +/// \param [in] forceCaptureByReferenceInTarget - will force capture byref +/// when true in certain cases where bycopy may be possible and byref is an +/// option +static mlir::omp::VariableCaptureKind +isCapturedByRef(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::Symbol *capturedSym, + llvm::omp::Directive capturedByDirective, + bool isVariableUsedInMapClause = true, 
+ bool isVariableAssociatedWithSection = false, + bool forceCaptureByReferenceInTarget = false) { + // NOTE: Table taken from Clang's isOpenMPCapturedByRef which this function is + // loosely based on, Fortran is likely to have some different caveats to add. + // + // This table summarizes how a given variable should be passed to the device + // given its type and the clauses where it appears. This table is based on + // the description in OpenMP 4.5 [2.10.4, target Construct] and + // OpenMP 4.5 [2.15.5, Data-mapping Attribute Rules and Clauses]. + // + // + // | type | defaultmap | pvt | first | is_device_ptr | map | res. | + // | |(tofrom:scalar)| | pvt | |has_dv_adr| | + // + // | scl | | | | - | | bycopy| + // | scl | | - | x | - | - | bycopy| + // | scl | | x | - | - | - | null | + // | scl | x | | | - | | byref | + // | scl | x | - | x | - | - | bycopy| + // | scl | x | x | - | - | - | null | + // | scl | | - | - | - | x | byref | + // | scl | x | - | - | - | x | byref | + // + // | agg | n.a. | | | - | | byref | + // | agg | n.a. | - | x | - | - | byref | + // | agg | n.a. | x | - | - | - | null | + // | agg | n.a. | - | - | - | x | byref | + // | agg | n.a. | - | - | - | x[] | byref | + // + // | ptr | n.a. | | | - | | bycopy| + // | ptr | n.a. | - | x | - | - | bycopy| + // | ptr | n.a. | x | - | - | - | null | + // | ptr | n.a. | - | - | - | x | byref | + // | ptr | n.a. | - | - | - | x[] | bycopy| + // | ptr | n.a. | - | - | x | | bycopy| + // | ptr | n.a. | - | - | x | x | bycopy| + // | ptr | n.a. 
| - | - | x | x[] | bycopy| + // + // Legend: + // scl - scalar + // ptr - pointer + // agg - aggregate + // x - applies + // - - invalid in this combination + // [] - mapped with an array section + // byref - should be mapped by reference + // bycopy - should be mapped by copy (by value) + // null - initialize a local variable to null on the device + // + // Observations: + // - All scalar declarations that show up in a map clause have to be passed + // by reference, because they may have been mapped in the enclosing data + // environment. + // - If the scalar value does not fit the size of uintptr, it has to be + // passed by reference, regardless of the result in the table above. + // - For pointers mapped by value that have either an implicit map or an + // array section, the runtime library may pass the NULL value to the + // device instead of the value passed to it by the compiler. + bool isByRef = true; + auto symValue = converter.getSymbolAddress(*capturedSym); + mlir::Type type = symValue.getType(); + + if (auto refType = type.dyn_cast()) + type = refType.getElementType(); + + auto isScalarType = [&](mlir::Type type) { + if (type.isa() || type.isa() || + type.isa() || type.isa() || + type.isa()) + return true; + return false; + }; + + if (isVariableUsedInMapClause) { + isByRef = !(Fortran::semantics::IsAllocatableOrPointer(*capturedSym) && + isVariableAssociatedWithSection); + } else { + // TODO: Implement additional ||'s for: + // isDefaultmapCapturedByRef - which detects if a default map has been + // specified and if it indicates capture by ref + // hasExplicitDSA - check that the variable is not specified within a + // reduction clause + isByRef = (forceCaptureByReferenceInTarget && + !Fortran::semantics::IsAllocatableOrPointer(*capturedSym)) || + !isScalarType(type); + } + + // TODO: Implement additional checks for data sharing attributes on the + // variable, can be found in originating Clang function + // isOpenMPCapturedByRef. 
The current check is a little anemic in + // comparison to the original, this requires the ability to introspect + // if a variable is also in a firstprivate, defaultmap, reduction or + // device_ptr. The DataSharingProcessor class in this file may be of + // interest for implementing these checks, as we will need to inevitably + // check multiple levels of nested regions. + if (isByRef && isScalarType(type)) { + isByRef = (isVariableUsedInMapClause && + capturedByDirective == llvm::omp::Directive::OMPD_target); + } - // TODO: Add support MapTypeModifiers close, mapper, present, iterator - - mlir::IntegerAttr mapTypeAttr = firOpBuilder.getIntegerAttr( - firOpBuilder.getI64Type(), - static_cast< - std::underlying_type_t>( - mapTypeBits)); - - llvm::SmallVector mapOperand; - // Check for unsupported map operand types. - for (const Fortran::parser::OmpObject &ompObject : - std::get(mapClause->v.t).v) { - if (Fortran::parser::Unwrap(ompObject) || - Fortran::parser::Unwrap( - ompObject)) - TODO(clauseLocation, - "OMPD_target_data for Array Expressions or Structure Components"); + // When passing data by copy, we need to make sure it fits the uintptr size + // and alignment, because the runtime library only deals with uintptr types. + // If it does not fit the uintptr size, we need to pass the data by + // reference instead. + // NOTE: The datalayout may not be perfectly accurate at the moment, but this + // appears to be the best size information we have at this level + if (!isByRef && isScalarType(type)) { + mlir::DataLayout dl = mlir::DataLayout( + symValue.getDefiningOp()->getParentOfType()); + if (dl.getTypeSize(type) > + dl.getTypeSize(converter.getFirOpBuilder().getIntPtrType()) || + dl.getTypeABIAlignment(type) > + dl.getTypeABIAlignment( + converter.getFirOpBuilder().getIntPtrType())) { + isByRef = true; } - genObjectList(std::get(mapClause->v.t), - converter, mapOperand); + } + + return (isByRef) ? 
mlir::omp::VariableCaptureKind::ByRef + : mlir::omp::VariableCaptureKind::ByCopy; +} + +/// Generate the omp.bounds operation from the descriptor information. +static llvm::SmallVector +genBoundsOpsFromBox(fir::FirOpBuilder &builder, mlir::Location loc, + Fortran::lower::AbstractConverter &converter, + fir::ExtendedValue dataExv, mlir::Value box) { + llvm::SmallVector bounds; + mlir::Type idxTy = builder.getIndexType(); + mlir::Type boundTy = builder.getType(); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + assert(box.getType().isa() && + "expect fir.box or fir.class"); + for (unsigned dim = 0; dim < dataExv.rank(); ++dim) { + mlir::Value d = builder.createIntegerConstant(loc, idxTy, dim); + mlir::Value baseLb = + fir::factory::readLowerBound(builder, loc, dataExv, dim, one); + auto dimInfo = + builder.create(loc, idxTy, idxTy, idxTy, box, d); + mlir::Value lb = builder.createIntegerConstant(loc, idxTy, 0); + mlir::Value ub = + builder.create(loc, dimInfo.getExtent(), one); + mlir::Value bound = builder.create( + loc, boundTy, lb, ub, mlir::Value(), dimInfo.getByteStride(), true, + baseLb); + bounds.push_back(bound); + } + return bounds; +} - for (mlir::Value mapOp : mapOperand) { - checkMapType(mapOp.getLoc(), mapOp.getType()); - mapOperands.push_back(mapOp); - mapTypes.push_back(mapTypeAttr); +/// Generate omp.bounds operation for base array without any subscripts +/// provided. 
+static llvm::SmallVector +genBaseBoundsOps(fir::FirOpBuilder &builder, mlir::Location loc, + Fortran::lower::AbstractConverter &converter, + fir::ExtendedValue dataExv, mlir::Value baseAddr) { + mlir::Type idxTy = builder.getIndexType(); + mlir::Type boundTy = builder.getType(); + llvm::SmallVector bounds; + + if (dataExv.rank() == 0) + return bounds; + + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + for (std::size_t dim = 0; dim < dataExv.rank(); ++dim) { + mlir::Value baseLb = + fir::factory::readLowerBound(builder, loc, dataExv, dim, one); + mlir::Value ext = fir::factory::readExtent(builder, loc, dataExv, dim); + mlir::Value lb = builder.createIntegerConstant(loc, idxTy, 0); + + // ub = extent - 1 + mlir::Value ub = builder.create(loc, ext, one); + mlir::Value bound = builder.create( + loc, boundTy, lb, ub, ext, one, false, baseLb); + bounds.push_back(bound); + } + return bounds; +} + +/// Generate omp.bounds operations for an array section when subscripts are +/// provided. 
+static llvm::SmallVector +genBoundsOps(fir::FirOpBuilder &builder, mlir::Location loc, + Fortran::lower::AbstractConverter &converter, + Fortran::lower::StatementContext &stmtCtx, + const std::list &subscripts, + std::stringstream &asFortran, fir::ExtendedValue &dataExv, + mlir::Value baseAddr) { + int dimension = 0; + mlir::Type idxTy = builder.getIndexType(); + mlir::Type boundTy = builder.getType(); + llvm::SmallVector bounds; + + mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + for (const auto &subscript : subscripts) { + if (const auto *triplet{ + std::get_if(&subscript.u)}) { + if (dimension != 0) + asFortran << ','; + mlir::Value lbound, ubound, extent; + std::optional lval, uval; + mlir::Value baseLb = + fir::factory::readLowerBound(builder, loc, dataExv, dimension, one); + bool defaultLb = baseLb == one; + mlir::Value stride = one; + bool strideInBytes = false; + + if (fir::unwrapRefType(baseAddr.getType()).isa()) { + mlir::Value d = builder.createIntegerConstant(loc, idxTy, dimension); + auto dimInfo = builder.create(loc, idxTy, idxTy, idxTy, + baseAddr, d); + stride = dimInfo.getByteStride(); + strideInBytes = true; + } + + const auto &lower{std::get<0>(triplet->t)}; + if (lower) { + lval = Fortran::semantics::GetIntValue(lower); + if (lval) { + if (defaultLb) { + lbound = builder.createIntegerConstant(loc, idxTy, *lval - 1); + } else { + mlir::Value lb = builder.createIntegerConstant(loc, idxTy, *lval); + lbound = builder.create(loc, lb, baseLb); + } + asFortran << *lval; + } else { + const Fortran::lower::SomeExpr *lexpr = + Fortran::semantics::GetExpr(*lower); + mlir::Value lb = + fir::getBase(converter.genExprValue(loc, *lexpr, stmtCtx)); + lb = builder.createConvert(loc, baseLb.getType(), lb); + lbound = builder.create(loc, lb, baseLb); + asFortran << lexpr->AsFortran(); + } + } else { + lbound = defaultLb ? 
zero : baseLb; + } + asFortran << ':'; + const auto &upper{std::get<1>(triplet->t)}; + if (upper) { + uval = Fortran::semantics::GetIntValue(upper); + if (uval) { + if (defaultLb) { + ubound = builder.createIntegerConstant(loc, idxTy, *uval - 1); + } else { + mlir::Value ub = builder.createIntegerConstant(loc, idxTy, *uval); + ubound = builder.create(loc, ub, baseLb); + } + asFortran << *uval; + } else { + const Fortran::lower::SomeExpr *uexpr = + Fortran::semantics::GetExpr(*upper); + mlir::Value ub = + fir::getBase(converter.genExprValue(loc, *uexpr, stmtCtx)); + ub = builder.createConvert(loc, baseLb.getType(), ub); + ubound = builder.create(loc, ub, baseLb); + asFortran << uexpr->AsFortran(); + } + } + if (lower && upper) { + if (lval && uval && *uval < *lval) { + mlir::emitError(loc, "zero sized array section"); + break; + } else if (std::get<2>(triplet->t)) { + const auto &strideExpr{std::get<2>(triplet->t)}; + if (strideExpr) { + mlir::emitError(loc, "stride cannot be specified on " + "an OpenMP array section"); + break; + } + } + } + // ub = baseLb + extent - 1 + if (!ubound) { + mlir::Value ext = + fir::factory::readExtent(builder, loc, dataExv, dimension); + mlir::Value lbExt = + builder.create(loc, ext, baseLb); + ubound = builder.create(loc, lbExt, one); + } + mlir::Value bound = builder.create( + loc, boundTy, lbound, ubound, extent, stride, strideInBytes, baseLb); + bounds.push_back(bound); + ++dimension; } - }); + } + return bounds; +} + +static mlir::Value +getDataOperandBaseAddr(Fortran::lower::AbstractConverter &converter, + fir::FirOpBuilder &builder, + Fortran::lower::SymbolRef sym, mlir::Location loc) { + mlir::Value symAddr = converter.getSymbolAddress(sym); + // TODO: Might need revisiting to handle for non-shared clauses + if (!symAddr) { + if (const auto *details = + sym->detailsIf()) + symAddr = converter.getSymbolAddress(details->symbol()); + } + + if (!symAddr) + llvm::report_fatal_error("could not retrieve symbol address"); + + if (auto 
boxTy = + fir::unwrapRefType(symAddr.getType()).dyn_cast()) { + if (boxTy.getEleTy().isa()) + TODO(loc, "derived type"); + + // Load the box when baseAddr is a `fir.ref>` or a + // `fir.ref>` type. + if (symAddr.getType().isa()) + return builder.create(loc, symAddr); + } + return symAddr; +} + +static mlir::Value gatherDataOperandAddrAndBounds( + Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpObject &ompObject, mlir::Location operandLocation, + std::stringstream &asFortran, llvm::SmallVector &bounds) { + mlir::Value baseAddr; + std::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + if (auto expr{Fortran::semantics::AnalyzeExpr(semanticsContext, + designator)}) { + if ((*expr).Rank() > 0 && + Fortran::parser::Unwrap( + designator)) { + const auto *arrayElement = + Fortran::parser::Unwrap( + designator); + const auto *dataRef = + std::get_if(&designator.u); + fir::ExtendedValue dataExv; + if (Fortran::parser::Unwrap< + Fortran::parser::StructureComponent>( + arrayElement->base)) { + auto exprBase = Fortran::semantics::AnalyzeExpr( + semanticsContext, arrayElement->base); + dataExv = converter.genExprAddr(operandLocation, *exprBase, + stmtCtx); + baseAddr = fir::getBase(dataExv); + asFortran << (*exprBase).AsFortran(); + } else { + const Fortran::parser::Name &name = + Fortran::parser::GetLastName(*dataRef); + baseAddr = getDataOperandBaseAddr( + converter, builder, *name.symbol, operandLocation); + dataExv = converter.getSymbolExtendedValue(*name.symbol); + asFortran << name.ToString(); + } + + if (!arrayElement->subscripts.empty()) { + asFortran << '('; + bounds = genBoundsOps(builder, operandLocation, converter, + stmtCtx, arrayElement->subscripts, + asFortran, dataExv, baseAddr); + } + asFortran << ')'; + } else if (Fortran::parser::Unwrap< + 
Fortran::parser::StructureComponent>(designator)) { + fir::ExtendedValue compExv = + converter.genExprAddr(operandLocation, *expr, stmtCtx); + baseAddr = fir::getBase(compExv); + if (fir::unwrapRefType(baseAddr.getType()) + .isa()) + bounds = genBaseBoundsOps(builder, operandLocation, converter, + compExv, baseAddr); + asFortran << (*expr).AsFortran(); + + // If the component is an allocatable or pointer the result of + // genExprAddr will be the result of a fir.box_addr operation. + // Retrieve the box so we handle it like other descriptors. + if (auto boxAddrOp = mlir::dyn_cast_or_null( + baseAddr.getDefiningOp())) { + baseAddr = boxAddrOp.getVal(); + bounds = genBoundsOpsFromBox(builder, operandLocation, + converter, compExv, baseAddr); + } + } else { + // Scalar or full array. + if (const auto *dataRef{ + std::get_if(&designator.u)}) { + const Fortran::parser::Name &name = + Fortran::parser::GetLastName(*dataRef); + fir::ExtendedValue dataExv = + converter.getSymbolExtendedValue(*name.symbol); + baseAddr = getDataOperandBaseAddr( + converter, builder, *name.symbol, operandLocation); + if (fir::unwrapRefType(baseAddr.getType()) + .isa()) + bounds = genBoundsOpsFromBox(builder, operandLocation, + converter, dataExv, baseAddr); + if (fir::unwrapRefType(baseAddr.getType()) + .isa()) + bounds = genBaseBoundsOps(builder, operandLocation, + converter, dataExv, baseAddr); + asFortran << name.ToString(); + } else { // Unsupported + llvm::report_fatal_error( + "Unsupported type of OpenACC operand"); + } + } + } + }, + [&](const Fortran::parser::Name &name) { + baseAddr = getDataOperandBaseAddr(converter, builder, *name.symbol, + operandLocation); + asFortran << name.ToString(); + }}, + ompObject.u); + return baseAddr; +} + +static mlir::omp::MapEntryOp +createMapEntryOp(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value baseAddr, std::stringstream &name, + mlir::SmallVector bounds, uint64_t mapType, + mlir::omp::VariableCaptureKind mapCaptureType, bool 
implicit, + mlir::Type retTy) { + mlir::Value varPtrPtr; + if (auto boxTy = baseAddr.getType().dyn_cast()) { + baseAddr = builder.create(loc, baseAddr); + retTy = baseAddr.getType(); + } + + mlir::omp::MapEntryOp op = + builder.create(loc, retTy, baseAddr); + op.setNameAttr(builder.getStringAttr(name.str())); + op.setImplicit(implicit); + op.setMapType(mapType); + op.setMapCaptureType(mapCaptureType); + + unsigned insPos = 1; + if (varPtrPtr) + op->insertOperands(insPos++, varPtrPtr); + if (bounds.size() > 0) + op->insertOperands(insPos, bounds); + op->setAttr(mlir::omp::MapEntryOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr( + {1, varPtrPtr ? 1 : 0, static_cast(bounds.size())})); + return op; +} + +bool ClauseProcessor::processMap( + mlir::Location currentLocation, const llvm::omp::Directive &directive, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + llvm::SmallVectorImpl &mapOperands) const { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + return findRepeatableClause( + [&](const ClauseTy::Map *mapClause, + const Fortran::parser::CharBlock &source) { + mlir::Location clauseLocation = converter.genLocation(source); + const auto &oMapType = + std::get>( + mapClause->v.t); + llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; + // If the map type is specified, then process it else Tofrom is the + // default. 
+ if (oMapType) { + const Fortran::parser::OmpMapType::Type &mapType = + std::get(oMapType->t); + switch (mapType) { + case Fortran::parser::OmpMapType::Type::To: + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + break; + case Fortran::parser::OmpMapType::Type::From: + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + break; + case Fortran::parser::OmpMapType::Type::Tofrom: + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + break; + case Fortran::parser::OmpMapType::Type::Alloc: + case Fortran::parser::OmpMapType::Type::Release: + // alloc and release is the default map_type for the Target Data + // Ops, i.e. if no bits for map_type is supplied then alloc/release + // is implicitly assumed based on the target directive. Default + // value for Target Data and Enter Data is alloc and for Exit Data + // it is release. + break; + case Fortran::parser::OmpMapType::Type::Delete: + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; + } + + if (std::get>( + oMapType->t)) + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + } else { + mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + for (const Fortran::parser::OmpObject &ompObject : + std::get(mapClause->v.t).v) { + llvm::SmallVector bounds; + std::stringstream asFortran; + mlir::Value baseAddr = gatherDataOperandAddrAndBounds( + converter, firOpBuilder, semanticsContext, stmtCtx, ompObject, + clauseLocation, asFortran, bounds); + + checkMapType(baseAddr.getLoc(), baseAddr.getType()); + + // TODO: Clang special cases this for several other cases (member + // references as one example), see getMapTypeBits inside of + // generateInfoForComponentList in Clang's CGOpenMPRuntime for + // reference. We only support the declare target link variation + // at the moment. 
+ llvm::omp::OpenMPOffloadMappingFlags perValMapTypeBit = mapTypeBits; + bool requiresRef = mapRequiresReference(firOpBuilder, baseAddr); + if (requiresRef) + perValMapTypeBit |= + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + + // TODO: Handle cases with overlapping elements, captures and + // composite types as generateInfoForCapture in Clang does, which + // makes subsequent elements by-pass the target_param flag. This may + // need to be done inside of the TargetOpMapCapture pass as we have + // information about the captures there, but unfortunately not here. + // The pass may be better named as TargetOpMapResolution. + bool isCaptureFirstInfo = true; + if (isCaptureFirstInfo && !requiresRef) + perValMapTypeBit |= + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + + uint64_t mapType = static_cast< + std::underlying_type_t>( + perValMapTypeBit); + + mlir::omp::VariableCaptureKind mapCaptureKind = isCapturedByRef( + converter, getOmpObjectSymbol(ompObject), directive); + + mapOperands.push_back(createMapEntryOp( + firOpBuilder, clauseLocation, baseAddr, asFortran, bounds, + mapType, mapCaptureKind, false, baseAddr.getType())); + } + }); } bool ClauseProcessor::processReduction( @@ -2309,14 +2839,13 @@ static mlir::omp::DataOp genDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, deviceOperand; llvm::SmallVector mapOperands, devicePtrOperands, deviceAddrOperands; - llvm::SmallVector mapTypes; llvm::SmallVector useDeviceTypes; llvm::SmallVector useDeviceLocs; llvm::SmallVector useDeviceSymbols; @@ -2330,16 +2859,12 @@ useDeviceSymbols); cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, useDeviceSymbols); - cp.processMap(mapOperands, 
mapTypes); - - llvm::SmallVector mapTypesAttr(mapTypes.begin(), - mapTypes.end()); - mlir::ArrayAttr mapTypesArrayAttr = - mlir::ArrayAttr::get(firOpBuilder.getContext(), mapTypesAttr); + cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, + semanticsContext, stmtCtx, mapOperands); auto dataOp = converter.getFirOpBuilder().create( currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, - deviceAddrOperands, mapOperands, mapTypesArrayAttr); + deviceAddrOperands, mapOperands); createBodyOfTargetDataOp(converter, dataOp, useDeviceTypes, useDeviceLocs, useDeviceSymbols, currentLocation); return dataOp; @@ -2348,6 +2873,7 @@ template static OpTy genEnterExitDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -2355,7 +2881,6 @@ mlir::Value ifClauseOperand, deviceOperand; mlir::UnitAttr nowaitAttr; llvm::SmallVector mapOperands; - llvm::SmallVector mapTypes; Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName; llvm::omp::Directive directive; @@ -2375,32 +2900,26 @@ cp.processIf(stmtCtx, directiveName, ifClauseOperand); cp.processDevice(stmtCtx, deviceOperand); cp.processNowait(nowaitAttr); - cp.processMap(mapOperands, mapTypes); + cp.processMap(currentLocation, directive, semanticsContext, stmtCtx, + mapOperands); cp.processTODO(currentLocation, directive); - llvm::SmallVector mapTypesAttr(mapTypes.begin(), - mapTypes.end()); - mlir::ArrayAttr mapTypesArrayAttr = - mlir::ArrayAttr::get(firOpBuilder.getContext(), mapTypesAttr); - return firOpBuilder.create(currentLocation, ifClauseOperand, - deviceOperand, nowaitAttr, mapOperands, - mapTypesArrayAttr); + deviceOperand, nowaitAttr, mapOperands); } static mlir::omp::TargetOp genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, + 
Fortran::semantics::SemanticsContext &semanticsContext, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::omp::Directive directive, bool outerCombined = false) { Fortran::lower::StatementContext stmtCtx; mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; mlir::UnitAttr nowaitAttr; llvm::SmallVector mapOperands; - llvm::SmallVector mapTypes; ClauseProcessor cp(converter, clauseList); cp.processIf(stmtCtx, @@ -2409,7 +2928,8 @@ cp.processDevice(stmtCtx, deviceOperand); cp.processThreadLimit(stmtCtx, threadLimitOperand); cp.processNowait(nowaitAttr); - cp.processMap(mapOperands, mapTypes); + cp.processMap(currentLocation, directive, semanticsContext, stmtCtx, + mapOperands); cp.processTODO( currentLocation, llvm::omp::Directive::OMPD_target); - llvm::SmallVector mapTypesAttr(mapTypes.begin(), - mapTypes.end()); - mlir::ArrayAttr mapTypesArrayAttr = - mlir::ArrayAttr::get(firOpBuilder.getContext(), mapTypesAttr); - return genOpWithBody( converter, eval, currentLocation, outerCombined, &clauseList, ifClauseOperand, deviceOperand, threadLimitOperand, nowaitAttr, - mapOperands, mapTypesArrayAttr); + mapOperands); } static mlir::omp::TeamsOp @@ -2474,6 +2989,7 @@ static void genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, + Fortran::semantics::SemanticsContext &semanticsContext, const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { const auto &directive = @@ -2501,15 +3017,15 @@ firOpBuilder.create(currentLocation); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, currentLocation, opClauseList); + genDataOp(converter, semanticsContext, currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: - genEnterExitDataOp(converter, currentLocation, - opClauseList); + 
genEnterExitDataOp(converter, semanticsContext, + currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: - genEnterExitDataOp(converter, currentLocation, - opClauseList); + genEnterExitDataOp(converter, semanticsContext, + currentLocation, opClauseList); break; case llvm::omp::Directive::OMPD_target_update: TODO(currentLocation, "OMPD_target_update"); @@ -2539,12 +3055,14 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, + Fortran::semantics::SemanticsContext &semanticsContext, const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, eval, simpleStandaloneConstruct); + genOmpSimpleStandalone(converter, eval, semanticsContext, + simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { genOmpFlush(converter, eval, flushConstruct); @@ -2562,6 +3080,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, + Fortran::semantics::SemanticsContext &semanticsContext, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector lowerBound, upperBound, step, linearVars, @@ -2595,8 +3114,8 @@ if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; - genTargetOp(converter, eval, currentLocation, loopOpClauseList, - /*outerCombined=*/true); + genTargetOp(converter, eval, semanticsContext, currentLocation, + loopOpClauseList, ompDirective, /*outerCombined=*/true); } if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) .test(ompDirective)) { @@ -2721,6 +3240,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, + 
Fortran::semantics::SemanticsContext &semanticsContext, const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { const auto &beginBlockDirective = std::get(blockConstruct.t); @@ -2782,10 +3302,11 @@ endClauseList); break; case llvm::omp::Directive::OMPD_target: - genTargetOp(converter, eval, currentLocation, beginClauseList); + genTargetOp(converter, eval, semanticsContext, currentLocation, + beginClauseList, directive.v); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, currentLocation, beginClauseList); + genDataOp(converter, semanticsContext, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_task: genTaskOp(converter, eval, currentLocation, beginClauseList); @@ -2805,8 +3326,8 @@ bool combinedDirective = false; if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) .test(directive.v)) { - genTargetOp(converter, eval, currentLocation, beginClauseList, - /*outerCombined=*/true); + genTargetOp(converter, eval, semanticsContext, currentLocation, + beginClauseList, directive.v, /*outerCombined=*/true); combinedDirective = true; } if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) @@ -3471,13 +3992,14 @@ void Fortran::lower::genOpenMPConstruct( Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPConstruct &ompConstruct) { std::visit( common::visitors{ [&](const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { - genOMP(converter, eval, standaloneConstruct); + genOMP(converter, eval, semanticsContext, standaloneConstruct); }, [&](const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { @@ -3487,7 +4009,7 @@ genOMP(converter, eval, sectionConstruct); }, [&](const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { - genOMP(converter, eval, loopConstruct); + genOMP(converter, eval, semanticsContext, loopConstruct); }, [&](const 
Fortran::parser::OpenMPDeclarativeAllocate &execAllocConstruct) { @@ -3502,7 +4024,7 @@ TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct"); }, [&](const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { - genOMP(converter, eval, blockConstruct); + genOMP(converter, eval, semanticsContext, blockConstruct); }, [&](const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) { genOMP(converter, eval, atomicConstruct); diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -220,35 +220,119 @@ // ----- func.func @_QPomp_target_data() { + %c1024 = arith.constant 1024 : index %0 = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_dataEa"} + %c1024_0 = arith.constant 1024 : index %1 = fir.alloca !fir.array<1024xi32> {bindc_name = "b", uniq_name = "_QFomp_target_dataEb"} + %c1024_1 = arith.constant 1024 : index %2 = fir.alloca !fir.array<1024xi32> {bindc_name = "c", uniq_name = "_QFomp_target_dataEc"} + %c1024_2 = arith.constant 1024 : index %3 = fir.alloca !fir.array<1024xi32> {bindc_name = "d", uniq_name = "_QFomp_target_dataEd"} - omp.target_enter_data map((to -> %0 : !fir.ref>), (to -> %1 : !fir.ref>), (always, alloc -> %2 : !fir.ref>)) - omp.target_exit_data map((from -> %0 : !fir.ref>), (from -> %1 : !fir.ref>), (release -> %2 : !fir.ref>), (always, delete -> %3 : !fir.ref>)) + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %4 = arith.subi %c1024, %c1 : index + %5 = omp.bounds lower_bound(%c0 : index) upper_bound(%4 : index) extent(%c1024 : index) stride(%c1 : index) start_idx(%c1 : index) + %6 = omp.map_entry var_ptr(%0 : !fir.ref>) map_type_value(33) capture(ByRef) bounds(%5) -> !fir.ref> {name = "a"} + %c1_3 = arith.constant 1 : index + %c0_4 = arith.constant 0 : index + %7 = arith.subi %c1024_0, %c1_3 : index + %8 = 
omp.bounds lower_bound(%c0_4 : index) upper_bound(%7 : index) extent(%c1024_0 : index) stride(%c1_3 : index) start_idx(%c1_3 : index) + %9 = omp.map_entry var_ptr(%1 : !fir.ref>) map_type_value(33) capture(ByRef) bounds(%8) -> !fir.ref> {name = "b"} + %c1_5 = arith.constant 1 : index + %c0_6 = arith.constant 0 : index + %10 = arith.subi %c1024_1, %c1_5 : index + %11 = omp.bounds lower_bound(%c0_6 : index) upper_bound(%10 : index) extent(%c1024_1 : index) stride(%c1_5 : index) start_idx(%c1_5 : index) + %12 = omp.map_entry var_ptr(%2 : !fir.ref>) map_type_value(36) capture(ByRef) bounds(%11) -> !fir.ref> {name = "c"} + omp.target_enter_data map_entries((to -> %6 : !fir.ref>), (to -> %9 : !fir.ref>), (always, alloc -> %12 : !fir.ref>)) + %c1_7 = arith.constant 1 : index + %c0_8 = arith.constant 0 : index + %13 = arith.subi %c1024, %c1_7 : index + %14 = omp.bounds lower_bound(%c0_8 : index) upper_bound(%13 : index) extent(%c1024 : index) stride(%c1_7 : index) start_idx(%c1_7 : index) + %15 = omp.map_entry var_ptr(%0 : !fir.ref>) map_type_value(34) capture(ByRef) bounds(%14) -> !fir.ref> {name = "a"} + %c1_9 = arith.constant 1 : index + %c0_10 = arith.constant 0 : index + %16 = arith.subi %c1024_0, %c1_9 : index + %17 = omp.bounds lower_bound(%c0_10 : index) upper_bound(%16 : index) extent(%c1024_0 : index) stride(%c1_9 : index) start_idx(%c1_9 : index) + %18 = omp.map_entry var_ptr(%1 : !fir.ref>) map_type_value(34) capture(ByRef) bounds(%17) -> !fir.ref> {name = "b"} + %c1_11 = arith.constant 1 : index + %c0_12 = arith.constant 0 : index + %19 = arith.subi %c1024_1, %c1_11 : index + %20 = omp.bounds lower_bound(%c0_12 : index) upper_bound(%19 : index) extent(%c1024_1 : index) stride(%c1_11 : index) start_idx(%c1_11 : index) + %21 = omp.map_entry var_ptr(%2 : !fir.ref>) map_type_value(32) capture(ByRef) bounds(%20) -> !fir.ref> {name = "c"} + %c1_13 = arith.constant 1 : index + %c0_14 = arith.constant 0 : index + %22 = arith.subi %c1024_2, %c1_13 : index + %23 = 
omp.bounds lower_bound(%c0_14 : index) upper_bound(%22 : index) extent(%c1024_2 : index) stride(%c1_13 : index) start_idx(%c1_13 : index) + %24 = omp.map_entry var_ptr(%3 : !fir.ref>) map_type_value(44) capture(ByRef) bounds(%23) -> !fir.ref> {name = "d"} + omp.target_exit_data map_entries((from -> %15 : !fir.ref>), (from -> %18 : !fir.ref>), (release -> %21 : !fir.ref>), (always, delete -> %24 : !fir.ref>)) return } -// CHECK-LABEL: llvm.func @_QPomp_target_data() { -// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEa"} : (i64) -> !llvm.ptr> -// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x !llvm.array<1024 x i32> {bindc_name = "b", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEb"} : (i64) -> !llvm.ptr> -// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.array<1024 x i32> {bindc_name = "c", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEc"} : (i64) -> !llvm.ptr> -// CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_7:.*]] = llvm.alloca %[[VAL_6]] x !llvm.array<1024 x i32> {bindc_name = "d", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEd"} : (i64) -> !llvm.ptr> -// CHECK: omp.target_enter_data map((to -> %[[VAL_1]] : !llvm.ptr>), (to -> %[[VAL_3]] : !llvm.ptr>), (always, alloc -> %[[VAL_5]] : !llvm.ptr>)) -// CHECK: omp.target_exit_data map((from -> %[[VAL_1]] : !llvm.ptr>), (from -> %[[VAL_3]] : !llvm.ptr>), (release -> %[[VAL_5]] : !llvm.ptr>), (always, delete -> %[[VAL_7]] : !llvm.ptr>)) -// CHECK: llvm.return -// CHECK: } + // CHECK-LABEL: llvm.func @_QPomp_target_data() { + 
// CHECK: %0 = llvm.mlir.constant(1024 : index) : i64 + // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEa"} : (i64) -> !llvm.ptr> + // CHECK: %3 = llvm.mlir.constant(1024 : index) : i64 + // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x !llvm.array<1024 x i32> {bindc_name = "b", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEb"} : (i64) -> !llvm.ptr> + // CHECK: %6 = llvm.mlir.constant(1024 : index) : i64 + // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.array<1024 x i32> {bindc_name = "c", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEc"} : (i64) -> !llvm.ptr> + // CHECK: %9 = llvm.mlir.constant(1024 : index) : i64 + // CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[VAL_7:.*]] = llvm.alloca %[[VAL_6]] x !llvm.array<1024 x i32> {bindc_name = "d", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEd"} : (i64) -> !llvm.ptr> + // CHECK: %12 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %13 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %14 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %15 = omp.bounds lower_bound(%13 : i64) upper_bound(%14 : i64) extent(%0 : i64) stride(%12 : i64) start_idx(%12 : i64) + // CHECK: %16 = omp.map_entry var_ptr(%[[VAL_1]] : !llvm.ptr>) map_type_value(33) capture(ByRef) bounds(%15) -> !llvm.ptr> {name = "a"} + // CHECK: %17 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %18 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %19 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %20 = omp.bounds lower_bound(%18 : i64) upper_bound(%19 : 
i64) extent(%3 : i64) stride(%17 : i64) start_idx(%17 : i64) + // CHECK: %21 = omp.map_entry var_ptr(%[[VAL_3]] : !llvm.ptr>) map_type_value(33) capture(ByRef) bounds(%20) -> !llvm.ptr> {name = "b"} + // CHECK: %22 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %23 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %24 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %25 = omp.bounds lower_bound(%23 : i64) upper_bound(%24 : i64) extent(%6 : i64) stride(%22 : i64) start_idx(%22 : i64) + // CHECK: %26 = omp.map_entry var_ptr(%[[VAL_5]] : !llvm.ptr>) map_type_value(36) capture(ByRef) bounds(%25) -> !llvm.ptr> {name = "c"} + // CHECK: omp.target_enter_data map_entries((to -> %16 : !llvm.ptr>), (to -> %21 : !llvm.ptr>), (always, alloc -> %26 : !llvm.ptr>)) + // CHECK: %27 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %28 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %29 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %30 = omp.bounds lower_bound(%28 : i64) upper_bound(%29 : i64) extent(%0 : i64) stride(%27 : i64) start_idx(%27 : i64) + // CHECK: %31 = omp.map_entry var_ptr(%[[VAL_1]] : !llvm.ptr>) map_type_value(34) capture(ByRef) bounds(%30) -> !llvm.ptr> {name = "a"} + // CHECK: %32 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %33 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %34 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %35 = omp.bounds lower_bound(%33 : i64) upper_bound(%34 : i64) extent(%3 : i64) stride(%32 : i64) start_idx(%32 : i64) + // CHECK: %36 = omp.map_entry var_ptr(%[[VAL_3]] : !llvm.ptr>) map_type_value(34) capture(ByRef) bounds(%35) -> !llvm.ptr> {name = "b"} + // CHECK: %37 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %38 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %39 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %40 = omp.bounds lower_bound(%38 : i64) upper_bound(%39 : i64) extent(%6 : i64) stride(%37 : i64) start_idx(%37 : i64) + // CHECK: %41 = omp.map_entry var_ptr(%[[VAL_5]] : !llvm.ptr>) 
map_type_value(32) capture(ByRef) bounds(%40) -> !llvm.ptr> {name = "c"} + // CHECK: %42 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %43 = llvm.mlir.constant(0 : index) : i64 + // CHECK: %44 = llvm.mlir.constant(1023 : index) : i64 + // CHECK: %45 = omp.bounds lower_bound(%43 : i64) upper_bound(%44 : i64) extent(%9 : i64) stride(%42 : i64) start_idx(%42 : i64) + // CHECK: %46 = omp.map_entry var_ptr(%[[VAL_7]] : !llvm.ptr>) map_type_value(44) capture(ByRef) bounds(%45) -> !llvm.ptr> {name = "d"} + // CHECK: omp.target_exit_data map_entries((from -> %31 : !llvm.ptr>), (from -> %36 : !llvm.ptr>), (release -> %41 : !llvm.ptr>), (always, delete -> %46 : !llvm.ptr>)) + // CHECK: llvm.return + // CHECK: } // ----- func.func @_QPopenmp_target_data_region() { %0 = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFopenmp_target_data_regionEa"} %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFopenmp_target_data_regionEi"} - omp.target_data map((tofrom -> %0 : !fir.ref>)) { + %c1024 = arith.constant 1024 : index + %c3 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c2 = arith.subi %c1024, %c3 : index + %bound = omp.bounds lower_bound(%c0 : index) upper_bound(%c2 : index) extent(%c1024 : index) stride(%c3 : index) start_idx(%c3 : index) + %entry = omp.map_entry var_ptr(%0 : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%bound) -> !fir.ref> {name = "a"} + omp.target_data map_entries((tofrom -> %entry : !fir.ref>)) { %c1_i32 = arith.constant 1 : i32 %2 = fir.convert %c1_i32 : (i32) -> index %c1024_i32 = arith.constant 1024 : i32 @@ -281,7 +365,13 @@ // CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFopenmp_target_data_regionEa"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i", in_type = i32, 
operandSegmentSizes = array, uniq_name = "_QFopenmp_target_data_regionEi"} : (i64) -> !llvm.ptr -// CHECK: omp.target_data map((tofrom -> %[[VAL_1]] : !llvm.ptr>)) { +// CHECK: %[[VAL_MAX:.*]] = llvm.mlir.constant(1024 : index) : i64 +// CHECK: %[[VAL_ONE:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_ZERO:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_UPPER:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_BOUNDS:.*]] = omp.bounds lower_bound(%[[VAL_ZERO]] : i64) upper_bound(%[[VAL_UPPER]] : i64) extent(%[[VAL_MAX]] : i64) stride(%[[VAL_ONE]] : i64) start_idx(%[[VAL_ONE]] : i64) +// CHECK: %[[VAL_MAP:.*]] = omp.map_entry var_ptr(%[[VAL_1]] : !llvm.ptr>) map_type_value(35) capture(ByRef) bounds(%[[VAL_BOUNDS]]) -> !llvm.ptr> {name = "a"} +// CHECK: omp.target_data map_entries((tofrom -> %[[VAL_MAP]] : !llvm.ptr>)) { // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_5:.*]] = llvm.sext %[[VAL_4]] : i32 to i64 // CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1024 : i32) : i32 @@ -335,25 +425,37 @@ // ----- func.func @_QPomp_target() { + %c512 = arith.constant 512 : index %0 = fir.alloca !fir.array<512xi32> {bindc_name = "a", uniq_name = "_QFomp_targetEa"} %c64_i32 = arith.constant 64 : i32 - omp.target thread_limit(%c64_i32 : i32) map((tofrom -> %0 : !fir.ref>)) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %1 = arith.subi %c512, %c1 : index + %2 = omp.bounds lower_bound(%c0 : index) upper_bound(%1 : index) extent(%c512 : index) stride(%c1 : index) start_idx(%c1 : index) + %3 = omp.map_entry var_ptr(%0 : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%2) -> !fir.ref> {name = "a"} + omp.target thread_limit(%c64_i32 : i32) map_entries((tofrom -> %3 : !fir.ref>)) { %c10_i32 = arith.constant 10 : i32 %c1_i64 = arith.constant 1 : i64 %c1_i64_0 = arith.constant 1 : i64 - %1 = arith.subi %c1_i64, %c1_i64_0 : i64 - %2 = fir.coordinate_of %0, %1 : (!fir.ref>, i64) -> !fir.ref - 
fir.store %c10_i32 to %2 : !fir.ref + %4 = arith.subi %c1_i64, %c1_i64_0 : i64 + %5 = fir.coordinate_of %0, %4 : (!fir.ref>, i64) -> !fir.ref + fir.store %c10_i32 to %5 : !fir.ref omp.terminator } return } // CHECK-LABEL: llvm.func @_QPomp_target() { +// CHECK: %[[EXTENT:.*]] = llvm.mlir.constant(512 : index) : i64 // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<512 x i32> {bindc_name = "a", in_type = !fir.array<512xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_targetEa"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(64 : i32) : i32 -// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map((tofrom -> %[[VAL_1]] : !llvm.ptr>)) { +// CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[LOWER:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64 +// CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64) +// CHECK: %[[MAP:.*]] = omp.map_entry var_ptr(%2 : !llvm.ptr>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr> {name = "a"} +// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries((tofrom -> %[[MAP]] : !llvm.ptr>)) { // CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32 // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(1 : i64) : i64 diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/array-bounds.f90 @@ -0,0 +1,44 @@ +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes HOST +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes DEVICE + +!DEVICE: func.func @_QPread_write_section_omp_outline_0(%[[ARG0:.*]]: 
!fir.ref, %[[ARG1:.*]]: !fir.ref>, %[[ARG2:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPread_write_section"} { +!DEVICE: %c4 = arith.constant 4 : index +!DEVICE: %c1 = arith.constant 1 : index +!DEVICE: %c1_0 = arith.constant 1 : index +!DEVICE: %c1_1 = arith.constant 1 : index +!DEVICE: %[[BOUNDS0:.*]] = omp.bounds lower_bound(%c1 : index) upper_bound(%c4 : index) stride(%c1_1 : index) start_idx(%c1_1 : index) +!DEVICE: %[[MAP0:.*]] = omp.map_entry var_ptr(%[[ARG1]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref> {name = "sp_read(2:5)"} +!DEVICE: %c4_2 = arith.constant 4 : index +!DEVICE: %c1_3 = arith.constant 1 : index +!DEVICE: %c1_4 = arith.constant 1 : index +!DEVICE: %c1_5 = arith.constant 1 : index +!DEVICE: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%c1_3 : index) upper_bound(%c4_2 : index) stride(%c1_5 : index) start_idx(%c1_5 : index) +!DEVICE: %[[MAP1:.*]] = omp.map_entry var_ptr(%[[ARG2]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} +!DEVICE: omp.target map_entries((tofrom -> %[[MAP0]] : !fir.ref>), (tofrom -> %[[MAP1]] : !fir.ref>)) { + +!HOST: func.func @_QPread_write_section() { +!HOST: %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFread_write_sectionEi"} +!HOST: %[[READ:.*]] = fir.address_of(@_QFread_write_sectionEsp_read) : !fir.ref> +!HOST: %[[WRITE:.*]] = fir.address_of(@_QFread_write_sectionEsp_write) : !fir.ref> +!HOST: %c1 = arith.constant 1 : index +!HOST: %c1_0 = arith.constant 1 : index +!HOST: %c4 = arith.constant 4 : index +!HOST: %[[BOUNDS0:.*]] = omp.bounds lower_bound(%c1_0 : index) upper_bound(%c4 : index) stride(%c1 : index) start_idx(%c1 : index) +!HOST: %[[MAP0:.*]] = omp.map_entry var_ptr(%[[READ]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref> {name = "sp_read(2:5)"} +!HOST: %c1_1 = arith.constant 1 : index +!HOST: %c1_2 = arith.constant 1 : 
index +!HOST: %c4_3 = arith.constant 4 : index +!HOST: %[[BOUNDS1:.*]] = omp.bounds lower_bound(%c1_2 : index) upper_bound(%c4_3 : index) stride(%c1_1 : index) start_idx(%c1_1 : index) +!HOST: %[[MAP1:.*]] = omp.map_entry var_ptr(%[[WRITE]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} +!HOST: omp.target map_entries((tofrom -> %[[MAP0]] : !fir.ref>), (tofrom -> %[[MAP1]] : !fir.ref>)) { + +SUBROUTINE READ_WRITE_SECTION() + INTEGER :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/) + INTEGER :: sp_write(10) = (/0,0,0,0,0,0,0,0,0,0/) + +!$omp target map(tofrom:sp_read(2:5)) map(tofrom:sp_write(2:5)) + do i = 2, 5 + sp_write(i) = sp_read(i) + end do +!$omp end target +END SUBROUTINE READ_WRITE_SECTION diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90 --- a/flang/test/Lower/OpenMP/location.f90 +++ b/flang/test/Lower/OpenMP/location.f90 @@ -17,7 +17,7 @@ !CHECK-LABEL: sub_target subroutine sub_target() print *, x -!CHECK: omp.target {{.*}} { +!CHECK: omp.target { !$omp target print *, x !CHECK: omp.terminator loc(#[[TAR_LOC:.*]]) diff --git a/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 b/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 --- a/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 +++ b/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 @@ -6,7 +6,8 @@ !CHECK: func.func @_QPtarget_function !CHECK: func.func @_QPwrite_index_omp_outline_0(%[[ARG0:.*]]: !fir.ref) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { -!CHECK-NEXT: omp.target {{.*}} { +!CHECK-NEXT: %[[MAP_ENTRY0:.*]] = omp.map_entry var_ptr(%[[ARG0]]{{.*}} +!CHECK-NEXT: omp.target map_entries((from -> %[[MAP_ENTRY0]]{{.*}} { !CHECK: %[[CONSTANT_VALUE_10:.*]] = arith.constant 10 : i32 !CHECK: fir.store %[[CONSTANT_VALUE_10]] to %[[ARG0]] : !fir.ref !CHECK: omp.terminator @@ -14,7 +15,8 @@ !CHECK-NEXT: return !CHECK: func.func 
@_QPwrite_index_omp_outline_1(%[[ARG1:.*]]: !fir.ref) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { -!CHECK-NEXT: omp.target {{.*}} { +!CHECK-NEXT: %[[MAP_ENTRY1:.*]] = omp.map_entry var_ptr(%[[ARG1]]{{.*}} +!CHECK-NEXT: omp.target map_entries((from -> %[[MAP_ENTRY1]]{{.*}} { !CHECK: %[[CONSTANT_VALUE_20:.*]] = arith.constant 20 : i32 !CHECK: fir.store %[[CONSTANT_VALUE_20]] to %[[ARG1]] : !fir.ref !CHECK: omp.terminator @@ -39,3 +41,47 @@ SUBROUTINE TARGET_FUNCTION() !$omp declare target END + +!CHECK: func.func @_QParray_bounds_omp_outline_0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref>) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QParray_bounds"} { +!CHECK: %[[C4:.*]] = arith.constant 4 : index +!CHECK: %[[C1:.*]] = arith.constant 1 : index +!CHECK: %[[C1_0:.*]] = arith.constant 1 : index +!CHECK: %[[C1_1:.*]] = arith.constant 1 : index +!CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C1]] : index) upper_bound(%[[C4]] : index) stride(%[[C1_1]] : index) start_idx(%[[C1_1]] : index) +!CHECK: %[[ENTRY:.*]] = omp.map_entry var_ptr(%[[ARG1]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "sp_write(2:5)"} +!CHECK: omp.target map_entries((tofrom -> %[[ENTRY]] : !fir.ref>)) { +!CHECK: %c2_i32 = arith.constant 2 : i32 +!CHECK: %2 = fir.convert %c2_i32 : (i32) -> index +!CHECK: %c5_i32 = arith.constant 5 : i32 +!CHECK: %3 = fir.convert %c5_i32 : (i32) -> index +!CHECK: %c1_2 = arith.constant 1 : index +!CHECK: %4 = fir.convert %2 : (index) -> i32 +!CHECK: %5:2 = fir.do_loop %arg2 = %2 to %3 step %c1_2 iter_args(%arg3 = %4) -> (index, i32) { +!CHECK: fir.store %arg3 to %[[ARG0]] : !fir.ref +!CHECK: %6 = fir.load %[[ARG0]] : !fir.ref +!CHECK: %7 = fir.load %[[ARG0]] : !fir.ref +!CHECK: %8 = fir.convert %7 : (i32) -> i64 +!CHECK: %c1_i64 = arith.constant 1 : i64 +!CHECK: %9 = arith.subi %8, %c1_i64 : i64 +!CHECK: %10 = fir.coordinate_of 
%[[ARG1]], %9 : (!fir.ref>, i64) -> !fir.ref +!CHECK: fir.store %6 to %10 : !fir.ref +!CHECK: %11 = arith.addi %arg2, %c1_2 : index +!CHECK: %12 = fir.convert %c1_2 : (index) -> i32 +!CHECK: %13 = fir.load %[[ARG0]] : !fir.ref +!CHECK: %14 = arith.addi %13, %12 : i32 +!CHECK: fir.result %11, %14 : index, i32 +!CHECK: } +!CHECK: fir.store %5#1 to %[[ARG0]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK:return +!CHECK:} + +SUBROUTINE ARRAY_BOUNDS() + INTEGER :: sp_write(10) = (/0,0,0,0,0,0,0,0,0,0/) +!$omp target map(tofrom:sp_write(2:5)) + do i = 2, 5 + sp_write(i) = i + end do +!$omp end target +end subroutine ARRAY_BOUNDS diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -7,7 +7,9 @@ !CHECK-LABEL: func.func @_QPomp_target_enter_simple() { subroutine omp_target_enter_simple integer :: a(1024) - !CHECK: omp.target_enter_data map((to -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_enter_data map_entries((to -> %[[MAP]] : !fir.ref>) !$omp target enter data map(to: a) end subroutine omp_target_enter_simple @@ -21,7 +23,15 @@ integer :: b(1024) integer :: c(1024) integer :: d(1024) - !CHECK: omp.target_enter_data map((to -> {{.*}} : !fir.ref>), (to -> {{.*}} : !fir.ref>), (always, alloc -> {{.*}} : !fir.ref>), (to -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS_0:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_0:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS_0]]) -> !fir.ref> {name = "a"} + !CHECK: %[[BOUNDS_1:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) 
stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_1:.*]] = omp.map_entry var_ptr(%{{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref> {name = "b"} + !CHECK: %[[BOUNDS_2:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_2:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(36) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref> {name = "c"} + !CHECK: %[[BOUNDS_3:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_3:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref> {name = "d"} + !CHECK: omp.target_enter_data map_entries((to -> %[[MAP_0]] : !fir.ref>), (to -> %[[MAP_1]] : !fir.ref>), (always, alloc -> %[[MAP_2]] : !fir.ref>), (to -> %[[MAP_3]] : !fir.ref>)) !$omp target enter data map(to: a, b) map(always, alloc: c) map(to: d) end subroutine omp_target_enter_mt @@ -32,7 +42,9 @@ !CHECK-LABEL: func.func @_QPomp_target_enter_nowait() { subroutine omp_target_enter_nowait integer :: a(1024) - !CHECK: omp.target_enter_data nowait map((to -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_enter_data nowait map_entries((to -> %[[MAP]] : !fir.ref>)) !$omp target enter data map(to: a) nowait end subroutine omp_target_enter_nowait @@ -48,7 +60,9 @@ !CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1:.*]] : !fir.ref !CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_5:.*]] = arith.cmpi slt, %[[VAL_3]], %[[VAL_4]] : i32 - !CHECK: omp.target_enter_data if(%[[VAL_5]] : i1) map((to -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) 
start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_enter_data if(%[[VAL_5]] : i1) map_entries((to -> %[[MAP]] : !fir.ref>)) !$omp target enter data if(i<10) map(to: a) end subroutine omp_target_enter_if @@ -60,7 +74,9 @@ subroutine omp_target_enter_device integer :: a(1024) !CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 - !CHECK: omp.target_enter_data device(%[[VAL_1]] : i32) map((to -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(33) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_enter_data device(%[[VAL_1]] : i32) map_entries((to -> %[[MAP]] : !fir.ref>)) !$omp target enter data map(to: a) device(2) end subroutine omp_target_enter_device @@ -71,7 +87,9 @@ !CHECK-LABEL: func.func @_QPomp_target_exit_simple() { subroutine omp_target_exit_simple integer :: a(1024) - !CHECK: omp.target_exit_data map((from -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(34) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_exit_data map_entries((from -> %[[MAP]] : !fir.ref>)) !$omp target exit data map(from: a) end subroutine omp_target_exit_simple @@ -86,7 +104,17 @@ integer :: c(1024) integer :: d(1024) integer :: e(1024) - !CHECK: omp.target_exit_data map((from -> {{.*}} : !fir.ref>), (from -> {{.*}} : !fir.ref>), (release -> {{.*}} : !fir.ref>), (always, delete -> {{.*}} : !fir.ref>), (from -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS_0:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: 
%[[MAP_0:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(34) capture(ByRef) bounds(%[[BOUNDS_0]]) -> !fir.ref> {name = "a"} + !CHECK: %[[BOUNDS_1:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_1:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(34) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref> {name = "b"} + !CHECK: %[[BOUNDS_2:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_2:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(32) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref> {name = "c"} + !CHECK: %[[BOUNDS_3:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_3:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(44) capture(ByRef) bounds(%[[BOUNDS_3]]) -> !fir.ref> {name = "d"} + !CHECK: %[[BOUNDS_4:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_4:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(34) capture(ByRef) bounds(%[[BOUNDS_4]]) -> !fir.ref> {name = "e"} + !CHECK: omp.target_exit_data map_entries((from -> %[[MAP_0]] : !fir.ref>), (from -> %[[MAP_1]] : !fir.ref>), (release -> %[[MAP_2]] : !fir.ref>), (always, delete -> %[[MAP_3]] : !fir.ref>), (from -> %[[MAP_4]] : !fir.ref>)) !$omp target exit data map(from: a,b) map(release: c) map(always, delete: d) map(from: e) end subroutine omp_target_exit_mt @@ -99,7 +127,9 @@ integer :: a(1024) integer :: d !CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1:.*]] : !fir.ref - !CHECK: omp.target_exit_data device(%[[VAL_2]] : i32) map((from -> {{.*}} : !fir.ref>)) + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(34) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = 
"a"} + !CHECK: omp.target_exit_data device(%[[VAL_2]] : i32) map_entries((from -> %[[MAP]] : !fir.ref>)) !$omp target exit data map(from: a) device(d) end subroutine omp_target_exit_device @@ -111,7 +141,9 @@ subroutine omp_target_data !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_dataEa"} integer :: a(1024) - !CHECK: omp.target_data map((tofrom -> %[[VAL_0]] : !fir.ref>)) { + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr(%[[VAL_0]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_data map_entries((tofrom -> %[[MAP]] : !fir.ref>)) { !$omp target data map(tofrom: a) !CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64 @@ -131,12 +163,16 @@ integer :: b(1024) !CHECK: %[[VAR_A:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_data_mtEa"} !CHECK: %[[VAR_B:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "b", uniq_name = "_QFomp_target_data_mtEb"} - !CHECK: omp.target_data map((tofrom -> %[[VAR_A]] : !fir.ref>)) + !CHECK: %[[BOUNDS_A:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_A:.*]] = omp.map_entry var_ptr(%[[VAR_A]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS_A]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target_data map_entries((tofrom -> %[[MAP_A]] : !fir.ref>)) { !$omp target data map(a) !CHECK: omp.terminator !$omp end target data !CHECK: } - !CHECK: omp.target_data map((always, from -> %[[VAR_B]] : !fir.ref>)) + !CHECK: %[[BOUNDS_B:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP_B:.*]] = omp.map_entry var_ptr(%[[VAR_B]] : !fir.ref>) map_type_value(38) capture(ByRef) 
bounds(%[[BOUNDS_B]]) -> !fir.ref> {name = "b"} + !CHECK: omp.target_data map_entries((always, from -> %[[MAP_B]] : !fir.ref>)) { !$omp target data map(always, from : b) !CHECK: omp.terminator !$omp end target data @@ -151,7 +187,9 @@ subroutine omp_target !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_targetEa"} integer :: a(1024) - !CHECK: omp.target map((tofrom -> %[[VAL_0]] : !fir.ref>)) { + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr(%[[VAL_0]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target map_entries((tofrom -> %[[MAP]] : !fir.ref>)) { !$omp target map(tofrom: a) !CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64 @@ -173,7 +211,8 @@ subroutine omp_target_thread_limit integer :: a !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 - !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map((tofrom -> %[[VAL_0]] : !fir.ref)) { + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(35) capture(ByRef) -> !fir.ref {name = "a"} + !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries((tofrom -> %[[MAP]] : !fir.ref)) { !$omp target map(tofrom: a) thread_limit(64) a = 10 !CHECK: omp.terminator @@ -190,7 +229,8 @@ use iso_c_binding, only : c_ptr, c_loc type(c_ptr) :: a integer, target :: b - !CHECK: omp.target_data map((tofrom -> %[[VAL_0:.*]] : !fir.ref>)) use_device_ptr(%[[VAL_0]] : !fir.ref>) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(35) capture(ByRef) -> {{.*}} {name = "a"} + !CHECK: omp.target_data map_entries((tofrom -> %[[MAP]]{{.*}} !$omp target data map(tofrom: a) use_device_ptr(a) !CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref>): !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], {{.*}} : (!fir.ref>, !fir.field) -> !fir.ref @@ -207,7 +247,9 @@ 
!CHECK-LABEL: func.func @_QPomp_target_device_addr() { subroutine omp_target_device_addr integer, pointer :: a - !CHECK: omp.target_data map((tofrom -> %[[VAL_0:.*]] : !fir.ref>>)) use_device_addr(%[[VAL_0]] : !fir.ref>>) + !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"} + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr({{.*}}) map_type_value(35) capture(ByRef) -> {{.*}} {name = "a"} + !CHECK: omp.target_data map_entries((tofrom -> %[[MAP]] : {{.*}})) use_device_addr(%[[VAL_0]] : !fir.ref>>) { !$omp target data map(tofrom: a) use_device_addr(a) !CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref>>): !CHECK: {{.*}} = fir.load %[[VAL_1]] : !fir.ref>> @@ -223,11 +265,17 @@ !CHECK-LABEL: func.func @_QPomp_target_parallel_do() { subroutine omp_target_parallel_do + !CHECK: %[[C1024:.*]] = arith.constant 1024 : index !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_parallel_doEa"} integer :: a(1024) !CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_target_parallel_doEi"} integer :: i - !CHECK: omp.target map((tofrom -> %[[VAL_0]] : !fir.ref>)) { + !CHECK: %[[C1:.*]] = arith.constant 1 : index + !CHECK: %[[C0:.*]] = arith.constant 0 : index + !CHECK: %[[SUB:.*]] = arith.subi %[[C1024]], %[[C1]] : index + !CHECK: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[C0]] : index) upper_bound(%[[SUB]] : index) extent(%[[C1024]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) + !CHECK: %[[MAP:.*]] = omp.map_entry var_ptr(%[[VAL_0]] : !fir.ref>) map_type_value(35) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} + !CHECK: omp.target map_entries((tofrom -> %[[MAP]] : !fir.ref>)) { !CHECK-NEXT: omp.parallel !$omp target parallel do map(tofrom: a) !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} @@ -237,7 +285,7 @@ !CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { !CHECK: 
fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 - !CHECK: %[[VAL_8:.*]] = fir.load %2 : !fir.ref + !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64 !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64 !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64