Index: flang/include/flang/Semantics/scope.h =================================================================== --- flang/include/flang/Semantics/scope.h +++ flang/include/flang/Semantics/scope.h @@ -68,7 +68,7 @@ explicit Scope(SemanticsContext &context) : Scope{*this, Kind::Global, nullptr, context} {} Scope(Scope &parent, Kind kind, Symbol *symbol, SemanticsContext &context) - : parent_{parent}, kind_{kind}, symbol_{symbol}, context_{context} { + : parent_{&parent}, kind_{kind}, symbol_{symbol}, context_{context} { if (symbol) { symbol->set_scope(this); } @@ -79,12 +79,12 @@ bool operator!=(const Scope &that) const { return this != &that; } Scope &parent() { - CHECK(&parent_ != this); - return parent_; + CHECK(parent_ != this); + return *parent_; } const Scope &parent() const { - CHECK(&parent_ != this); - return parent_; + CHECK(parent_ != this); + return *parent_; } Kind kind() const { return kind_; } bool IsGlobal() const { return kind_ == Kind::Global; } @@ -121,6 +121,7 @@ bool Contains(const Scope &) const; /// Make a scope nested in this one Scope &MakeScope(Kind kind, Symbol *symbol = nullptr); + SemanticsContext &GetMutableSemanticsContext() const { return const_cast(context()); } @@ -271,7 +272,8 @@ } private: - Scope &parent_; // this is enclosing scope, not extended derived type base + Scope *parent_{ + nullptr}; // this is enclosing scope, not extended derived type base const Kind kind_; std::size_t size_{0}; // size in bytes std::optional alignment_; // required alignment in bytes Index: flang/include/flang/Semantics/semantics.h =================================================================== --- flang/include/flang/Semantics/semantics.h +++ flang/include/flang/Semantics/semantics.h @@ -215,7 +215,7 @@ // Defines builtinsScope_ from the __Fortran_builtins module void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } - + const Scope *GetCUDABuiltinsScope(); void UsePPCFortranBuiltinsModule(); const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -253,7 +253,7 @@ void CheckError(const Symbol &); const common::IntrinsicTypeDefaultKinds &defaultKinds_; - const common::LanguageFeatureControl languageFeatures_; + const common::LanguageFeatureControl &languageFeatures_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; @@ -279,6 +279,7 @@ UnorderedSymbolSet errorSymbols_; std::set tempNames_; const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins + std::optional CUDABuiltinsScope_; // module __CUDA_builtins const Scope *ppcBuiltinsScope_{nullptr}; // module __Fortran_PPC_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; Index: flang/include/flang/Semantics/symbol.h =================================================================== --- flang/include/flang/Semantics/symbol.h +++ flang/include/flang/Semantics/symbol.h @@ -114,6 +114,26 @@ } bool defaultIgnoreTKR() const { return defaultIgnoreTKR_; } void set_defaultIgnoreTKR(bool yes) { defaultIgnoreTKR_ = yes; } + std::optional cudaSubprogramAttrs() const { + return cudaSubprogramAttrs_; + } + void set_cudaSubprogramAttrs(common::CUDASubprogramAttrs csas) { + cudaSubprogramAttrs_ = csas; + } + std::vector &cudaLaunchBounds() { return cudaLaunchBounds_; } + const std::vector &cudaLaunchBounds() const { + return cudaLaunchBounds_; + } + void set_cudaLaunchBounds(std::vector &&x) { + cudaLaunchBounds_ = std::move(x); + } + std::vector &cudaClusterDims() { return cudaClusterDims_; } + const std::vector &cudaClusterDims() const { + return cudaClusterDims_; + } + void set_cudaClusterDims(std::vector &&x) { + cudaClusterDims_ = std::move(x); + } private: bool isInterface_{false}; // true if this represents an interface-body @@ -127,6 +147,10 @@ // appeared in an ancestor (sub)module. Symbol *moduleInterface_{nullptr}; bool defaultIgnoreTKR_{false}; + // CUDA ATTRIBUTES(...) from subroutine/function prefix + std::optional cudaSubprogramAttrs_; + // CUDA LAUNCH_BOUNDS(...) & CLUSTER_DIMS(...) from prefix + std::vector cudaLaunchBounds_, cudaClusterDims_; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const SubprogramDetails &); @@ -229,6 +253,12 @@ bool CanBeDeferredShape() const { return shape_.CanBeDeferredShape(); } bool IsAssumedSize() const { return isDummy() && shape_.CanBeAssumedSize(); } bool IsAssumedRank() const { return isDummy() && shape_.IsAssumedRank(); } + std::optional cudaDataAttr() const { + return cudaDataAttr_; + } + void set_cudaDataAttr(std::optional attr) { + cudaDataAttr_ = attr; + } private: MaybeExpr init_; @@ -237,6 +267,7 @@ ArraySpec coshape_; common::IgnoreTKRSet ignoreTKR_; const Symbol *commonBlock_{nullptr}; // common block this object is in + std::optional cudaDataAttr_; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const ObjectEntityDetails &); }; @@ -276,10 +307,13 @@ std::optional init() const { return init_; } void set_init(const Symbol &symbol) { init_ = &symbol; } void set_init(std::nullptr_t) { init_ = nullptr; } + bool isCUDAKernel() const { return isCUDAKernel_; } + void set_isCUDAKernel(bool yes = true) { isCUDAKernel_ = yes; } private: const Symbol *procInterface_{nullptr}; std::optional init_; + bool isCUDAKernel_{false}; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const ProcEntityDetails &); }; Index: flang/include/flang/Semantics/tools.h =================================================================== --- flang/include/flang/Semantics/tools.h +++ flang/include/flang/Semantics/tools.h @@ -190,6 +190,22 @@ bool IsUnlimitedPolymorphic(const Symbol &); bool IsPolymorphicAllocatable(const Symbol &); +inline bool IsCUDADeviceContext(const Scope *scope) { + if (scope) { + if (const Symbol * symbol{scope->symbol()}) { + if (const auto *subp{symbol->detailsIf()}) { + if (auto attrs{subp->cudaSubprogramAttrs()}) { + return *attrs != common::CUDASubprogramAttrs::Host; + } + } + } + } + return false; +} + +const Scope *FindCUDADeviceContext(const Scope *); +std::optional GetCUDADataAttr(const Symbol *); + // Return an error if a symbol is not accessible from a scope std::optional CheckAccessibleSymbol( const semantics::Scope &, const Symbol &); @@ -386,9 +402,9 @@ // its non-POINTER derived type components. (The lifetime of each // potential subobject component is that of the entire instance.) // - PotentialAndPointer subobject components of a derived type are the -// closure of -// its components (including POINTERs) and the PotentialAndPointer subobject -// components of its non-POINTER derived type components. +// closure of its components (including POINTERs) and the +// PotentialAndPointer subobject components of its non-POINTER derived type +// components. // Parent and procedure components are considered against these definitions. // For this kind of iterator, the component tree is recursively visited in the // following order: Index: flang/lib/Semantics/mod-file.cpp =================================================================== --- flang/lib/Semantics/mod-file.cpp +++ flang/lib/Semantics/mod-file.cpp @@ -457,6 +457,31 @@ os << (isAbstract ? "abstract " : "") << "interface\n"; } PutAttrs(os, prefixAttrs, nullptr, false, ""s, " "s); + if (auto attrs{details.cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::HostDevice) { + os << "attributes(host,device) "; + } else { + PutLower(os << "attributes(", common::EnumToString(*attrs)) << ") "; + } + if (!details.cudaLaunchBounds().empty()) { + os << "launch_bounds"; + char sep{'('}; + for (auto x : details.cudaLaunchBounds()) { + os << sep << x; + sep = ','; + } + os << ") "; + } + if (!details.cudaClusterDims().empty()) { + os << "cluster_dims"; + char sep{'('}; + for (auto x : details.cudaClusterDims()) { + os << sep << x; + sep = ','; + } + os << ") "; + } + } os << (details.isFunction() ? "function " : "subroutine "); os << symbol.name() << '('; int n = 0; @@ -710,6 +735,10 @@ }); os << ") " << symbol.name() << '\n'; } + if (auto attr{details.cudaDataAttr()}) { + PutLower(os << "attributes(", common::EnumToString(*attr)) + << ") " << symbol.name() << '\n'; + } } void ModFileWriter::PutProcEntity(llvm::raw_ostream &os, const Symbol &symbol) { @@ -990,6 +1019,7 @@ options.isModuleFile = true; options.features.Enable(common::LanguageFeature::BackslashEscapes); options.features.Enable(common::LanguageFeature::OpenMP); + options.features.Enable(common::LanguageFeature::CUDA); if (!isIntrinsic.value_or(false) && !notAModule) { // The search for this module file will scan non-intrinsic module // directories. If a directory is in both the intrinsic and non-intrinsic Index: flang/lib/Semantics/resolve-names.cpp =================================================================== --- flang/lib/Semantics/resolve-names.cpp +++ flang/lib/Semantics/resolve-names.cpp @@ -238,6 +238,7 @@ public: bool BeginAttrs(); // always returns true Attrs GetAttrs(); + std::optional cudaDataAttr() { return cudaDataAttr_; } Attrs EndAttrs(); bool SetPassNameOn(Symbol &); void SetBindNameOn(Symbol &); @@ -278,9 +279,11 @@ HANDLE_ATTR_CLASS(Value, VALUE) HANDLE_ATTR_CLASS(Volatile, VOLATILE) #undef HANDLE_ATTR_CLASS + bool Pre(const common::CUDADataAttr); protected: std::optional attrs_; + std::optional cudaDataAttr_; Attr AccessSpecToAttr(const parser::AccessSpec &x) { switch (x.v) { @@ -419,7 +422,8 @@ }; // Track array specifications. They can occur in AttrSpec, EntityDecl, -// ObjectDecl, DimensionStmt, CommonBlockObject, or BasedPointerStmt. +// ObjectDecl, DimensionStmt, CommonBlockObject, BasedPointerStmt, and +// ComponentDecl. // 1. INTEGER, DIMENSION(10) :: x // 2. INTEGER :: x(10) // 3. ALLOCATABLE :: x(:) @@ -658,6 +662,8 @@ symbol.attrs().set(attr); symbol.implicitAttrs().set(attr); } + void SetCUDADataAttr( + SourceName, Symbol &, std::optional); protected: FuncResultStack &funcResultStack() { return funcResultStack_; } @@ -851,6 +857,9 @@ void Post(const parser::InterfaceBody::Function &); bool Pre(const parser::Suffix &); bool Pre(const parser::PrefixSpec &); + bool Pre(const parser::PrefixSpec::Attributes &); + void Post(const parser::PrefixSpec::Launch_Bounds &); + void Post(const parser::PrefixSpec::Cluster_Dims &); bool BeginSubprogram(const parser::Name &, Symbol::Flag, bool hasModulePrefix = false, @@ -922,6 +931,7 @@ objectDeclAttr_ = Attr::TARGET; return true; } + bool Pre(const parser::CUDAAttributesStmt &); void Post(const parser::TargetStmt &) { objectDeclAttr_ = std::nullopt; } void Post(const parser::DimensionStmt::Declaration &); void Post(const parser::CodimensionDecl &); @@ -1529,7 +1539,8 @@ llvm_unreachable("This node is handled in ProgramUnit"); } - void NoteExecutablePartCall(Symbol::Flag, const parser::Call &); + void NoteExecutablePartCall( + Symbol::Flag, const parser::Call &, bool hasCUDAChevrons); friend void ResolveSpecificationParts(SemanticsContext &, const Symbol &); @@ -1557,6 +1568,7 @@ void FinishSpecificationParts(const ProgramTree &); void FinishDerivedTypeInstantiation(Scope &); void ResolveExecutionParts(const ProgramTree &); + void UseCUDABuiltinNames(); }; // ImplicitRules implementation @@ -1662,8 +1674,8 @@ // AttrsVisitor implementation bool AttrsVisitor::BeginAttrs() { - CHECK(!attrs_); - attrs_ = std::make_optional(); + CHECK(!attrs_ && !cudaDataAttr_); + attrs_ = Attrs{}; return true; } Attrs AttrsVisitor::GetAttrs() { @@ -1673,6 +1685,7 @@ Attrs AttrsVisitor::EndAttrs() { Attrs result{GetAttrs()}; attrs_.reset(); + cudaDataAttr_.reset(); passName_ = std::nullopt; bindName_.reset(); return result; @@ -1789,6 +1802,15 @@ attrs_->set(attrName); return true; } +bool AttrsVisitor::Pre(const common::CUDADataAttr x) { + if (cudaDataAttr_.value_or(x) != x) { + Say(currStmtSource().value(), + "CUDA data attributes '%s' and '%s' may not both be specified"_err_en_US, + common::EnumToString(*cudaDataAttr_), common::EnumToString(x)); + } + cudaDataAttr_ = x; + return false; +} // DeclTypeSpecVisitor implementation @@ -2688,6 +2710,27 @@ return ok; } +void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol, + std::optional attr) { + if (attr) { + ConvertToObjectEntity(symbol); + if (auto *object{symbol.detailsIf()}) { + if (*attr != object->cudaDataAttr().value_or(*attr)) { + Say(source, + "'%s' already has another CUDA data attribute ('%s')"_err_en_US, + symbol.name(), + common::EnumToString(*object->cudaDataAttr()).substr()); + } else { + object->set_cudaDataAttr(attr); + } + } else { + Say(source, + "'%s' is not an object and may not have a CUDA data attribute"_err_en_US, + symbol.name()); + } + } +} + // ModuleVisitor implementation bool ModuleVisitor::Pre(const parser::Only &x) { @@ -3424,17 +3467,100 @@ if (info.parsedType) { // C1543 Say(currStmtSource().value(), "FUNCTION prefix cannot specify the type more than once"_err_en_US); - return false; } else { info.parsedType = parsedType; info.source = currStmtSource(); - return false; } + return false; } else { return true; } } +bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) { + if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + for (auto attr : attrs.v) { + if (auto current{subp->cudaSubprogramAttrs()}) { + if (attr == *current || + (*current == common::CUDASubprogramAttrs::HostDevice && + (attr == common::CUDASubprogramAttrs::Host || + attr == common::CUDASubprogramAttrs::Device))) { + Say(currStmtSource().value(), + "ATTRIBUTES(%s) appears more than once"_warn_en_US, + common::EnumToString(attr)); + } else if ((attr == common::CUDASubprogramAttrs::Host || + attr == common::CUDASubprogramAttrs::Device) && + (*current == common::CUDASubprogramAttrs::Host || + *current == common::CUDASubprogramAttrs::Device || + *current == common::CUDASubprogramAttrs::HostDevice)) { + // HOST,DEVICE or DEVICE,HOST -> HostDevice + subp->set_cudaSubprogramAttrs( + common::CUDASubprogramAttrs::HostDevice); + } else { + Say(currStmtSource().value(), + "ATTRIBUTES(%s) conflicts with earlier ATTRIBUTES(%s)"_err_en_US, + common::EnumToString(attr), common::EnumToString(*current)); + } + } else { + subp->set_cudaSubprogramAttrs(attr); + } + } + } + return false; +} + +void SubprogramVisitor::Post(const parser::PrefixSpec::Launch_Bounds &x) { + std::vector bounds; + bool ok{true}; + for (const auto &sicx : x.v) { + if (auto value{evaluate::ToInt64(EvaluateExpr(sicx))}) { + bounds.push_back(*value); + } else { + ok = false; + } + } + if (!ok || bounds.size() != 2) { + Say(currStmtSource().value(), + "Operands of LAUNCH_BOUNDS() must be two integer constants"_err_en_US); + } else if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + if (subp->cudaLaunchBounds().empty()) { + subp->set_cudaLaunchBounds(std::move(bounds)); + } else { + Say(currStmtSource().value(), + "LAUNCH_BOUNDS() may only appear once"_err_en_US); + } + } +} + +void SubprogramVisitor::Post(const parser::PrefixSpec::Cluster_Dims &x) { + std::vector dims; + bool ok{true}; + for (const auto &sicx : x.v) { + if (auto value{evaluate::ToInt64(EvaluateExpr(sicx))}) { + dims.push_back(*value); + } else { + ok = false; + } + } + if (!ok || dims.size() != 3) { + Say(currStmtSource().value(), + "Operands of CLUSTER_DIMS() must be three integer constants"_err_en_US); + } else if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + if (subp->cudaClusterDims().empty()) { + subp->set_cudaClusterDims(std::move(dims)); + } else { + Say(currStmtSource().value(), + "CLUSTER_DIMS() may only appear once"_err_en_US); + } + } +} + bool SubprogramVisitor::Pre(const parser::InterfaceBody::Subroutine &x) { const auto &name{std::get( std::get>(x.t).statement.t)}; @@ -3766,6 +3892,7 @@ } SubprogramDetails &entryDetails{entrySymbol.get()}; CHECK(entryDetails.entryScope() == &inclusiveScope); + SetCUDADataAttr(name.source, entrySymbol, cudaDataAttr()); entrySymbol.attrs() |= GetAttrs(); SetBindNameOn(entrySymbol); for (const auto &dummyArg : std::get>(stmt.t)) { @@ -4150,6 +4277,7 @@ Attrs attrs{attrs_ ? HandleSaveName(name.source, *attrs_) : Attrs{}}; Symbol &symbol{DeclareUnknownEntity(name, attrs)}; symbol.ReplaceName(name.source); + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); if (const auto &init{std::get>(x.t)}) { ConvertToObjectEntity(symbol) || ConvertToProcEntity(symbol); symbol.set( @@ -4422,6 +4550,23 @@ bool DeclarationVisitor::Pre(const parser::VolatileStmt &x) { return HandleAttributeStmt(Attr::VOLATILE, x.v); } +bool DeclarationVisitor::Pre(const parser::CUDAAttributesStmt &x) { + auto attr{std::get(x.t)}; + for (const auto &name : std::get>(x.t)) { + auto *symbol{FindInScope(name)}; + if (symbol && symbol->has()) { + Say(currStmtSource().value(), + "Cannot apply CUDA data attribute to use-associated '%s'"_err_en_US, + name.source); + } else { + if (!symbol) { + symbol = &MakeSymbol(name, ObjectEntityDetails{}); + } + SetCUDADataAttr(name.source, *symbol, attr); + } + } + return false; +} // Handle a statement that sets an attribute on a list of names. bool DeclarationVisitor::HandleAttributeStmt( Attr attr, const std::list &names) { @@ -4972,6 +5117,7 @@ } if (OkToAddComponent(name)) { auto &symbol{DeclareObjectEntity(name, attrs)}; + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); if (symbol.has()) { if (auto &init{std::get>(x.t)}) { Initialization(name, *init, true); @@ -5079,6 +5225,7 @@ attrs.set(Attr::EXTERNAL); } Symbol &symbol{DeclareProcEntity(name, attrs, procInterface)}; + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); // for error symbol.ReplaceName(name.source); if (dtDetails) { dtDetails->add_component(symbol); @@ -6057,6 +6204,7 @@ attrs.set(Attr::PRIVATE); } Symbol &result{MakeSymbol(name, attrs, std::move(details))}; + SetCUDADataAttr(name, result, cudaDataAttr()); if (result.has()) { derivedType.symbol()->get().add_paramDecl(result); } @@ -6718,6 +6866,7 @@ } bool ResolveNamesVisitor::Pre(const parser::CallStmt &x) { HandleCall(Symbol::Flag::Subroutine, x.call); + Walk(x.chevrons); return false; } @@ -7233,7 +7382,7 @@ // of the subprogram's interface, and to mark as procedures any symbols // that might otherwise have been miscategorized as objects. void ResolveNamesVisitor::NoteExecutablePartCall( - Symbol::Flag flag, const parser::Call &call) { + Symbol::Flag flag, const parser::Call &call, bool hasCUDAChevrons) { auto &designator{std::get(call.t)}; if (const auto *name{std::get_if(&designator.u)}) { // Subtlety: The symbol pointers in the parse tree are not set, because @@ -7245,12 +7394,15 @@ : Symbol::Flag::Subroutine}; if (!symbol->test(other)) { ConvertToProcEntity(*symbol); - if (symbol->has()) { + if (auto *details{symbol->detailsIf()}) { symbol->set(flag); if (IsDummy(*symbol)) { SetImplicitAttr(*symbol, Attr::EXTERNAL); } ApplyImplicitRules(*symbol); + if (hasCUDAChevrons) { + details->set_isCUDAKernel(); + } } } } @@ -7378,6 +7530,7 @@ Walk(ompDecls); Walk(compilerDirectives); Walk(useStmts); + UseCUDABuiltinNames(); ClearUseRenames(); ClearUseOnly(); ClearModuleUses(); @@ -7394,6 +7547,20 @@ return false; } +void ResolveNamesVisitor::UseCUDABuiltinNames() { + if (FindCUDADeviceContext(&currScope())) { + if (const Scope * CUDABuiltins{context().GetCUDABuiltinsScope()}) { + for (const auto &[name, symbol] : *CUDABuiltins) { + if (!FindInScope(name)) { + auto &localSymbol{MakeSymbol(name)}; + localSymbol.set_details(UseDetails{name, *symbol}); + localSymbol.flags() = symbol->flags(); + } + } + } + } +} + // Initial processing on specification constructs, before visiting them. void ResolveNamesVisitor::PreSpecificationConstruct( const parser::SpecificationConstruct &spec) { @@ -7931,10 +8098,11 @@ template bool Pre(const A &) { return true; } template void Post(const A &) {} void Post(const parser::FunctionReference &fr) { - resolver_.NoteExecutablePartCall(Symbol::Flag::Function, fr.v); + resolver_.NoteExecutablePartCall(Symbol::Flag::Function, fr.v, false); } void Post(const parser::CallStmt &cs) { - resolver_.NoteExecutablePartCall(Symbol::Flag::Subroutine, cs.call); + resolver_.NoteExecutablePartCall( + Symbol::Flag::Subroutine, cs.call, cs.chevrons.has_value()); } private: @@ -8241,6 +8409,7 @@ void ResolveNamesVisitor::Post(const parser::Program &) { // ensure that all temps were deallocated CHECK(!attrs_); + CHECK(!cudaDataAttr_); CHECK(!GetDeclTypeSpec()); } Index: flang/lib/Semantics/scope.cpp =================================================================== --- flang/lib/Semantics/scope.cpp +++ flang/lib/Semantics/scope.cpp @@ -92,7 +92,7 @@ const Scope *parent{symbol_->get().parent()}; return parent ? parent->FindSymbol(name) : nullptr; } else if (CanImport(name)) { - return parent_.FindSymbol(name); + return parent_->FindSymbol(name); } else { return nullptr; } @@ -288,7 +288,7 @@ // true if name can be imported or host-associated from parent scope. bool Scope::CanImport(const SourceName &name) const { - if (IsTopLevel() || parent_.IsTopLevel()) { + if (IsTopLevel() || parent_->IsTopLevel()) { return false; } switch (GetImportKind()) { Index: flang/lib/Semantics/semantics.cpp =================================================================== --- flang/lib/Semantics/semantics.cpp +++ flang/lib/Semantics/semantics.cpp @@ -470,6 +470,13 @@ } } +const Scope *SemanticsContext::GetCUDABuiltinsScope() { + if (!CUDABuiltinsScope_) { + CUDABuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); + } + return *CUDABuiltinsScope_; +} + void SemanticsContext::UsePPCFortranBuiltinsModule() { if (ppcBuiltinsScope_ == nullptr) { ppcBuiltinsScope_ = GetBuiltinModule("__fortran_ppc_intrinsics"); Index: flang/lib/Semantics/symbol.cpp =================================================================== --- flang/lib/Semantics/symbol.cpp +++ flang/lib/Semantics/symbol.cpp @@ -128,6 +128,22 @@ if (x.defaultIgnoreTKR_) { os << " defaultIgnoreTKR"; } + if (x.cudaSubprogramAttrs_) { + os << " cudaSubprogramAttrs: " + << common::EnumToString(*x.cudaSubprogramAttrs_); + } + if (!x.cudaLaunchBounds_.empty()) { + os << " cudaLaunchBounds:"; + for (auto x : x.cudaLaunchBounds_) { + os << ' ' << x; + } + } + if (!x.cudaClusterDims_.empty()) { + os << " cudaClusterDims:"; + for (auto x : x.cudaClusterDims_) { + os << ' ' << x; + } + } return os; } @@ -414,6 +430,9 @@ os << ' '; x.ignoreTKR_.Dump(os, common::EnumToString); } + if (x.cudaDataAttr()) { + os << " cudaDataAttr: " << common::EnumToString(*x.cudaDataAttr()); + } return os; } @@ -443,6 +462,9 @@ os << " => NULL()"; } } + if (x.isCUDAKernel()) { + os << " isCUDAKernel"; + } return os; } Index: flang/lib/Semantics/tools.cpp =================================================================== --- flang/lib/Semantics/tools.cpp +++ flang/lib/Semantics/tools.cpp @@ -1058,6 +1058,18 @@ return IsAllocatable(symbol) && IsPolymorphic(symbol); } +const Scope *FindCUDADeviceContext(const Scope *scope) { + return !scope ? nullptr : FindScopeContaining(*scope, [](const Scope &s) { + return IsCUDADeviceContext(&s); + }); +} + +std::optional GetCUDADataAttr(const Symbol *symbol) { + const auto *object{ + symbol ? symbol->detailsIf() : nullptr}; + return object ? object->cudaDataAttr() : std::nullopt; +} + std::optional CheckAccessibleSymbol( const Scope &scope, const Symbol &symbol) { if (symbol.attrs().test(Attr::PRIVATE)) { Index: flang/module/__fortran_builtins.f90 =================================================================== --- flang/module/__fortran_builtins.f90 +++ flang/module/__fortran_builtins.f90 @@ -43,6 +43,15 @@ integer, parameter :: __builtin_atomic_int_kind = selected_int_kind(18) integer, parameter :: __builtin_atomic_logical_kind = __builtin_atomic_int_kind + procedure(type(__builtin_c_ptr)) :: __builtin_c_loc + + type :: __builtin_dim3 + integer :: x=1, y=1, z=1 + end type + type(__builtin_dim3) :: & + __builtin_threadIdx, __builtin_blockDim, __builtin_blockIdx, __builtin_gridDim + integer, parameter :: __builtin_warpsize = 32 + intrinsic :: __builtin_ieee_is_nan, __builtin_ieee_is_negative, & __builtin_ieee_is_normal intrinsic :: __builtin_ieee_next_after, __builtin_ieee_next_down, & Index: flang/test/Parser/cuf-sanity-common =================================================================== --- flang/test/Parser/cuf-sanity-common +++ flang/test/Parser/cuf-sanity-common @@ -18,7 +18,7 @@ attributes(global) launch_bounds(1, 2) subroutine lbsub; end attributes(global) cluster_dims(1, 2, 3) subroutine cdsub; end attributes(device) subroutine attrs -! enable with name resolution: attributes(device) :: devx1 + attributes(device) :: devx1 real, device :: devx2 end subroutine subroutine test Index: flang/test/Semantics/cuf01.cuf =================================================================== --- /dev/null +++ flang/test/Semantics/cuf01.cuf @@ -0,0 +1,22 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Test conflicting CUDA subprogram attributes +module m1 + contains + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host,host) subroutine ok1; end + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host) attributes(host) subroutine ok2; end + attributes(host,device) subroutine ok3; end + attributes(device,host) subroutine ok4; end + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host,device,host) subroutine ok5; end + !WARNING: ATTRIBUTES(Device) appears more than once + attributes(device,host,device) subroutine ok6; end + !ERROR: ATTRIBUTES(Global) conflicts with earlier ATTRIBUTES(Host) + attributes(host,global) subroutine conflict1; end + !ERROR: ATTRIBUTES(Host) conflicts with earlier ATTRIBUTES(Global) + attributes(global,host) subroutine conflict2; end + !ERROR: ATTRIBUTES(Grid_Global) conflicts with earlier ATTRIBUTES(Host) + attributes(host,grid_global) subroutine conflict3; end + !TODO: more with launch_bounds & cluster_dims +end module Index: flang/test/Semantics/modfile55.cuf =================================================================== --- /dev/null +++ flang/test/Semantics/modfile55.cuf @@ -0,0 +1,41 @@ +! RUN: %python %S/test_modfile.py %s %flang_fc1 +! Sanity check for CUDA Fortran attributes in module files +module m + attributes(device) dd + real, managed, allocatable :: md + real, pinned, allocatable :: mp + attributes(constant) cd + contains + attributes(global) subroutine globsub(x,y,z) + real, value :: x + real, device :: y + real, managed :: z + end subroutine + attributes(host,device) real function foo(x) + foo = x + 1. + end function +end + +!Expect: m.mod +!module m +!real(4)::dd +!attributes(device) dd +!real(4),allocatable::md +!attributes(managed) md +!real(4),allocatable::mp +!attributes(pinned) mp +!real(4)::cd +!attributes(constant) cd +!contains +!attributes(global) subroutine globsub(x,y,z) +!real(4),value::x +!real(4)::y +!attributes(device) y +!real(4)::z +!attributes(managed) z +!end +!attributes(host,device) function foo(x) +!real(4)::x +!real(4)::foo +!end +!end