Index: flang/docs/ParserCombinators.md =================================================================== --- flang/docs/ParserCombinators.md +++ flang/docs/ParserCombinators.md @@ -97,8 +97,9 @@ * `nonemptySeparated(p, q)` repeatedly matches "p q p q p q ... p", returning a `std::list<>` of only the values of the p's. It fails if p immediately fails. -* `extension(p)` parses p if strict standard compliance is disabled, - or with a warning if nonstandard usage warnings are enabled. +* `extension([msg,]p)` parses p if strict standard compliance is + disabled, or with an optional warning when nonstandard usage warnings + are enabled. * `deprecated(p)` parses p if strict standard compliance is disabled, with a warning if deprecated usage warnings are enabled. * `inContext(msg, p)` runs p within an error message context; any @@ -165,9 +166,9 @@ a longer identifier or keyword). * `parenthesized(p)` is shorthand for `"(" >> p / ")"`. * `bracketed(p)` is shorthand for `"[" >> p / "]"`. -* `nonEmptyList(p)` matches a comma-separated list of one or more +* `nonemptyList(p)` matches a comma-separated list of one or more instances of p. -* `nonEmptyList(errorMessage, p)` is equivalent to +* `nonemptyList(errorMessage, p)` is equivalent to `withMessage(errorMessage, nonemptyList(p))`, which allows one to supply a meaningful error message in the event of an empty list. * `optionalList(p)` is the same thing, but can be empty, and always succeeds. Index: flang/include/flang/Common/Fortran-features.h =================================================================== --- flang/include/flang/Common/Fortran-features.h +++ flang/include/flang/Common/Fortran-features.h @@ -26,7 +26,7 @@ SignedPrimary, FileName, Carriagecontrol, Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor, ProgramParentheses, PercentRefAndVal, OmitFunctionDummies, CrayPointer, Hollerith, ArithmeticIF, - Assign, AssignedGOTO, Pause, OpenACC, OpenMP, CruftAfterAmpersand, + Assign, AssignedGOTO, Pause, OpenACC, OpenMP, CUDA, CruftAfterAmpersand, ClassicCComments, AdditionalFormats, BigIntLiterals, RealDoControls, EquivalenceNumericWithCharacter, EquivalenceNonDefaultNumeric, EquivalenceSameNonSequence, AdditionalIntrinsics, AnonymousParents, @@ -45,6 +45,7 @@ disable_.set(LanguageFeature::OldDebugLines); disable_.set(LanguageFeature::OpenACC); disable_.set(LanguageFeature::OpenMP); + disable_.set(LanguageFeature::CUDA); // !@cuf disable_.set(LanguageFeature::ImplicitNoneTypeNever); disable_.set(LanguageFeature::ImplicitNoneTypeAlways); disable_.set(LanguageFeature::DefaultSave); Index: flang/include/flang/Common/Fortran.h =================================================================== --- flang/include/flang/Common/Fortran.h +++ flang/include/flang/Common/Fortran.h @@ -15,6 +15,7 @@ #include "enum-set.h" #include "idioms.h" #include +#include #include namespace Fortran::common { @@ -79,6 +80,12 @@ // Fortran arrays may have up to 15 dimensions (See Fortran 2018 section 5.4.6). static constexpr int maxRank{15}; +// CUDA subprogram attribute combinations +ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, Global, Grid_Global) + +// CUDA data attributes; mutually exclusive +ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture) + // Fortran names may have up to 63 characters (See Fortran 2018 C601). static constexpr int maxNameLen{63}; @@ -98,5 +105,8 @@ IgnoreTKR::Rank, IgnoreTKR::Device, IgnoreTKR::Managed}; std::string AsFortran(IgnoreTKRSet); +bool AreCompatibleCUDADataAttrs( + std::optional, std::optional, IgnoreTKRSet); + } // namespace Fortran::common #endif // FORTRAN_COMMON_FORTRAN_H_ Index: flang/include/flang/Common/indirection.h =================================================================== --- flang/include/flang/Common/indirection.h +++ flang/include/flang/Common/indirection.h @@ -148,6 +148,7 @@ A *operator->() const { return p_; } operator bool() const { return p_ != nullptr; } A *get() { return p_; } + auto get() const { return reinterpret_cast *>(p_); } A *release() { A *result{p_}; p_ = nullptr; Index: flang/include/flang/Common/template.h =================================================================== --- flang/include/flang/Common/template.h +++ flang/include/flang/Common/template.h @@ -94,8 +94,10 @@ TUPLEorVARIANT>::value()}; template -constexpr bool HasMember{ - SearchMembers::template Match, TUPLEorVARIANT> >= 0}; +constexpr int FindMember{ + SearchMembers::template Match, TUPLEorVARIANT>}; +template +constexpr bool HasMember{FindMember >= 0}; // std::optional> -> std::optional template Index: flang/include/flang/Frontend/FrontendOptions.h =================================================================== --- flang/include/flang/Frontend/FrontendOptions.h +++ flang/include/flang/Frontend/FrontendOptions.h @@ -113,6 +113,10 @@ /// \return True if the file should be preprocessed bool isToBePreprocessed(llvm::StringRef suffix); +/// \param suffix The file extension +/// \return True if the file contains CUDA Fortran +bool isCUDAFortranSuffix(llvm::StringRef suffix); + enum class Language : uint8_t { Unknown, @@ -182,6 +186,9 @@ /// sufficient to implement gfortran`s logic controlled with `-cpp/-nocpp`. unsigned mustBePreprocessed : 1; + /// Whether to enable CUDA Fortran language extensions + bool isCUDAFortran{false}; + public: FrontendInputFile() = default; FrontendInputFile(llvm::StringRef file, InputKind inKind) @@ -193,6 +200,7 @@ std::string pathSuffix{file.substr(pathDotIndex + 1)}; isFixedForm = isFixedFormSuffix(pathSuffix); mustBePreprocessed = isToBePreprocessed(pathSuffix); + isCUDAFortran = isCUDAFortranSuffix(pathSuffix); } FrontendInputFile(const llvm::MemoryBuffer *memBuf, InputKind inKind) @@ -204,6 +212,7 @@ bool isFile() const { return (buffer == nullptr); } bool getIsFixedForm() const { return isFixedForm; } bool getMustBePreprocessed() const { return mustBePreprocessed; } + bool getIsCUDAFortran() const { return isCUDAFortran; } llvm::StringRef getFile() const { assert(isFile()); Index: flang/include/flang/Parser/dump-parse-tree.h =================================================================== --- flang/include/flang/Parser/dump-parse-tree.h +++ flang/include/flang/Parser/dump-parse-tree.h @@ -14,6 +14,7 @@ #include "parse-tree.h" #include "tools.h" #include "unparse.h" +#include "flang/Common/Fortran.h" #include "flang/Common/idioms.h" #include "flang/Common/indirection.h" #include "llvm/Support/raw_ostream.h" @@ -45,6 +46,8 @@ NODE(std, string) NODE(std, int64_t) NODE(std, uint64_t) + NODE_ENUM(common, CUDADataAttr) + NODE_ENUM(common, CUDASubprogramAttrs) NODE(format, ControlEditDesc) NODE(format::ControlEditDesc, Kind) NODE(format, DerivedTypeDataEditDesc) @@ -120,6 +123,8 @@ NODE(parser, AllocOpt) NODE(AllocOpt, Mold) NODE(AllocOpt, Source) + NODE(AllocOpt, Stream) + NODE(AllocOpt, Pinned) NODE(parser, Allocatable) NODE(parser, AllocatableStmt) NODE(parser, AllocateCoarraySpec) @@ -165,6 +170,7 @@ NODE(parser, BoundsSpec) NODE(parser, Call) NODE(parser, CallStmt) + NODE(CallStmt, Chevrons) NODE(parser, CaseConstruct) NODE(CaseConstruct, Case) NODE(parser, CaseSelector) @@ -216,6 +222,9 @@ NODE(parser, ContinueStmt) NODE(parser, CriticalConstruct) NODE(parser, CriticalStmt) + NODE(parser, CUDAAttributesStmt) + NODE(parser, CUFKernelDoConstruct) + NODE(CUFKernelDoConstruct, Directive) NODE(parser, CycleStmt) NODE(parser, DataComponentDefStmt) NODE(parser, DataIDoObject) @@ -605,6 +614,9 @@ NODE(PrefixSpec, Non_Recursive) NODE(PrefixSpec, Pure) NODE(PrefixSpec, Recursive) + NODE(PrefixSpec, Attributes) + NODE(PrefixSpec, Launch_Bounds) + NODE(PrefixSpec, Cluster_Dims) NODE(parser, PrintStmt) NODE(parser, PrivateStmt) NODE(parser, PrivateOrSequence) Index: flang/include/flang/Parser/message.h =================================================================== --- flang/include/flang/Parser/message.h +++ flang/include/flang/Parser/message.h @@ -54,6 +54,7 @@ constexpr MessageFixedText &operator=(MessageFixedText &&) = default; CharBlock text() const { return text_; } + bool empty() const { return text_.empty(); } Severity severity() const { return severity_; } MessageFixedText &set_severity(Severity severity) { severity_ = severity; Index: flang/include/flang/Parser/parse-tree-visitor.h =================================================================== --- flang/include/flang/Parser/parse-tree-visitor.h +++ flang/include/flang/Parser/parse-tree-visitor.h @@ -568,17 +568,33 @@ mutator.Post(x); } } -template void Walk(const Call &x, V &visitor) { +template void Walk(const FunctionReference &x, V &visitor) { if (visitor.Pre(x)) { Walk(x.source, visitor); - Walk(x.t, visitor); + Walk(x.v, visitor); visitor.Post(x); } } -template void Walk(Call &x, M &mutator) { +template void Walk(FunctionReference &x, M &mutator) { if (mutator.Pre(x)) { Walk(x.source, mutator); - Walk(x.t, mutator); + Walk(x.v, mutator); + mutator.Post(x); + } +} +template void Walk(const CallStmt &x, V &visitor) { + if (visitor.Pre(x)) { + Walk(x.source, visitor); + Walk(x.call, visitor); + Walk(x.chevrons, visitor); + visitor.Post(x); + } +} +template void Walk(CallStmt &x, M &mutator) { + if (mutator.Pre(x)) { + Walk(x.source, mutator); + Walk(x.call, mutator); + Walk(x.chevrons, mutator); mutator.Post(x); } } Index: flang/include/flang/Parser/parse-tree.h =================================================================== --- flang/include/flang/Parser/parse-tree.h +++ flang/include/flang/Parser/parse-tree.h @@ -67,7 +67,7 @@ namespace Fortran::evaluate { struct GenericExprWrapper; // forward definition, wraps Expr struct GenericAssignmentWrapper; // forward definition, represent assignment -class ProcedureRef; // forward definition, represents a CALL statement +class ProcedureRef; // forward definition, represents a CALL or function ref } // namespace Fortran::evaluate // Most non-template classes in this file use these default definitions @@ -253,6 +253,7 @@ // Directives, extensions, and deprecated statements struct CompilerDirective; struct BasedPointerStmt; +struct CUDAAttributesStmt; struct StructureDef; struct ArithmeticIfStmt; struct AssignStmt; @@ -264,6 +265,7 @@ struct OpenMPConstruct; struct OpenMPDeclarativeConstruct; struct OmpEndLoopDirective; +struct CUFKernelDoConstruct; // Cooked character stream locations using Location = const char *; @@ -361,6 +363,7 @@ // pointer-stmt | protected-stmt | save-stmt | target-stmt | // volatile-stmt | value-stmt | common-stmt | equivalence-stmt // Extension: (Cray) based POINTER statement +// Extension: CUDA data attribute statement struct OtherSpecificationStmt { UNION_CLASS_BOILERPLATE(OtherSpecificationStmt); std::variant, @@ -374,7 +377,8 @@ common::Indirection, common::Indirection, common::Indirection, common::Indirection, common::Indirection, common::Indirection, - common::Indirection> + common::Indirection, + common::Indirection> u; }; @@ -507,7 +511,8 @@ // action-stmt | associate-construct | block-construct | // case-construct | change-team-construct | critical-construct | // do-construct | if-construct | select-rank-construct | -// select-type-construct | where-construct | forall-construct +// select-type-construct | where-construct | forall-construct | +// (CUDA) CUF-kernel-do-construct struct ExecutableConstruct { UNION_CLASS_BOILERPLATE(ExecutableConstruct); std::variant, common::Indirection, @@ -524,7 +529,8 @@ common::Indirection, common::Indirection, common::Indirection, - common::Indirection> + common::Indirection, + common::Indirection> u; }; @@ -960,14 +966,15 @@ // R738 component-attr-spec -> // access-spec | ALLOCATABLE | // CODIMENSION lbracket coarray-spec rbracket | -// CONTIGUOUS | DIMENSION ( component-array-spec ) | POINTER +// CONTIGUOUS | DIMENSION ( component-array-spec ) | POINTER | +// (CUDA) CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE EMPTY_CLASS(Allocatable); EMPTY_CLASS(Pointer); EMPTY_CLASS(Contiguous); struct ComponentAttrSpec { UNION_CLASS_BOILERPLATE(ComponentAttrSpec); std::variant + ComponentArraySpec, Pointer, common::CUDADataAttr, ErrorRecovery> u; }; @@ -1320,7 +1327,8 @@ // CODIMENSION lbracket coarray-spec rbracket | CONTIGUOUS | // DIMENSION ( array-spec ) | EXTERNAL | INTENT ( intent-spec ) | // INTRINSIC | language-binding-spec | OPTIONAL | PARAMETER | POINTER | -// PROTECTED | SAVE | TARGET | VALUE | VOLATILE +// PROTECTED | SAVE | TARGET | VALUE | VOLATILE | +// (CUDA) CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE EMPTY_CLASS(Asynchronous); EMPTY_CLASS(External); EMPTY_CLASS(Intrinsic); @@ -1335,7 +1343,8 @@ UNION_CLASS_BOILERPLATE(AttrSpec); std::variant + Parameter, Pointer, Protected, Save, Target, Value, Volatile, + common::CUDADataAttr> u; }; @@ -1909,13 +1918,17 @@ // R928 alloc-opt -> // ERRMSG = errmsg-variable | MOLD = source-expr | -// SOURCE = source-expr | STAT = stat-variable +// SOURCE = source-expr | STAT = stat-variable | +// (CUDA) STREAM = scalar-int-expr +// PINNED = scalar-logical-variable // R931 source-expr -> expr struct AllocOpt { UNION_CLASS_BOILERPLATE(AllocOpt); WRAPPER_CLASS(Mold, common::Indirection); WRAPPER_CLASS(Source, common::Indirection); - std::variant u; + WRAPPER_CLASS(Stream, common::Indirection); + WRAPPER_CLASS(Pinned, common::Indirection); + std::variant u; }; // R927 allocate-stmt -> @@ -3016,7 +3029,9 @@ // R1527 prefix-spec -> // declaration-type-spec | ELEMENTAL | IMPURE | MODULE | -// NON_RECURSIVE | PURE | RECURSIVE +// NON_RECURSIVE | PURE | RECURSIVE | +// (CUDA) ATTRIBUTES ( (DEVICE | GLOBAL | GRID_GLOBAL | HOST)... ) +// LAUNCH_BOUNDS(expr-list) | CLUSTER_DIMS(expr-list) struct PrefixSpec { UNION_CLASS_BOILERPLATE(PrefixSpec); EMPTY_CLASS(Elemental); @@ -3025,8 +3040,11 @@ EMPTY_CLASS(Non_Recursive); EMPTY_CLASS(Pure); EMPTY_CLASS(Recursive); + WRAPPER_CLASS(Attributes, std::list); + WRAPPER_CLASS(Launch_Bounds, std::list); + WRAPPER_CLASS(Cluster_Dims, std::list); std::variant + Pure, Recursive, Attributes, Launch_Bounds, Cluster_Dims> u; }; @@ -3155,23 +3173,39 @@ std::tuple, ActualArg> t; }; -// R1520 function-reference -> procedure-designator ( [actual-arg-spec-list] ) +// R1520 function-reference -> procedure-designator +// ( [actual-arg-spec-list] ) struct Call { TUPLE_CLASS_BOILERPLATE(Call); - CharBlock source; std::tuple> t; }; struct FunctionReference { WRAPPER_CLASS_BOILERPLATE(FunctionReference, Call); + CharBlock source; Designator ConvertToArrayElementRef(); StructureConstructor ConvertToStructureConstructor( const semantics::DerivedTypeSpec &); }; -// R1521 call-stmt -> CALL procedure-designator [( [actual-arg-spec-list] )] +// R1521 call-stmt -> CALL procedure-designator [ chevrons ] +// [( [actual-arg-spec-list] )] +// (CUDA) chevrons -> <<< scalar-expr, scalar-expr [, +// scalar-int-expr [, scalar-int-expr ] ] >>> struct CallStmt { - WRAPPER_CLASS_BOILERPLATE(CallStmt, Call); + BOILERPLATE(CallStmt); + struct Chevrons { + TUPLE_CLASS_BOILERPLATE(Chevrons); + std::tuple, + std::optional> + t; + }; + explicit CallStmt(ProcedureDesignator &&pd, std::optional &&ch, + std::list &&args) + : call{std::move(pd), std::move(args)}, chevrons{std::move(ch)} {} + Call call; + std::optional chevrons; + CharBlock source; mutable common::ForwardOwningPointer typedCall; // filled by semantics }; @@ -3250,6 +3284,12 @@ std::variant, LoopCount, std::list> u; }; +// (CUDA) ATTRIBUTE(attribute) [::] name-list +struct CUDAAttributesStmt { + TUPLE_CLASS_BOILERPLATE(CUDAAttributesStmt); + std::tuple> t; +}; + // Legacy extensions struct BasedPointer { TUPLE_CLASS_BOILERPLATE(BasedPointer); @@ -4168,5 +4208,23 @@ u; }; +// CUF-kernel-do-construct -> +// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream] +// >>> do-construct +// grid -> * | scalar-int-expr | ( scalar-int-expr-list ) +// block -> * | scalar-int-expr | ( scalar-int-expr-list ) +// stream -> 0, scalar-int-expr | STREAM = scalar-int-expr +struct CUFKernelDoConstruct { + TUPLE_CLASS_BOILERPLATE(CUFKernelDoConstruct); + struct Directive { + TUPLE_CLASS_BOILERPLATE(Directive); + CharBlock source; + std::tuple, std::list, + std::list, std::optional> + t; + }; + std::tuple> t; +}; + } // namespace Fortran::parser #endif // FORTRAN_PARSER_PARSE_TREE_H_ Index: flang/include/flang/Semantics/scope.h =================================================================== --- flang/include/flang/Semantics/scope.h +++ flang/include/flang/Semantics/scope.h @@ -68,7 +68,7 @@ explicit Scope(SemanticsContext &context) : Scope{*this, Kind::Global, nullptr, context} {} Scope(Scope &parent, Kind kind, Symbol *symbol, SemanticsContext &context) - : parent_{parent}, kind_{kind}, symbol_{symbol}, context_{context} { + : parent_{&parent}, kind_{kind}, symbol_{symbol}, context_{context} { if (symbol) { symbol->set_scope(this); } @@ -79,12 +79,12 @@ bool operator!=(const Scope &that) const { return this != &that; } Scope &parent() { - CHECK(&parent_ != this); - return parent_; + CHECK(parent_ != this); + return *parent_; } const Scope &parent() const { - CHECK(&parent_ != this); - return parent_; + CHECK(parent_ != this); + return *parent_; } Kind kind() const { return kind_; } bool IsGlobal() const { return kind_ == Kind::Global; } @@ -121,6 +121,7 @@ bool Contains(const Scope &) const; /// Make a scope nested in this one Scope &MakeScope(Kind kind, Symbol *symbol = nullptr); + SemanticsContext &GetMutableSemanticsContext() const { return const_cast(context()); } @@ -271,7 +272,8 @@ } private: - Scope &parent_; // this is enclosing scope, not extended derived type base + Scope *parent_{ + nullptr}; // this is enclosing scope, not extended derived type base const Kind kind_; std::size_t size_{0}; // size in bytes std::optional alignment_; // required alignment in bytes Index: flang/include/flang/Semantics/semantics.h =================================================================== --- flang/include/flang/Semantics/semantics.h +++ flang/include/flang/Semantics/semantics.h @@ -215,7 +215,7 @@ // Defines builtinsScope_ from the __Fortran_builtins module void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } - + const Scope *GetCUDABuiltinsScope(); void UsePPCFortranBuiltinsModule(); const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -253,7 +253,7 @@ void CheckError(const Symbol &); const common::IntrinsicTypeDefaultKinds &defaultKinds_; - const common::LanguageFeatureControl languageFeatures_; + const common::LanguageFeatureControl &languageFeatures_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; @@ -279,6 +279,7 @@ UnorderedSymbolSet errorSymbols_; std::set tempNames_; const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins + std::optional CUDABuiltinsScope_; // module __CUDA_builtins const Scope *ppcBuiltinsScope_{nullptr}; // module __Fortran_PPC_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; Index: flang/include/flang/Semantics/symbol.h =================================================================== --- flang/include/flang/Semantics/symbol.h +++ flang/include/flang/Semantics/symbol.h @@ -114,6 +114,26 @@ } bool defaultIgnoreTKR() const { return defaultIgnoreTKR_; } void set_defaultIgnoreTKR(bool yes) { defaultIgnoreTKR_ = yes; } + std::optional cudaSubprogramAttrs() const { + return cudaSubprogramAttrs_; + } + void set_cudaSubprogramAttrs(common::CUDASubprogramAttrs csas) { + cudaSubprogramAttrs_ = csas; + } + std::vector &cudaLaunchBounds() { return cudaLaunchBounds_; } + const std::vector &cudaLaunchBounds() const { + return cudaLaunchBounds_; + } + void set_cudaLaunchBounds(std::vector &&x) { + cudaLaunchBounds_ = std::move(x); + } + std::vector &cudaClusterDims() { return cudaClusterDims_; } + const std::vector &cudaClusterDims() const { + return cudaClusterDims_; + } + void set_cudaClusterDims(std::vector &&x) { + cudaClusterDims_ = std::move(x); + } private: bool isInterface_{false}; // true if this represents an interface-body @@ -127,6 +147,10 @@ // appeared in an ancestor (sub)module. Symbol *moduleInterface_{nullptr}; bool defaultIgnoreTKR_{false}; + // CUDA ATTRIBUTES(...) from subroutine/function prefix + std::optional cudaSubprogramAttrs_; + // CUDA LAUNCH_BOUNDS(...) & CLUSTER_DIMS(...) from prefix + std::vector cudaLaunchBounds_, cudaClusterDims_; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const SubprogramDetails &); @@ -229,6 +253,12 @@ bool CanBeDeferredShape() const { return shape_.CanBeDeferredShape(); } bool IsAssumedSize() const { return isDummy() && shape_.CanBeAssumedSize(); } bool IsAssumedRank() const { return isDummy() && shape_.IsAssumedRank(); } + std::optional cudaDataAttr() const { + return cudaDataAttr_; + } + void set_cudaDataAttr(std::optional attr) { + cudaDataAttr_ = attr; + } private: MaybeExpr init_; @@ -237,6 +267,7 @@ ArraySpec coshape_; common::IgnoreTKRSet ignoreTKR_; const Symbol *commonBlock_{nullptr}; // common block this object is in + std::optional cudaDataAttr_; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const ObjectEntityDetails &); }; @@ -276,10 +307,13 @@ std::optional init() const { return init_; } void set_init(const Symbol &symbol) { init_ = &symbol; } void set_init(std::nullptr_t) { init_ = nullptr; } + bool isCUDAKernel() const { return isCUDAKernel_; } + void set_isCUDAKernel(bool yes = true) { isCUDAKernel_ = yes; } private: const Symbol *procInterface_{nullptr}; std::optional init_; + bool isCUDAKernel_{false}; friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const ProcEntityDetails &); }; Index: flang/include/flang/Semantics/tools.h =================================================================== --- flang/include/flang/Semantics/tools.h +++ flang/include/flang/Semantics/tools.h @@ -190,6 +190,22 @@ bool IsUnlimitedPolymorphic(const Symbol &); bool IsPolymorphicAllocatable(const Symbol &); +inline bool IsCUDADeviceContext(const Scope *scope) { + if (scope) { + if (const Symbol * symbol{scope->symbol()}) { + if (const auto *subp{symbol->detailsIf()}) { + if (auto attrs{subp->cudaSubprogramAttrs()}) { + return *attrs != common::CUDASubprogramAttrs::Host; + } + } + } + } + return false; +} + +const Scope *FindCUDADeviceContext(const Scope *); +std::optional GetCUDADataAttr(const Symbol *); + // Return an error if a symbol is not accessible from a scope std::optional CheckAccessibleSymbol( const semantics::Scope &, const Symbol &); @@ -386,9 +402,9 @@ // its non-POINTER derived type components. (The lifetime of each // potential subobject component is that of the entire instance.) // - PotentialAndPointer subobject components of a derived type are the -// closure of -// its components (including POINTERs) and the PotentialAndPointer subobject -// components of its non-POINTER derived type components. +// closure of its components (including POINTERs) and the +// PotentialAndPointer subobject components of its non-POINTER derived type +// components. // Parent and procedure components are considered against these definitions. // For this kind of iterator, the component tree is recursively visited in the // following order: Index: flang/lib/Common/Fortran.cpp =================================================================== --- flang/lib/Common/Fortran.cpp +++ flang/lib/Common/Fortran.cpp @@ -97,4 +97,23 @@ return result; } +bool AreCompatibleCUDADataAttrs(std::optional x, + std::optional y, IgnoreTKRSet ignoreTKR) { + if (!x && !y) { + return true; + } else if (x && y && *x == *y) { + return true; + } else if (ignoreTKR.test(IgnoreTKR::Device) && + x.value_or(CUDADataAttr::Device) == CUDADataAttr::Device && + y.value_or(CUDADataAttr::Device) == CUDADataAttr::Device) { + return true; + } else if (ignoreTKR.test(IgnoreTKR::Managed) && + x.value_or(CUDADataAttr::Managed) == CUDADataAttr::Managed && + y.value_or(CUDADataAttr::Managed) == CUDADataAttr::Managed) { + return true; + } else { + return false; + } +} + } // namespace Fortran::common Index: flang/lib/Frontend/FrontendAction.cpp =================================================================== --- flang/lib/Frontend/FrontendAction.cpp +++ flang/lib/Frontend/FrontendAction.cpp @@ -86,6 +86,10 @@ invoc.collectMacroDefinitions(); } + // Enable CUDA Fortran if source file is *.cuf/*.CUF. + invoc.getFortranOpts().features.Enable(Fortran::common::LanguageFeature::CUDA, + getCurrentInput().getIsCUDAFortran()); + // Decide between fixed and free form (if the user didn't express any // preference, use the file extension to decide) if (invoc.getFrontendOpts().fortranForm == FortranForm::Unknown) { Index: flang/lib/Frontend/FrontendOptions.cpp =================================================================== --- flang/lib/Frontend/FrontendOptions.cpp +++ flang/lib/Frontend/FrontendOptions.cpp @@ -23,17 +23,22 @@ bool Fortran::frontend::isFreeFormSuffix(llvm::StringRef suffix) { // Note: Keep this list in-sync with flang/test/lit.cfg.py - // TODO: Add Cuda Fortan files (i.e. `*.cuf` and `*.CUF`). return suffix == "f90" || suffix == "F90" || suffix == "ff90" || suffix == "f95" || suffix == "F95" || suffix == "ff95" || suffix == "f03" || suffix == "F03" || suffix == "f08" || - suffix == "F08" || suffix == "f18" || suffix == "F18"; + suffix == "F08" || suffix == "f18" || suffix == "F18" || + suffix == "cuf" || suffix == "CUF"; } bool Fortran::frontend::isToBePreprocessed(llvm::StringRef suffix) { return suffix == "F" || suffix == "FOR" || suffix == "fpp" || suffix == "FPP" || suffix == "F90" || suffix == "F95" || - suffix == "F03" || suffix == "F08" || suffix == "F18"; + suffix == "F03" || suffix == "F08" || suffix == "F18" || + suffix == "CUF"; +} + +bool Fortran::frontend::isCUDAFortranSuffix(llvm::StringRef suffix) { + return suffix == "cuf" || suffix == "CUF"; } InputKind FrontendOptions::getInputKindForExtension(llvm::StringRef extension) { Index: flang/lib/Lower/Allocatable.cpp =================================================================== --- flang/lib/Lower/Allocatable.cpp +++ flang/lib/Lower/Allocatable.cpp @@ -367,6 +367,12 @@ [&](const Fortran::parser::AllocOpt::Mold &mold) { moldExpr = Fortran::semantics::GetExpr(mold.v.value()); }, + [&](const Fortran::parser::AllocOpt::Stream &) { + TODO(loc, "CUDA ALLOCATE(STREAM=)"); + }, + [&](const Fortran::parser::AllocOpt::Pinned &) { + TODO(loc, "CUDA ALLOCATE(PINNED=)"); + }, }, allocOption.u); } Index: flang/lib/Lower/Bridge.cpp =================================================================== --- flang/lib/Lower/Bridge.cpp +++ flang/lib/Lower/Bridge.cpp @@ -1265,7 +1265,7 @@ void genFIR(const Fortran::parser::CallStmt &stmt) { Fortran::lower::StatementContext stmtCtx; Fortran::lower::pft::Evaluation &eval = getEval(); - setCurrentPosition(stmt.v.source); + setCurrentPosition(stmt.source); assert(stmt.typedCall && "Call was not analyzed"); mlir::Value res{}; if (lowerToHighLevelFIR()) { @@ -1292,7 +1292,7 @@ llvm::SmallVector labelList; int64_t index = 0; for (const Fortran::parser::ActualArgSpec &arg : - std::get>(stmt.v.t)) { + std::get>(stmt.call.t)) { const auto &actual = std::get(arg.t); if (const auto *altReturn = std::get_if(&actual.u)) { Index: flang/lib/Lower/PFTBuilder.cpp =================================================================== --- flang/lib/Lower/PFTBuilder.cpp +++ flang/lib/Lower/PFTBuilder.cpp @@ -726,7 +726,7 @@ [&](const parser::CallStmt &s) { // Look for alternate return specifiers. const auto &args = - std::get>(s.v.t); + std::get>(s.call.t); for (const auto &arg : args) { const auto &actual = std::get(arg.t); if (const auto *altReturn = Index: flang/lib/Parser/Fortran-parsers.cpp =================================================================== --- flang/lib/Parser/Fortran-parsers.cpp +++ flang/lib/Parser/Fortran-parsers.cpp @@ -428,13 +428,16 @@ // R738 component-attr-spec -> // access-spec | ALLOCATABLE | // CODIMENSION lbracket coarray-spec rbracket | -// CONTIGUOUS | DIMENSION ( component-array-spec ) | POINTER +// CONTIGUOUS | DIMENSION ( component-array-spec ) | POINTER | +// CUDA-data-attr TYPE_PARSER(construct(accessSpec) || construct(allocatable) || construct("CODIMENSION" >> coarraySpec) || construct(contiguous) || construct("DIMENSION" >> Parser{}) || construct(pointer) || + extension( + construct(Parser{})) || construct(recovery( fail( "type parameter definitions must appear before component declarations"_err_en_US), @@ -654,7 +657,8 @@ // CODIMENSION lbracket coarray-spec rbracket | CONTIGUOUS | // DIMENSION ( array-spec ) | EXTERNAL | INTENT ( intent-spec ) | // INTRINSIC | language-binding-spec | OPTIONAL | PARAMETER | POINTER | -// PROTECTED | SAVE | TARGET | VALUE | VOLATILE +// PROTECTED | SAVE | TARGET | VALUE | VOLATILE | +// CUDA-data-attr TYPE_PARSER(construct(accessSpec) || construct(allocatable) || construct(construct("ASYNCHRONOUS"_tok)) || @@ -670,7 +674,17 @@ construct(save) || construct(construct("TARGET"_tok)) || construct(construct("VALUE"_tok)) || - construct(construct("VOLATILE"_tok))) + construct(construct("VOLATILE"_tok)) || + extension( + construct(Parser{}))) + +// CUDA-data-attr -> CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE +TYPE_PARSER("CONSTANT" >> pure(common::CUDADataAttr::Constant) || + "DEVICE" >> pure(common::CUDADataAttr::Device) || + "MANAGED" >> pure(common::CUDADataAttr::Managed) || + "PINNED" >> pure(common::CUDADataAttr::Pinned) || + "SHARED" >> pure(common::CUDADataAttr::Shared) || + "TEXTURE" >> pure(common::CUDADataAttr::Texture)) // R804 object-name -> name constexpr auto objectName{name}; @@ -1158,13 +1172,20 @@ // R928 alloc-opt -> // ERRMSG = errmsg-variable | MOLD = source-expr | -// SOURCE = source-expr | STAT = stat-variable +// SOURCE = source-expr | STAT = stat-variable | +// (CUDA) STREAM = scalar-int-expr +// PINNED = scalar-logical-variable // R931 source-expr -> expr TYPE_PARSER(construct( construct("MOLD =" >> indirect(expr))) || construct( construct("SOURCE =" >> indirect(expr))) || - construct(statOrErrmsg)) + construct(statOrErrmsg) || + extension( + construct(construct( + "STREAM =" >> indirect(scalarIntExpr))) || + construct(construct( + "PINNED =" >> indirect(scalarLogicalVariable))))) // R929 stat-variable -> scalar-int-variable TYPE_PARSER(construct(scalar(integer(variable)))) @@ -1216,15 +1237,12 @@ // !DIR$ IGNORE_TKR [ [(tkrdmac...)] name ]... // !DIR$ LOOP COUNT (n1[, n2]...) // !DIR$ name... -constexpr auto beginDirective{skipStuffBeforeStatement >> "!"_ch}; -constexpr auto endDirective{space >> endOfLine}; constexpr auto ignore_tkr{ "DIR$ IGNORE_TKR" >> optionalList(construct( maybe(parenthesized(many(letter))), name))}; constexpr auto loopCount{ "DIR$ LOOP COUNT" >> construct( parenthesized(nonemptyList(digitString64)))}; - TYPE_PARSER(beginDirective >> sourced(construct(ignore_tkr) || construct(loopCount) || @@ -1240,6 +1258,12 @@ construct("(" >> objectName / ",", objectName, maybe(Parser{}) / ")"))))) +// CUDA-attributes-stmt -> ATTRIBUTES (CUDA-data-attr) [::] name-list +TYPE_PARSER(extension(construct( + "ATTRIBUTES" >> parenthesized(Parser{}), + defaulted( + maybe("::"_tok) >> nonemptyList("expected names"_err_en_US, name))))) + // Subtle: the name includes the surrounding slashes, which avoids // clashes with other uses of the name in the same scope. TYPE_PARSER(construct( Index: flang/lib/Parser/basic-parsers.h =================================================================== --- flang/lib/Parser/basic-parsers.h +++ flang/lib/Parser/basic-parsers.h @@ -852,6 +852,7 @@ constexpr NonstandardParser(const NonstandardParser &) = default; constexpr NonstandardParser(PA parser, MessageFixedText msg) : parser_{parser}, message_{msg} {} + constexpr NonstandardParser(PA parser) : parser_{parser} {} std::optional Parse(ParseState &state) const { if (UserState * ustate{state.userState()}) { if (!ustate->features().IsEnabled(LF)) { @@ -860,7 +861,7 @@ } auto at{state.GetLocation()}; auto result{parser_.Parse(state)}; - if (result) { + if (result && !message_.empty()) { state.Nonstandard( CharBlock{at, std::max(state.GetLocation(), at + 1)}, LF, message_); } @@ -877,6 +878,11 @@ return NonstandardParser(parser, feature); } +template +inline constexpr auto extension(PA parser) { + return NonstandardParser(parser); +} + // If a is a parser for some deprecated or deleted language feature LF, // deprecated(a) is a parser that is optionally enabled, sets a strict // conformance violation flag, and may emit a warning message, if enabled. Index: flang/lib/Parser/executable-parsers.cpp =================================================================== --- flang/lib/Parser/executable-parsers.cpp +++ flang/lib/Parser/executable-parsers.cpp @@ -9,6 +9,7 @@ // Per-type parsers for executable statements #include "basic-parsers.h" +#include "debug-parser.h" #include "expr-parsers.h" #include "misc-parsers.h" #include "stmt-parser.h" @@ -30,29 +31,31 @@ // action-stmt | associate-construct | block-construct | // case-construct | change-team-construct | critical-construct | // do-construct | if-construct | select-rank-construct | -// select-type-construct | where-construct | forall-construct -constexpr auto executableConstruct{ - first(construct(CapturedLabelDoStmt{}), - construct(EndDoStmtForCapturedLabelDoStmt{}), - construct(indirect(Parser{})), - // Attempt DO statements before assignment statements for better - // error messages in cases like "DO10I=1,(error)". - construct(statement(actionStmt)), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(Parser{})), - construct(indirect(whereConstruct)), - construct(indirect(forallConstruct)), - construct(indirect(ompEndLoopDirective)), - construct(indirect(openmpConstruct)), - construct(indirect(accEndCombinedDirective)), - construct(indirect(openaccConstruct)), - construct(indirect(compilerDirective)))}; +// select-type-construct | where-construct | forall-construct | +// (CUDA) CUF-kernel-do-construct +constexpr auto executableConstruct{first( + construct(CapturedLabelDoStmt{}), + construct(EndDoStmtForCapturedLabelDoStmt{}), + construct(indirect(Parser{})), + // Attempt DO statements before assignment statements for better + // error messages in cases like "DO10I=1,(error)". + construct(statement(actionStmt)), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(Parser{})), + construct(indirect(whereConstruct)), + construct(indirect(forallConstruct)), + construct(indirect(ompEndLoopDirective)), + construct(indirect(openmpConstruct)), + construct(indirect(accEndCombinedDirective)), + construct(indirect(openaccConstruct)), + construct(indirect(compilerDirective)), + construct(indirect(Parser{})))}; // R510 execution-part-construct -> // executable-construct | format-stmt | entry-stmt | data-stmt @@ -524,4 +527,28 @@ construct("UNLOCK (" >> lockVariable, defaulted("," >> nonemptyList(statOrErrmsg)) / ")")) +// CUF-kernel-do-construct -> CUF-kernel-do-directive do-construct +// CUF-kernel-do-directive -> +// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream] +// >>> do-construct +// grid -> * | scalar-int-expr | ( scalar-int-expr-list ) +// block -> * | scalar-int-expr | ( scalar-int-expr-list ) +// stream -> ( 0, | STREAM = ) scalar-int-expr +TYPE_PARSER(sourced(beginDirective >> "$CUF KERNEL DO"_tok >> + construct( + maybe(parenthesized(scalarIntConstantExpr)), + "<<<" >> + ("*" >> pure>() || + parenthesized(nonemptyList(scalarIntExpr)) || + applyFunction(singletonList, scalarIntExpr)), + "," >> ("*" >> pure>() || + parenthesized(nonemptyList(scalarIntExpr)) || + applyFunction(singletonList, scalarIntExpr)), + maybe((", 0 ,"_tok || ", STREAM ="_tok) >> scalarIntExpr) / ">>>" / + endDirective))) +TYPE_CONTEXT_PARSER("!$CUF KERNEL DO construct"_en_US, + extension(construct( + Parser{}, + maybe(Parser{})))) + } // namespace Fortran::parser Index: flang/lib/Parser/io-parsers.cpp =================================================================== --- flang/lib/Parser/io-parsers.cpp +++ flang/lib/Parser/io-parsers.cpp @@ -301,11 +301,6 @@ construct("IOMSG =" >> msgVariable), construct("IOSTAT =" >> statVariable))) -template common::IfNoLvalue, A> singletonList(A &&x) { - std::list result; - result.push_front(std::move(x)); - return result; -} constexpr auto bareUnitNumberAsList{ applyFunction(singletonList, construct(fileUnitNumber))}; Index: flang/lib/Parser/misc-parsers.h =================================================================== --- flang/lib/Parser/misc-parsers.h +++ flang/lib/Parser/misc-parsers.h @@ -52,5 +52,10 @@ constexpr auto protectedAttr{construct("PROTECTED"_tok)}; constexpr auto save{construct("SAVE"_tok)}; +template common::IfNoLvalue, A> singletonList(A &&x) { + std::list result; + result.emplace_back(std::move(x)); + return result; +} } // namespace Fortran::parser #endif Index: flang/lib/Parser/parse-tree.cpp =================================================================== --- flang/lib/Parser/parse-tree.cpp +++ flang/lib/Parser/parse-tree.cpp @@ -132,7 +132,7 @@ }, [&](common::Indirection &z) { return WithSource( - z.value().v.source, Expr{std::move(z.value())}); + z.value().source, Expr{std::move(z.value())}); }, }, y.value().u); @@ -151,10 +151,10 @@ common::visitors{ [&](const Name &name) { return WithSource( - v.source, MakeArrayElementRef(name, std::move(args))); + source, MakeArrayElementRef(name, std::move(args))); }, [&](ProcComponentRef &pcr) { - return WithSource(v.source, + return WithSource(source, MakeArrayElementRef(std::move(pcr.v.thing), std::move(args))); }, }, @@ -226,9 +226,10 @@ } CHECK(*source.end() == ')'); source = CharBlock{source.begin(), source.end() + 1}; - FunctionReference funcRef{WithSource(source, + FunctionReference funcRef{ Call{ProcedureDesignator{Name{funcName.source, funcName.symbol}}, - std::move(actuals)})}; + std::move(actuals)}}; + funcRef.source = source; auto variable{Variable{common::Indirection{std::move(funcRef)}}}; return Statement{std::nullopt, ActionStmt{common::Indirection{ @@ -242,7 +243,7 @@ return des.value().source; }, [&](const common::Indirection &call) { - return call.value().v.source; + return call.value().source; }, }, u); Index: flang/lib/Parser/parsing.cpp =================================================================== --- flang/lib/Parser/parsing.cpp +++ flang/lib/Parser/parsing.cpp @@ -84,6 +84,11 @@ prescanner.AddCompilerDirectiveSentinel("$omp"); prescanner.AddCompilerDirectiveSentinel("$"); // OMP conditional line } + if (options.features.IsEnabled(LanguageFeature::CUDA)) { + prescanner.AddCompilerDirectiveSentinel("$cuf"); + prescanner.AddCompilerDirectiveSentinel("@cuf"); + preprocessor.Define("_CUDA", "1"); + } ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; prescanner.Prescan(range); Index: flang/lib/Parser/preprocessor.cpp =================================================================== --- flang/lib/Parser/preprocessor.cpp +++ flang/lib/Parser/preprocessor.cpp @@ -601,11 +601,12 @@ TokenSequence braced{dir, j + 1, k - j - 1}; include = braced.ToString(); j = k; - } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && - include.substr(include.size() - 1, 1) == "\"") { // #include "foo" + } else if (((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" || + include.substr(0, 1) == "'") && + include.substr(include.size() - 1, 1) == include.substr(0, 1)) { + // #include "foo" and #include 'foo' include = include.substr(1, include.size() - 2); - // #include "foo" starts search in directory of file containing - // the directive + // Start search in directory of file containing the directive auto prov{dir.GetTokenProvenanceRange(dirOffset).start()}; if (const auto *currentFile{allSources_.GetSourceFile(prov)}) { prependPath = DirectoryName(currentFile->path()); Index: flang/lib/Parser/prescan.cpp =================================================================== --- flang/lib/Parser/prescan.cpp +++ flang/lib/Parser/prescan.cpp @@ -127,6 +127,17 @@ } else { SkipSpaces(); } + } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && + directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && + directiveSentinel_[4] == '\0') { + // CUDA conditional compilation line. Remove the sentinel and then + // treat the line as if it were normal source. + at_ += 5, column_ += 5; + if (inFixedForm_) { + LabelField(tokens); + } else { + SkipSpaces(); + } } else { // Compiler directive. Emit normalized sentinel. EmitChar(tokens, '!'); Index: flang/lib/Parser/program-parsers.cpp =================================================================== --- flang/lib/Parser/program-parsers.cpp +++ flang/lib/Parser/program-parsers.cpp @@ -81,10 +81,10 @@ // are in contexts that impose constraints on the kinds of statements that // are allowed, and so we have a variant production for declaration-construct // that implements those constraints. -constexpr auto execPartLookAhead{ - first(actionStmt >> ok, openaccConstruct >> ok, openmpConstruct >> ok, - "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, - "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)}; +constexpr auto execPartLookAhead{first(actionStmt >> ok, openaccConstruct >> ok, + openmpConstruct >> ok, "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, + "CHANGE TEAM"_sptok, "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, + "FORALL ("_tok, "!$CUF"_tok)}; constexpr auto declErrorRecovery{ stmtErrorRecoveryStart >> !execPartLookAhead >> skipStmtErrorRecovery}; constexpr auto misplacedSpecificationStmt{Parser{} >> @@ -168,7 +168,8 @@ // codimension-stmt | contiguous-stmt | dimension-stmt | external-stmt | // intent-stmt | intrinsic-stmt | namelist-stmt | optional-stmt | // pointer-stmt | protected-stmt | save-stmt | target-stmt | -// volatile-stmt | value-stmt | common-stmt | equivalence-stmt +// volatile-stmt | value-stmt | common-stmt | equivalence-stmt | +// (CUDA) CUDA-attributes-stmt TYPE_PARSER(first( construct(indirect(Parser{})), construct(indirect(Parser{})), @@ -190,7 +191,8 @@ construct(indirect(Parser{})), construct(indirect(Parser{})), construct(indirect(Parser{})), - construct(indirect(Parser{})))) + construct(indirect(Parser{})), + construct(indirect(Parser{})))) // R1401 main-program -> // [program-stmt] [specification-part] [execution-part] @@ -422,16 +424,25 @@ TYPE_PARSER( "INTRINSIC" >> maybe("::"_tok) >> construct(listOfNames)) -// R1520 function-reference -> procedure-designator ( [actual-arg-spec-list] ) +// R1520 function-reference -> procedure-designator +// ( [actual-arg-spec-list] ) TYPE_CONTEXT_PARSER("function reference"_en_US, - construct( - sourced(construct(Parser{}, + sourced(construct( + construct(Parser{}, parenthesized(optionalList(actualArgSpec))))) / !"["_tok) -// R1521 call-stmt -> CALL procedure-designator [( [actual-arg-spec-list] )] +// R1521 call-stmt -> CALL procedure-designator [chevrons] +/// [( [actual-arg-spec-list] )] +// (CUDA) chevrons -> <<< scalar-expr, scalar-expr [, scalar-int-expr +// [, scalar-int-expr ] ] >>> +TYPE_PARSER(extension( + "<<<" >> construct(scalarExpr, "," >> scalarExpr, + maybe("," >> scalarIntExpr), maybe("," >> scalarIntExpr)) / + ">>>")) TYPE_PARSER(construct( - sourced(construct("CALL" >> Parser{}, + sourced(construct("CALL" >> Parser{}, + maybe(Parser{}), defaulted(parenthesized(optionalList(actualArgSpec))))))) // R1522 procedure-designator -> @@ -467,7 +478,13 @@ // R1527 prefix-spec -> // declaration-type-spec | ELEMENTAL | IMPURE | MODULE | -// NON_RECURSIVE | PURE | RECURSIVE +// NON_RECURSIVE | PURE | RECURSIVE | +// (CUDA) ATTRIBUTES ( (DEVICE | GLOBAL | GRID_GLOBAL | HOST)... ) | +// LAUNCH_BOUNDS(expr-list) | CLUSTER_DIMS(expr-list) +TYPE_PARSER(first("DEVICE" >> pure(common::CUDASubprogramAttrs::Device), + "GLOBAL" >> pure(common::CUDASubprogramAttrs::Global), + "GRID_GLOBAL" >> pure(common::CUDASubprogramAttrs::Grid_Global), + "HOST" >> pure(common::CUDASubprogramAttrs::Host))) TYPE_PARSER(first(construct(declarationTypeSpec), construct(construct("ELEMENTAL"_tok)), construct(construct("IMPURE"_tok)), @@ -475,7 +492,19 @@ construct( construct("NON_RECURSIVE"_tok)), construct(construct("PURE"_tok)), - construct(construct("RECURSIVE"_tok)))) + construct(construct("RECURSIVE"_tok)), + extension( + construct(construct("ATTRIBUTES" >> + parenthesized( + optionalList(Parser{}))))), + extension(construct( + construct("LAUNCH_BOUNDS" >> + parenthesized(nonemptyList( + "expected launch bounds"_err_en_US, scalarIntConstantExpr))))), + extension(construct( + construct("CLUSTER_DIMS" >> + parenthesized(nonemptyList("expected cluster dimensions"_err_en_US, + scalarIntConstantExpr))))))) // R1529 function-subprogram -> // function-stmt [specification-part] [execution-part] Index: flang/lib/Parser/stmt-parser.h =================================================================== --- flang/lib/Parser/stmt-parser.h +++ flang/lib/Parser/stmt-parser.h @@ -105,5 +105,9 @@ (many(!"END"_tok >> SkipPast<'\n'>{}) >> ("END"_tok >> SkipTo<'\n'>{} || consumedAllInput)) >> missingOptionalName}; + +constexpr auto beginDirective{skipStuffBeforeStatement >> "!"_ch}; +constexpr auto endDirective{space >> endOfLine}; + } // namespace Fortran::parser #endif // FORTRAN_PARSER_STMT_PARSER_H_ Index: flang/lib/Parser/unparse.cpp =================================================================== --- flang/lib/Parser/unparse.cpp +++ flang/lib/Parser/unparse.cpp @@ -811,6 +811,8 @@ common::visit(common::visitors{ [&](const AllocOpt::Mold &) { Word("MOLD="); }, [&](const AllocOpt::Source &) { Word("SOURCE="); }, + [&](const AllocOpt::Stream &) { Word("STREAM="); }, + [&](const AllocOpt::Pinned &) { Word("PINNED="); }, [](const StatOrErrmsg &) {}, }, x.u); @@ -1676,19 +1678,26 @@ void Unparse(const IntrinsicStmt &x) { // R1519 Word("INTRINSIC :: "), Walk(x.v, ", "); } + void Unparse(const CallStmt::Chevrons &x) { // CUDA + Walk(std::get<0>(x.t)); // grid + Word(","), Walk(std::get<1>(x.t)); // block + Walk(",", std::get<2>(x.t)); // bytes + Walk(",", std::get<3>(x.t)); // stream + } void Unparse(const FunctionReference &x) { // R1520 Walk(std::get(x.v.t)); Put('('), Walk(std::get>(x.v.t), ", "), Put(')'); } void Unparse(const CallStmt &x) { // R1521 - if (asFortran_ && x.typedCall.get()) { + if (asFortran_ && x.typedCall.get() && !x.chevrons /*CUDA todo*/) { Put(' '); asFortran_->call(out_, *x.typedCall); Put('\n'); } else { - const auto &pd{std::get(x.v.t)}; - const auto &args{std::get>(x.v.t)}; + const auto &pd{std::get(x.call.t)}; Word("CALL "), Walk(pd); + Walk("<<<", x.chevrons, ">>>"); + const auto &args{std::get>(x.call.t)}; if (args.empty()) { if (std::holds_alternative(pd.u)) { Put("()"); // pgf90 crashes on CALL to tbp without parentheses @@ -1717,6 +1726,15 @@ void Post(const PrefixSpec::Non_Recursive) { Word("NON_RECURSIVE"); } void Post(const PrefixSpec::Pure) { Word("PURE"); } void Post(const PrefixSpec::Recursive) { Word("RECURSIVE"); } + void Unparse(const PrefixSpec::Attributes &x) { + Word("ATTRIBUTES("), Walk(x.v), Word(")"); + } + void Unparse(const PrefixSpec::Launch_Bounds &x) { + Word("LAUNCH_BOUNDS("), Walk(x.v), Word(")"); + } + void Unparse(const PrefixSpec::Cluster_Dims &x) { + Word("CLUSTER_DIMS("), Walk(x.v), Word(")"); + } void Unparse(const FunctionStmt &x) { // R1530 Walk("", std::get>(x.t), " ", " "); Word("FUNCTION "), Walk(std::get(x.t)), Put("("); @@ -1861,9 +1879,6 @@ Walk(std::get>(x.t), ":"); Walk(std::get(x.t)); } - void Unparse(const AccDataModifier::Modifier &x) { - Word(AccDataModifier::EnumToString(x)); - } void Unparse(const AccBindClause &x) { common::visit(common::visitors{ [&](const Name &y) { Put('('), Walk(y), Put(')'); }, @@ -1957,9 +1972,6 @@ x.u); } void Unparse(const AccObjectList &x) { Walk(x.v, ","); } - void Unparse(const AccReductionOperator::Operator &x) { - Word(AccReductionOperator::EnumToString(x)); - } void Unparse(const AccObjectListWithReduction &x) { Walk(std::get(x.t)); Put(":"); @@ -2562,6 +2574,10 @@ Walk("(", std::get>(x.t), ")"), Put(')'); } void Unparse(const BasedPointerStmt &x) { Walk("POINTER ", x.v, ","); } + void Unparse(const CUDAAttributesStmt &x) { + Word("ATTRIBUTES("), Walk(std::get(x.t)); + Word(") "), Walk(std::get>(x.t), ", "); + } void Post(const StructureField &x) { if (const auto *def{std::get_if>(&x.u)}) { for (const auto &item : @@ -2607,8 +2623,12 @@ #define WALK_NESTED_ENUM(CLASS, ENUM) \ void Unparse(const CLASS::ENUM &x) { Word(CLASS::EnumToString(x)); } + WALK_NESTED_ENUM(AccDataModifier, Modifier) WALK_NESTED_ENUM(AccessSpec, Kind) // R807 + WALK_NESTED_ENUM(AccReductionOperator, Operator) WALK_NESTED_ENUM(common, TypeParamAttr) // R734 + WALK_NESTED_ENUM(common, CUDADataAttr) // CUDA + WALK_NESTED_ENUM(common, CUDASubprogramAttrs) // CUDA WALK_NESTED_ENUM(IntentSpec, Intent) // R826 WALK_NESTED_ENUM(ImplicitStmt, ImplicitNoneNameSpec) // R866 WALK_NESTED_ENUM(ConnectSpec::CharExpr, Kind) // R1205 @@ -2635,6 +2655,38 @@ WALK_NESTED_ENUM(OmpOrderModifier, Kind) // OMP order-modifier #undef WALK_NESTED_ENUM + void Unparse(const CUFKernelDoConstruct::Directive &x) { + Word("!$CUF KERNEL DO"); + Walk(" (", std::get>(x.t), ")"); + Word(" <<<"); + const auto &grid{std::get<1>(x.t)}; + if (grid.empty()) { + Word("*"); + } else if (grid.size() == 1) { + Walk(grid.front()); + } else { + Walk("(", grid, ",", ")"); + } + Word(","); + const auto &block{std::get<2>(x.t)}; + if (block.empty()) { + Word("*"); + } else if (block.size() == 1) { + Walk(block.front()); + } else { + Walk("(", block, ",", ")"); + } + if (const auto &stream{std::get<3>(x.t)}) { + Word(",STREAM="), Walk(*stream); + } + Word(">>>\n"); + } + + void Unparse(const CUFKernelDoConstruct &x) { + Walk(std::get(x.t)); + Walk(std::get>(x.t)); + } + void Done() const { CHECK(indent_ == 0); } private: Index: flang/lib/Semantics/check-allocate.cpp =================================================================== --- flang/lib/Semantics/check-allocate.cpp +++ flang/lib/Semantics/check-allocate.cpp @@ -179,6 +179,8 @@ parserSourceExpr = &mold.v.value(); info.gotMold = true; }, + [](const parser::AllocOpt::Stream &) { /* CUDA coming */ }, + [](const parser::AllocOpt::Pinned &) { /* CUDA coming */ }, }, allocOpt.u); } Index: flang/lib/Semantics/check-do-forall.cpp =================================================================== --- flang/lib/Semantics/check-do-forall.cpp +++ flang/lib/Semantics/check-do-forall.cpp @@ -976,7 +976,7 @@ void DoForallChecker::Leave(const parser::CallStmt &callStmt) { if (const auto &typedCall{callStmt.typedCall}) { const auto &parsedArgs{ - std::get>(callStmt.v.t)}; + std::get>(callStmt.call.t)}; auto parsedArgIter{parsedArgs.begin()}; const evaluate::ActualArguments &checkedArgs{typedCall->arguments()}; for (const auto &checkedOptionalArg : checkedArgs) { Index: flang/lib/Semantics/expression.cpp =================================================================== --- flang/lib/Semantics/expression.cpp +++ flang/lib/Semantics/expression.cpp @@ -2685,8 +2685,8 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::FunctionReference &funcRef, std::optional *structureConstructor) { const parser::Call &call{funcRef.v}; - auto restorer{GetContextualMessages().SetLocation(call.source)}; - ArgumentAnalyzer analyzer{*this, call.source, true /* isProcedureCall */}; + auto restorer{GetContextualMessages().SetLocation(funcRef.source)}; + ArgumentAnalyzer analyzer{*this, funcRef.source, true /* isProcedureCall */}; for (const auto &arg : std::get>(call.t)) { analyzer.Analyze(arg, false /* not subroutine call */); } @@ -2699,7 +2699,7 @@ true /* might be structure constructor */)}) { if (auto *proc{std::get_if(&callee->u)}) { return MakeFunctionRef( - call.source, std::move(*proc), std::move(callee->arguments)); + funcRef.source, std::move(*proc), std::move(callee->arguments)); } CHECK(std::holds_alternative(callee->u)); const Symbol &symbol{*std::get(callee->u)}; @@ -2741,9 +2741,9 @@ } void ExpressionAnalyzer::Analyze(const parser::CallStmt &callStmt) { - const parser::Call &call{callStmt.v}; - auto restorer{GetContextualMessages().SetLocation(call.source)}; - ArgumentAnalyzer analyzer{*this, call.source, true /* isProcedureCall */}; + const parser::Call &call{callStmt.call}; + auto restorer{GetContextualMessages().SetLocation(callStmt.source)}; + ArgumentAnalyzer analyzer{*this, callStmt.source, true /* isProcedureCall */}; const auto &actualArgList{std::get>(call.t)}; for (const auto &arg : actualArgList) { analyzer.Analyze(arg, true /* is subroutine call */); @@ -2754,7 +2754,7 @@ analyzer.GetActuals(), true /* subroutine */)}) { ProcedureDesignator *proc{std::get_if(&callee->u)}; CHECK(proc); - if (CheckCall(call.source, *proc, callee->arguments)) { + if (CheckCall(callStmt.source, *proc, callee->arguments)) { callStmt.typedCall.Reset( new ProcedureRef{std::move(*proc), std::move(callee->arguments), HasAlternateReturns(callee->arguments)}, @@ -3247,7 +3247,7 @@ } else if (name->symbol->Rank() == 0) { if (const Symbol *function{ semantics::IsFunctionResultWithSameNameAsFunction(*name->symbol)}) { - auto &msg{context.Say(funcRef.v.source, + auto &msg{context.Say(funcRef.source, function->flags().test(Symbol::Flag::StmtFunction) ? "Recursive call to statement function '%s' is not allowed"_err_en_US : "Recursive call to '%s' requires a distinct RESULT in its declaration"_err_en_US, @@ -3258,7 +3258,7 @@ return false; } else { if (std::get>(funcRef.v.t).empty()) { - auto &msg{context.Say(funcRef.v.source, + auto &msg{context.Say(funcRef.source, "Reference to array '%s' with empty subscript list"_err_en_US, name->source)}; if (name->symbol) { Index: flang/lib/Semantics/mod-file.cpp =================================================================== --- flang/lib/Semantics/mod-file.cpp +++ flang/lib/Semantics/mod-file.cpp @@ -457,6 +457,31 @@ os << (isAbstract ? "abstract " : "") << "interface\n"; } PutAttrs(os, prefixAttrs, nullptr, false, ""s, " "s); + if (auto attrs{details.cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::HostDevice) { + os << "attributes(host,device) "; + } else { + PutLower(os << "attributes(", common::EnumToString(*attrs)) << ") "; + } + if (!details.cudaLaunchBounds().empty()) { + os << "launch_bounds"; + char sep{'('}; + for (auto x : details.cudaLaunchBounds()) { + os << sep << x; + sep = ','; + } + os << ") "; + } + if (!details.cudaClusterDims().empty()) { + os << "cluster_dims"; + char sep{'('}; + for (auto x : details.cudaClusterDims()) { + os << sep << x; + sep = ','; + } + os << ") "; + } + } os << (details.isFunction() ? "function " : "subroutine "); os << symbol.name() << '('; int n = 0; @@ -710,6 +735,10 @@ }); os << ") " << symbol.name() << '\n'; } + if (auto attr{details.cudaDataAttr()}) { + PutLower(os << "attributes(", common::EnumToString(*attr)) + << ") " << symbol.name() << '\n'; + } } void ModFileWriter::PutProcEntity(llvm::raw_ostream &os, const Symbol &symbol) { @@ -990,6 +1019,7 @@ options.isModuleFile = true; options.features.Enable(common::LanguageFeature::BackslashEscapes); options.features.Enable(common::LanguageFeature::OpenMP); + options.features.Enable(common::LanguageFeature::CUDA); if (!isIntrinsic.value_or(false) && !notAModule) { // The search for this module file will scan non-intrinsic module // directories. If a directory is in both the intrinsic and non-intrinsic Index: flang/lib/Semantics/resolve-names.cpp =================================================================== --- flang/lib/Semantics/resolve-names.cpp +++ flang/lib/Semantics/resolve-names.cpp @@ -238,6 +238,7 @@ public: bool BeginAttrs(); // always returns true Attrs GetAttrs(); + std::optional cudaDataAttr() { return cudaDataAttr_; } Attrs EndAttrs(); bool SetPassNameOn(Symbol &); void SetBindNameOn(Symbol &); @@ -278,9 +279,11 @@ HANDLE_ATTR_CLASS(Value, VALUE) HANDLE_ATTR_CLASS(Volatile, VOLATILE) #undef HANDLE_ATTR_CLASS + bool Pre(const common::CUDADataAttr); protected: std::optional attrs_; + std::optional cudaDataAttr_; Attr AccessSpecToAttr(const parser::AccessSpec &x) { switch (x.v) { @@ -419,7 +422,8 @@ }; // Track array specifications. They can occur in AttrSpec, EntityDecl, -// ObjectDecl, DimensionStmt, CommonBlockObject, or BasedPointerStmt. +// ObjectDecl, DimensionStmt, CommonBlockObject, BasedPointerStmt, and +// ComponentDecl. // 1. INTEGER, DIMENSION(10) :: x // 2. INTEGER :: x(10) // 3. ALLOCATABLE :: x(:) @@ -658,6 +662,8 @@ symbol.attrs().set(attr); symbol.implicitAttrs().set(attr); } + void SetCUDADataAttr( + SourceName, Symbol &, std::optional); protected: FuncResultStack &funcResultStack() { return funcResultStack_; } @@ -851,6 +857,9 @@ void Post(const parser::InterfaceBody::Function &); bool Pre(const parser::Suffix &); bool Pre(const parser::PrefixSpec &); + bool Pre(const parser::PrefixSpec::Attributes &); + void Post(const parser::PrefixSpec::Launch_Bounds &); + void Post(const parser::PrefixSpec::Cluster_Dims &); bool BeginSubprogram(const parser::Name &, Symbol::Flag, bool hasModulePrefix = false, @@ -922,6 +931,7 @@ objectDeclAttr_ = Attr::TARGET; return true; } + bool Pre(const parser::CUDAAttributesStmt &); void Post(const parser::TargetStmt &) { objectDeclAttr_ = std::nullopt; } void Post(const parser::DimensionStmt::Declaration &); void Post(const parser::CodimensionDecl &); @@ -1529,7 +1539,8 @@ llvm_unreachable("This node is handled in ProgramUnit"); } - void NoteExecutablePartCall(Symbol::Flag, const parser::Call &); + void NoteExecutablePartCall( + Symbol::Flag, const parser::Call &, bool hasCUDAChevrons); friend void ResolveSpecificationParts(SemanticsContext &, const Symbol &); @@ -1557,6 +1568,7 @@ void FinishSpecificationParts(const ProgramTree &); void FinishDerivedTypeInstantiation(Scope &); void ResolveExecutionParts(const ProgramTree &); + void UseCUDABuiltinNames(); }; // ImplicitRules implementation @@ -1662,8 +1674,8 @@ // AttrsVisitor implementation bool AttrsVisitor::BeginAttrs() { - CHECK(!attrs_); - attrs_ = std::make_optional(); + CHECK(!attrs_ && !cudaDataAttr_); + attrs_ = Attrs{}; return true; } Attrs AttrsVisitor::GetAttrs() { @@ -1673,6 +1685,7 @@ Attrs AttrsVisitor::EndAttrs() { Attrs result{GetAttrs()}; attrs_.reset(); + cudaDataAttr_.reset(); passName_ = std::nullopt; bindName_.reset(); return result; @@ -1789,6 +1802,15 @@ attrs_->set(attrName); return true; } +bool AttrsVisitor::Pre(const common::CUDADataAttr x) { + if (cudaDataAttr_.value_or(x) != x) { + Say(currStmtSource().value(), + "CUDA data attributes '%s' and '%s' may not both be specified"_err_en_US, + common::EnumToString(*cudaDataAttr_), common::EnumToString(x)); + } + cudaDataAttr_ = x; + return false; +} // DeclTypeSpecVisitor implementation @@ -2688,6 +2710,27 @@ return ok; } +void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol, + std::optional attr) { + if (attr) { + ConvertToObjectEntity(symbol); + if (auto *object{symbol.detailsIf()}) { + if (*attr != object->cudaDataAttr().value_or(*attr)) { + Say(source, + "'%s' already has another CUDA data attribute ('%s')"_err_en_US, + symbol.name(), + common::EnumToString(*object->cudaDataAttr()).substr()); + } else { + object->set_cudaDataAttr(attr); + } + } else { + Say(source, + "'%s' is not an object and may not have a CUDA data attribute"_err_en_US, + symbol.name()); + } + } +} + // ModuleVisitor implementation bool ModuleVisitor::Pre(const parser::Only &x) { @@ -3424,17 +3467,100 @@ if (info.parsedType) { // C1543 Say(currStmtSource().value(), "FUNCTION prefix cannot specify the type more than once"_err_en_US); - return false; } else { info.parsedType = parsedType; info.source = currStmtSource(); - return false; } + return false; } else { return true; } } +bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) { + if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + for (auto attr : attrs.v) { + if (auto current{subp->cudaSubprogramAttrs()}) { + if (attr == *current || + (*current == common::CUDASubprogramAttrs::HostDevice && + (attr == common::CUDASubprogramAttrs::Host || + attr == common::CUDASubprogramAttrs::Device))) { + Say(currStmtSource().value(), + "ATTRIBUTES(%s) appears more than once"_warn_en_US, + common::EnumToString(attr)); + } else if ((attr == common::CUDASubprogramAttrs::Host || + attr == common::CUDASubprogramAttrs::Device) && + (*current == common::CUDASubprogramAttrs::Host || + *current == common::CUDASubprogramAttrs::Device || + *current == common::CUDASubprogramAttrs::HostDevice)) { + // HOST,DEVICE or DEVICE,HOST -> HostDevice + subp->set_cudaSubprogramAttrs( + common::CUDASubprogramAttrs::HostDevice); + } else { + Say(currStmtSource().value(), + "ATTRIBUTES(%s) conflicts with earlier ATTRIBUTES(%s)"_err_en_US, + common::EnumToString(attr), common::EnumToString(*current)); + } + } else { + subp->set_cudaSubprogramAttrs(attr); + } + } + } + return false; +} + +void SubprogramVisitor::Post(const parser::PrefixSpec::Launch_Bounds &x) { + std::vector bounds; + bool ok{true}; + for (const auto &sicx : x.v) { + if (auto value{evaluate::ToInt64(EvaluateExpr(sicx))}) { + bounds.push_back(*value); + } else { + ok = false; + } + } + if (!ok || bounds.size() != 2) { + Say(currStmtSource().value(), + "Operands of LAUNCH_BOUNDS() must be two integer constants"_err_en_US); + } else if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + if (subp->cudaLaunchBounds().empty()) { + subp->set_cudaLaunchBounds(std::move(bounds)); + } else { + Say(currStmtSource().value(), + "LAUNCH_BOUNDS() may only appear once"_err_en_US); + } + } +} + +void SubprogramVisitor::Post(const parser::PrefixSpec::Cluster_Dims &x) { + std::vector dims; + bool ok{true}; + for (const auto &sicx : x.v) { + if (auto value{evaluate::ToInt64(EvaluateExpr(sicx))}) { + dims.push_back(*value); + } else { + ok = false; + } + } + if (!ok || dims.size() != 3) { + Say(currStmtSource().value(), + "Operands of CLUSTER_DIMS() must be three integer constants"_err_en_US); + } else if (auto *subp{currScope().symbol() + ? currScope().symbol()->detailsIf() + : nullptr}) { + if (subp->cudaClusterDims().empty()) { + subp->set_cudaClusterDims(std::move(dims)); + } else { + Say(currStmtSource().value(), + "CLUSTER_DIMS() may only appear once"_err_en_US); + } + } +} + bool SubprogramVisitor::Pre(const parser::InterfaceBody::Subroutine &x) { const auto &name{std::get( std::get>(x.t).statement.t)}; @@ -3766,6 +3892,7 @@ } SubprogramDetails &entryDetails{entrySymbol.get()}; CHECK(entryDetails.entryScope() == &inclusiveScope); + SetCUDADataAttr(name.source, entrySymbol, cudaDataAttr()); entrySymbol.attrs() |= GetAttrs(); SetBindNameOn(entrySymbol); for (const auto &dummyArg : std::get>(stmt.t)) { @@ -4150,6 +4277,7 @@ Attrs attrs{attrs_ ? HandleSaveName(name.source, *attrs_) : Attrs{}}; Symbol &symbol{DeclareUnknownEntity(name, attrs)}; symbol.ReplaceName(name.source); + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); if (const auto &init{std::get>(x.t)}) { ConvertToObjectEntity(symbol) || ConvertToProcEntity(symbol); symbol.set( @@ -4422,6 +4550,23 @@ bool DeclarationVisitor::Pre(const parser::VolatileStmt &x) { return HandleAttributeStmt(Attr::VOLATILE, x.v); } +bool DeclarationVisitor::Pre(const parser::CUDAAttributesStmt &x) { + auto attr{std::get(x.t)}; + for (const auto &name : std::get>(x.t)) { + auto *symbol{FindInScope(name)}; + if (symbol && symbol->has()) { + Say(currStmtSource().value(), + "Cannot apply CUDA data attribute to use-associated '%s'"_err_en_US, + name.source); + } else { + if (!symbol) { + symbol = &MakeSymbol(name, ObjectEntityDetails{}); + } + SetCUDADataAttr(name.source, *symbol, attr); + } + } + return false; +} // Handle a statement that sets an attribute on a list of names. bool DeclarationVisitor::HandleAttributeStmt( Attr attr, const std::list &names) { @@ -4972,6 +5117,7 @@ } if (OkToAddComponent(name)) { auto &symbol{DeclareObjectEntity(name, attrs)}; + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); if (symbol.has()) { if (auto &init{std::get>(x.t)}) { Initialization(name, *init, true); @@ -5079,6 +5225,7 @@ attrs.set(Attr::EXTERNAL); } Symbol &symbol{DeclareProcEntity(name, attrs, procInterface)}; + SetCUDADataAttr(name.source, symbol, cudaDataAttr()); // for error symbol.ReplaceName(name.source); if (dtDetails) { dtDetails->add_component(symbol); @@ -6057,6 +6204,7 @@ attrs.set(Attr::PRIVATE); } Symbol &result{MakeSymbol(name, attrs, std::move(details))}; + SetCUDADataAttr(name, result, cudaDataAttr()); if (result.has()) { derivedType.symbol()->get().add_paramDecl(result); } @@ -6717,7 +6865,8 @@ return false; } bool ResolveNamesVisitor::Pre(const parser::CallStmt &x) { - HandleCall(Symbol::Flag::Subroutine, x.v); + HandleCall(Symbol::Flag::Subroutine, x.call); + Walk(x.chevrons); return false; } @@ -7233,7 +7382,7 @@ // of the subprogram's interface, and to mark as procedures any symbols // that might otherwise have been miscategorized as objects. void ResolveNamesVisitor::NoteExecutablePartCall( - Symbol::Flag flag, const parser::Call &call) { + Symbol::Flag flag, const parser::Call &call, bool hasCUDAChevrons) { auto &designator{std::get(call.t)}; if (const auto *name{std::get_if(&designator.u)}) { // Subtlety: The symbol pointers in the parse tree are not set, because @@ -7245,12 +7394,15 @@ : Symbol::Flag::Subroutine}; if (!symbol->test(other)) { ConvertToProcEntity(*symbol); - if (symbol->has()) { + if (auto *details{symbol->detailsIf()}) { symbol->set(flag); if (IsDummy(*symbol)) { SetImplicitAttr(*symbol, Attr::EXTERNAL); } ApplyImplicitRules(*symbol); + if (hasCUDAChevrons) { + details->set_isCUDAKernel(); + } } } } @@ -7378,6 +7530,7 @@ Walk(ompDecls); Walk(compilerDirectives); Walk(useStmts); + UseCUDABuiltinNames(); ClearUseRenames(); ClearUseOnly(); ClearModuleUses(); @@ -7394,6 +7547,20 @@ return false; } +void ResolveNamesVisitor::UseCUDABuiltinNames() { + if (FindCUDADeviceContext(&currScope())) { + if (const Scope * CUDABuiltins{context().GetCUDABuiltinsScope()}) { + for (const auto &[name, symbol] : *CUDABuiltins) { + if (!FindInScope(name)) { + auto &localSymbol{MakeSymbol(name)}; + localSymbol.set_details(UseDetails{name, *symbol}); + localSymbol.flags() = symbol->flags(); + } + } + } + } +} + // Initial processing on specification constructs, before visiting them. void ResolveNamesVisitor::PreSpecificationConstruct( const parser::SpecificationConstruct &spec) { @@ -7931,10 +8098,11 @@ template bool Pre(const A &) { return true; } template void Post(const A &) {} void Post(const parser::FunctionReference &fr) { - resolver_.NoteExecutablePartCall(Symbol::Flag::Function, fr.v); + resolver_.NoteExecutablePartCall(Symbol::Flag::Function, fr.v, false); } void Post(const parser::CallStmt &cs) { - resolver_.NoteExecutablePartCall(Symbol::Flag::Subroutine, cs.v); + resolver_.NoteExecutablePartCall( + Symbol::Flag::Subroutine, cs.call, cs.chevrons.has_value()); } private: @@ -8241,6 +8409,7 @@ void ResolveNamesVisitor::Post(const parser::Program &) { // ensure that all temps were deallocated CHECK(!attrs_); + CHECK(!cudaDataAttr_); CHECK(!GetDeclTypeSpec()); } Index: flang/lib/Semantics/scope.cpp =================================================================== --- flang/lib/Semantics/scope.cpp +++ flang/lib/Semantics/scope.cpp @@ -92,7 +92,7 @@ const Scope *parent{symbol_->get().parent()}; return parent ? parent->FindSymbol(name) : nullptr; } else if (CanImport(name)) { - return parent_.FindSymbol(name); + return parent_->FindSymbol(name); } else { return nullptr; } @@ -288,7 +288,7 @@ // true if name can be imported or host-associated from parent scope. bool Scope::CanImport(const SourceName &name) const { - if (IsTopLevel() || parent_.IsTopLevel()) { + if (IsTopLevel() || parent_->IsTopLevel()) { return false; } switch (GetImportKind()) { Index: flang/lib/Semantics/semantics.cpp =================================================================== --- flang/lib/Semantics/semantics.cpp +++ flang/lib/Semantics/semantics.cpp @@ -470,6 +470,13 @@ } } +const Scope *SemanticsContext::GetCUDABuiltinsScope() { + if (!CUDABuiltinsScope_) { + CUDABuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); + } + return *CUDABuiltinsScope_; +} + void SemanticsContext::UsePPCFortranBuiltinsModule() { if (ppcBuiltinsScope_ == nullptr) { ppcBuiltinsScope_ = GetBuiltinModule("__fortran_ppc_intrinsics"); Index: flang/lib/Semantics/symbol.cpp =================================================================== --- flang/lib/Semantics/symbol.cpp +++ flang/lib/Semantics/symbol.cpp @@ -128,6 +128,22 @@ if (x.defaultIgnoreTKR_) { os << " defaultIgnoreTKR"; } + if (x.cudaSubprogramAttrs_) { + os << " cudaSubprogramAttrs: " + << common::EnumToString(*x.cudaSubprogramAttrs_); + } + if (!x.cudaLaunchBounds_.empty()) { + os << " cudaLaunchBounds:"; + for (auto x : x.cudaLaunchBounds_) { + os << ' ' << x; + } + } + if (!x.cudaClusterDims_.empty()) { + os << " cudaClusterDims:"; + for (auto x : x.cudaClusterDims_) { + os << ' ' << x; + } + } return os; } @@ -414,6 +430,9 @@ os << ' '; x.ignoreTKR_.Dump(os, common::EnumToString); } + if (x.cudaDataAttr()) { + os << " cudaDataAttr: " << common::EnumToString(*x.cudaDataAttr()); + } return os; } @@ -443,6 +462,9 @@ os << " => NULL()"; } } + if (x.isCUDAKernel()) { + os << " isCUDAKernel"; + } return os; } Index: flang/lib/Semantics/tools.cpp =================================================================== --- flang/lib/Semantics/tools.cpp +++ flang/lib/Semantics/tools.cpp @@ -925,11 +925,12 @@ } bool operator()(const parser::CallStmt &stmt) { const auto &procedureDesignator{ - std::get(stmt.v.t)}; + std::get(stmt.call.t)}; if (auto *name{std::get_if(&procedureDesignator.u)}) { // TODO: also ensure that the procedure is, in fact, an intrinsic if (name->source == "move_alloc") { - const auto &args{std::get>(stmt.v.t)}; + const auto &args{ + std::get>(stmt.call.t)}; if (!args.empty()) { const parser::ActualArg &actualArg{ std::get(args.front().t)}; @@ -1057,6 +1058,18 @@ return IsAllocatable(symbol) && IsPolymorphic(symbol); } +const Scope *FindCUDADeviceContext(const Scope *scope) { + return !scope ? nullptr : FindScopeContaining(*scope, [](const Scope &s) { + return IsCUDADeviceContext(&s); + }); +} + +std::optional GetCUDADataAttr(const Symbol *symbol) { + const auto *object{ + symbol ? symbol->detailsIf() : nullptr}; + return object ? object->cudaDataAttr() : std::nullopt; +} + std::optional CheckAccessibleSymbol( const Scope &scope, const Symbol &symbol) { if (symbol.attrs().test(Attr::PRIVATE)) { Index: flang/module/__fortran_builtins.f90 =================================================================== --- flang/module/__fortran_builtins.f90 +++ flang/module/__fortran_builtins.f90 @@ -43,6 +43,15 @@ integer, parameter :: __builtin_atomic_int_kind = selected_int_kind(18) integer, parameter :: __builtin_atomic_logical_kind = __builtin_atomic_int_kind + procedure(type(__builtin_c_ptr)) :: __builtin_c_loc + + type :: __builtin_dim3 + integer :: x=1, y=1, z=1 + end type + type(__builtin_dim3) :: & + __builtin_threadIdx, __builtin_blockDim, __builtin_blockIdx, __builtin_gridDim + integer, parameter :: __builtin_warpsize = 32 + intrinsic :: __builtin_ieee_is_nan, __builtin_ieee_is_negative, & __builtin_ieee_is_normal intrinsic :: __builtin_ieee_next_after, __builtin_ieee_next_down, & Index: flang/test/Parser/cuf-sanity-common =================================================================== --- /dev/null +++ flang/test/Parser/cuf-sanity-common @@ -0,0 +1,37 @@ +! Common source for CUF parse tree and unparsing tests. +!@cuf subroutine atcuf; +end + +#ifdef _CUDA + subroutine cudadefd; +end +#endif + +module m + real, allocatable, pinned ::pa(:) + contains + attributes(device) subroutine devicesub; end + attributes(device) real function devicefunc(); devicefunc = 1.; end + attributes(global) subroutine globalsub; end + attributes(grid_global) subroutine gridglobalsub; end + attributes(host) subroutine hostsub; end + attributes(global) launch_bounds(1, 2) subroutine lbsub; end + attributes(global) cluster_dims(1, 2, 3) subroutine cdsub; end + attributes(device) subroutine attrs + attributes(device) :: devx1 + real, device :: devx2 + end subroutine + subroutine test + logical isPinned + !$cuf kernel do(1) <<<*, *, stream = 1>>> + do j = 1, 10 + end do + !$cuf kernel do <<<1, (2, 3), stream = 1>>> + do j = 1, 10 + end do + call globalsub<<<1, 2>>> + call globalsub<<<1, 2, 3>>> + call globalsub<<<1, 2, 3, 4>>> + allocate(pa(32), stream = 1, pinned = isPinned) + end subroutine +end module Index: flang/test/Parser/cuf-sanity-tree.CUF =================================================================== --- /dev/null +++ flang/test/Parser/cuf-sanity-tree.CUF @@ -0,0 +1,195 @@ +! RUN: %flang_fc1 -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s +include "cuf-sanity-common" +!CHECK: Program -> ProgramUnit -> SubroutineSubprogram +!CHECK: | SubroutineStmt +!CHECK: | | Name = 'atcuf' +!CHECK: | SpecificationPart +!CHECK: | | ImplicitPart -> +!CHECK: | ExecutionPart -> Block +!CHECK: | EndSubroutineStmt -> +!CHECK: ProgramUnit -> SubroutineSubprogram +!CHECK: | SubroutineStmt +!CHECK: | | Name = 'cudadefd' +!CHECK: | SpecificationPart +!CHECK: | | ImplicitPart -> +!CHECK: | ExecutionPart -> Block +!CHECK: | EndSubroutineStmt -> +!CHECK: ProgramUnit -> Module +!CHECK: | ModuleStmt -> Name = 'm' +!CHECK: | SpecificationPart +!CHECK: | | ImplicitPart -> +!CHECK: | | DeclarationConstruct -> SpecificationConstruct -> TypeDeclarationStmt +!CHECK: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> Real +!CHECK: | | | AttrSpec -> Allocatable +!CHECK: | | | AttrSpec -> CUDADataAttr = Pinned +!CHECK: | | | EntityDecl +!CHECK: | | | | Name = 'pa' +!CHECK: | | | | ArraySpec -> DeferredShapeSpecList -> int +!CHECK: | ModuleSubprogramPart +!CHECK: | | ContainsStmt +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Device +!CHECK: | | | | Name = 'devicesub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> FunctionSubprogram +!CHECK: | | | FunctionStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Device +!CHECK: | | | | PrefixSpec -> DeclarationTypeSpec -> IntrinsicTypeSpec -> Real +!CHECK: | | | | Name = 'devicefunc' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'devicefunc=1._4' +!CHECK: | | | | | Variable = 'devicefunc' +!CHECK: | | | | | | Designator -> DataRef -> Name = 'devicefunc' +!CHECK: | | | | | Expr = '1._4' +!CHECK: | | | | | | LiteralConstant -> RealLiteralConstant +!CHECK: | | | | | | | Real = '1.' +!CHECK: | | | EndFunctionStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Global +!CHECK: | | | | Name = 'globalsub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Grid_Global +!CHECK: | | | | Name = 'gridglobalsub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Host +!CHECK: | | | | Name = 'hostsub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Global +!CHECK: | | | | PrefixSpec -> Launch_Bounds -> Scalar -> Integer -> Constant -> Expr = '1_4' +!CHECK: | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | Scalar -> Integer -> Constant -> Expr = '2_4' +!CHECK: | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | Name = 'lbsub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Global +!CHECK: | | | | PrefixSpec -> Cluster_Dims -> Scalar -> Integer -> Constant -> Expr = '1_4' +!CHECK: | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | Scalar -> Integer -> Constant -> Expr = '2_4' +!CHECK: | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | Scalar -> Integer -> Constant -> Expr = '3_4' +!CHECK: | | | | | LiteralConstant -> IntLiteralConstant = '3' +!CHECK: | | | | Name = 'cdsub' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | PrefixSpec -> Attributes -> CUDASubprogramAttrs = Device +!CHECK: | | | | Name = 'attrs' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | | DeclarationConstruct -> SpecificationConstruct -> TypeDeclarationStmt +!CHECK: | | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> Real +!CHECK: | | | | | AttrSpec -> CUDADataAttr = Device +!CHECK: | | | | | EntityDecl +!CHECK: | | | | | | Name = 'devx2' +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | | ModuleSubprogram -> SubroutineSubprogram +!CHECK: | | | SubroutineStmt +!CHECK: | | | | Name = 'test' +!CHECK: | | | SpecificationPart +!CHECK: | | | | ImplicitPart -> +!CHECK: | | | | DeclarationConstruct -> SpecificationConstruct -> TypeDeclarationStmt +!CHECK: | | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> Logical +!CHECK: | | | | | EntityDecl +!CHECK: | | | | | | Name = 'ispinned' +!CHECK: | | | ExecutionPart -> Block +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> CUFKernelDoConstruct +!CHECK: | | | | | Directive +!CHECK: | | | | | | Scalar -> Integer -> Constant -> Expr = '1_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Integer -> Expr = '1_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | DoConstruct +!CHECK: | | | | | | NonLabelDoStmt +!CHECK: | | | | | | | LoopControl -> LoopBounds +!CHECK: | | | | | | | | Scalar -> Name = 'j' +!CHECK: | | | | | | | | Scalar -> Expr = '1_4' +!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | | | Scalar -> Expr = '10_4' +!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!CHECK: | | | | | | Block +!CHECK: | | | | | | EndDoStmt -> +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> CUFKernelDoConstruct +!CHECK: | | | | | Directive +!CHECK: | | | | | | Scalar -> Integer -> Expr = '1_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Integer -> Expr = '2_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3' +!CHECK: | | | | | | Scalar -> Integer -> Expr = '1_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | DoConstruct +!CHECK: | | | | | | NonLabelDoStmt +!CHECK: | | | | | | | LoopControl -> LoopBounds +!CHECK: | | | | | | | | Scalar -> Name = 'j' +!CHECK: | | | | | | | | Scalar -> Expr = '1_4' +!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | | | Scalar -> Expr = '10_4' +!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!CHECK: | | | | | | Block +!CHECK: | | | | | | EndDoStmt -> +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub()' +!CHECK: | | | | | Call +!CHECK: | | | | | | ProcedureDesignator -> Name = 'globalsub' +!CHECK: | | | | | Chevrons +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub()' +!CHECK: | | | | | Call +!CHECK: | | | | | | ProcedureDesignator -> Name = 'globalsub' +!CHECK: | | | | | Chevrons +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '3' +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub()' +!CHECK: | | | | | Call +!CHECK: | | | | | | ProcedureDesignator -> Name = 'globalsub' +!CHECK: | | | | | Chevrons +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Expr -> LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '3' +!CHECK: | | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '4' +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt +!CHECK: | | | | | Allocation +!CHECK: | | | | | | AllocateObject = 'pa' +!CHECK: | | | | | | | Name = 'pa' +!CHECK: | | | | | | AllocateShapeSpec +!CHECK: | | | | | | | Scalar -> Integer -> Expr = '32_4' +!CHECK: | | | | | | | | LiteralConstant -> IntLiteralConstant = '32' +!CHECK: | | | | | AllocOpt -> Stream -> Scalar -> Integer -> Expr = '1_4' +!CHECK: | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | AllocOpt -> Pinned -> Scalar -> Logical -> Variable = 'ispinned' +!CHECK: | | | | | | Designator -> DataRef -> Name = 'ispinned' +!CHECK: | | | EndSubroutineStmt -> +!CHECK: | EndModuleStmt -> Index: flang/test/Parser/cuf-sanity-unparse.CUF =================================================================== --- /dev/null +++ flang/test/Parser/cuf-sanity-unparse.CUF @@ -0,0 +1,41 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s +include "cuf-sanity-common" +!CHECK: SUBROUTINE atcuf +!CHECK: END SUBROUTINE +!CHECK: SUBROUTINE cudadefd +!CHECK: END SUBROUTINE +!CHECK: MODULE m +!CHECK: REAL, ALLOCATABLE, PINNED :: pa(:) +!CHECK: CONTAINS +!CHECK: ATTRIBUTES(DEVICE) SUBROUTINE devicesub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(DEVICE) REAL FUNCTION devicefunc() +!CHECK: devicefunc=1._4 +!CHECK: END FUNCTION +!CHECK: ATTRIBUTES(GLOBAL) SUBROUTINE globalsub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(GRID_GLOBAL) SUBROUTINE gridglobalsub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(HOST) SUBROUTINE hostsub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(GLOBAL) LAUNCH_BOUNDS(1_4, 2_4) SUBROUTINE lbsub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(GLOBAL) CLUSTER_DIMS(1_4, 2_4, 3_4) SUBROUTINE cdsub +!CHECK: END SUBROUTINE +!CHECK: ATTRIBUTES(DEVICE) SUBROUTINE attrs +!CHECK: REAL, DEVICE :: devx2 +!CHECK: END SUBROUTINE +!CHECK: SUBROUTINE test +!CHECK: LOGICAL ispinned +!CHECK: !$CUF KERNEL DO (1_4) <<<*,*,STREAM=1_4>>> +!CHECK: DO j=1_4,10_4 +!CHECK: END DO +!CHECK: !$CUF KERNEL DO <<<1_4,(2_4,3_4),STREAM=1_4>>> +!CHECK: DO j=1_4,10_4 +!CHECK: END DO +!CHECK: CALL globalsub<<<1,2>>> +!CHECK: CALL globalsub<<<1,2,3>>> +!CHECK: CALL globalsub<<<1,2,3,4>>> +!CHECK: ALLOCATE(pa(32_4), STREAM=1_4, PINNED=ispinned) +!CHECK: END SUBROUTINE +!CHECK: END MODULE Index: flang/test/Semantics/cuf01.cuf =================================================================== --- /dev/null +++ flang/test/Semantics/cuf01.cuf @@ -0,0 +1,22 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Test conflicting CUDA subprogram attributes +module m1 + contains + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host,host) subroutine ok1; end + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host) attributes(host) subroutine ok2; end + attributes(host,device) subroutine ok3; end + attributes(device,host) subroutine ok4; end + !WARNING: ATTRIBUTES(Host) appears more than once + attributes(host,device,host) subroutine ok5; end + !WARNING: ATTRIBUTES(Device) appears more than once + attributes(device,host,device) subroutine ok6; end + !ERROR: ATTRIBUTES(Global) conflicts with earlier ATTRIBUTES(Host) + attributes(host,global) subroutine conflict1; end + !ERROR: ATTRIBUTES(Host) conflicts with earlier ATTRIBUTES(Global) + attributes(global,host) subroutine conflict2; end + !ERROR: ATTRIBUTES(Grid_Global) conflicts with earlier ATTRIBUTES(Host) + attributes(host,grid_global) subroutine conflict3; end + !TODO: more with launch_bounds & cluster_dims +end module Index: flang/test/Semantics/modfile55.cuf =================================================================== --- /dev/null +++ flang/test/Semantics/modfile55.cuf @@ -0,0 +1,41 @@ +! RUN: %python %S/test_modfile.py %s %flang_fc1 +! Sanity check for CUDA Fortran attributes in module files +module m + attributes(device) dd + real, managed, allocatable :: md + real, pinned, allocatable :: mp + attributes(constant) cd + contains + attributes(global) subroutine globsub(x,y,z) + real, value :: x + real, device :: y + real, managed :: z + end subroutine + attributes(host,device) real function foo(x) + foo = x + 1. + end function +end + +!Expect: m.mod +!module m +!real(4)::dd +!attributes(device) dd +!real(4),allocatable::md +!attributes(managed) md +!real(4),allocatable::mp +!attributes(pinned) mp +!real(4)::cd +!attributes(constant) cd +!contains +!attributes(global) subroutine globsub(x,y,z) +!real(4),value::x +!real(4)::y +!attributes(device) y +!real(4)::z +!attributes(managed) z +!end +!attributes(host,device) function foo(x) +!real(4)::x +!real(4)::foo +!end +!end Index: flang/test/lib/lit.local.cfg =================================================================== --- flang/test/lib/lit.local.cfg +++ flang/test/lib/lit.local.cfg @@ -1,7 +1,7 @@ -# Excluding .cpp file from the extensions since from this level down they are used for the development +# Excluding .cpp file from the extensions since from this level down they are used for the development config.suffixes = ['.c', '.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', - '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf' + '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', '.CUF', '.f18', '.F18', '.f03', '.F03', '.f08', '.F08', '.ll', '.fir', '.mlir'] Index: flang/test/lit.cfg.py =================================================================== --- flang/test/lit.cfg.py +++ flang/test/lit.cfg.py @@ -26,7 +26,7 @@ # suffixes: A list of file extensions to treat as test files. config.suffixes = ['.c', '.cpp', '.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', - '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf' + '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', '.CUF', '.f18', '.F18', '.f03', '.F03', '.f08', '.F08', '.ll', '.fir', '.mlir']