diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -38,6 +38,7 @@ // expression of an ASSOCIATE (or related) construct entity. #include "expression.h" +#include "flang/Common/indirection.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" #include @@ -53,6 +54,10 @@ Result operator()(const common::Indirection &x) const { return visitor_(x.value()); } + template + Result operator()(const common::ForwardOwningPointer &p) const { + return visitor_(p.get()); + } template Result operator()(const SymbolRef x) const { return visitor_(*x); } @@ -76,13 +81,17 @@ return visitor_.Default(); } } - template - Result operator()(const std::variant &u) const { - return common::visit(visitor_, u); + template + Result operator()(const std::variant &u) const { + return common::visit([=](const auto &y) { return visitor_(y); }, u); } template Result operator()(const std::vector &x) const { return CombineContents(x); } + template + Result operator()(const std::pair &x) const { + return Combine(x.first, x.second); + } // Leaves Result operator()(const BOZLiteralConstant &) const { @@ -233,14 +242,24 @@ template Result operator()(const Expr &x) const { return visitor_(x.u); } + Result operator()(const Assignment &x) const { + return Combine(x.lhs, x.rhs, x.u); + } + Result operator()(const Assignment::Intrinsic &) const { + return visitor_.Default(); + } + Result operator()(const GenericExprWrapper &x) const { return visitor_(x.v); } + Result operator()(const GenericAssignmentWrapper &x) const { + return visitor_(x.v); + } private: template Result CombineRange(ITER iter, ITER end) const { if (iter == end) { return visitor_.Default(); } else { - Result result{visitor_(*iter++)}; - for (; iter != end; ++iter) { + Result result{visitor_(*iter)}; + for (++iter; iter != end; ++iter) { result = visitor_.Combine(std::move(result), visitor_(*iter)); } return result; diff --git a/flang/include/flang/Parser/tools.h b/flang/include/flang/Parser/tools.h --- a/flang/include/flang/Parser/tools.h +++ b/flang/include/flang/Parser/tools.h @@ -65,6 +65,18 @@ return common::visit([](const auto &y) { return Unwrap(y); }, x); } + template + static const A *Unwrap(const std::tuple &x) { + if constexpr (J < sizeof...(Bs)) { + if (auto result{Unwrap(std::get(x))}) { + return result; + } + return Unwrap(x); + } else { + return nullptr; + } + } + template static const A *Unwrap(const std::optional &o) { if (o) { @@ -122,5 +134,120 @@ template struct HasTypedExpr(A::typedExpr), 0)> : std::true_type {}; + +// GetSource() + +template struct GetSourceHelper { + + using Result = std::optional; + + template static Result GetSource(A *p) { + if (p) { + return GetSource(*p); + } else { + return std::nullopt; + } + } + template + static Result GetSource(const common::Indirection &x) { + return GetSource(x.value()); + } + + template + static Result GetSource(const common::Indirection &x) { + return GetSource(x.value()); + } + + template + static Result GetSource(const std::variant &x) { + return common::visit([](const auto &y) { return GetSource(y); }, x); + } + + template + static Result GetSource(const std::tuple &x) { + if constexpr (J < sizeof...(As)) { + constexpr std::size_t index{GET_FIRST ? J : sizeof...(As) - J - 1}; + if (auto result{GetSource(std::get(x))}) { + return result; + } + return GetSource<(J + 1)>(x); + } else { + return {}; + } + } + + template static Result GetSource(const std::optional &o) { + if (o) { + return GetSource(*o); + } else { + return {}; + } + } + + template static Result GetSource(const std::list &x) { + if constexpr (GET_FIRST) { + for (const A &y : x) { + if (auto result{GetSource(y)}) { + return result; + } + } + } else { + for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) { + if (auto result{GetSource(*iter)}) { + return result; + } + } + } + return {}; + } + + template static Result GetSource(const std::vector &x) { + if constexpr (GET_FIRST) { + for (const A &y : x) { + if (auto result{GetSource(y)}) { + return result; + } + } + } else { + for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) { + if (auto result{GetSource(*iter)}) { + return result; + } + } + } + return {}; + } + + template static Result GetSource(A &x) { + if constexpr (HasSource::value) { + return x.source; + } else if constexpr (ConstraintTrait) { + return GetSource(x.thing); + } else if constexpr (WrapperTrait) { + return GetSource(x.v); + } else if constexpr (UnionTrait) { + return GetSource(x.u); + } else if constexpr (TupleTrait) { + return GetSource(x.t); + } else { + return {}; + } + } +}; + +template std::optional GetSource(const A &x) { + return GetSourceHelper::GetSource(x); +} +template std::optional GetSource(A &x) { + return GetSourceHelper::GetSource(const_cast(x)); +} + +template std::optional GetLastSource(const A &x) { + return GetSourceHelper::GetSource(x); +} +template std::optional GetLastSource(A &x) { + return GetSourceHelper::GetSource(const_cast(x)); +} + } // namespace Fortran::parser #endif // FORTRAN_PARSER_TOOLS_H_ diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -214,8 +214,9 @@ // Defines builtinsScope_ from the __Fortran_builtins module void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } + void UsePPCFortranBuiltinTypesModule(); - const Scope *GetCUDABuiltinsScope(); + const Scope &GetCUDABuiltinsScope(); void UsePPCFortranBuiltinsModule(); Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; } const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -281,7 +282,7 @@ std::set tempNames_; const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types - std::optional CUDABuiltinsScope_; // module __CUDA_builtins + std::optional cudaBuiltinsScope_; // module __CUDA_builtins const Scope *ppcBuiltinsScope_{nullptr}; // module __Fortran_PPC_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1698,7 +1698,7 @@ Put('('), Walk(std::get>(x.v.t), ", "), Put(')'); } void Unparse(const CallStmt &x) { // R1521 - if (asFortran_ && x.typedCall.get() && !x.chevrons /*CUDA todo*/) { + if (asFortran_ && x.typedCall.get()) { Put(' '); asFortran_->call(out_, *x.typedCall); Put('\n'); diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -10,6 +10,7 @@ check-call.cpp check-case.cpp check-coarray.cpp + check-cuda.cpp check-data.cpp check-deallocate.cpp check-declarations.cpp diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp --- a/flang/lib/Semantics/canonicalize-acc.cpp +++ b/flang/lib/Semantics/canonicalize-acc.cpp @@ -65,7 +65,7 @@ const auto &outer{std::get>(x.t)}; if (outer->IsDoConcurrent()) { - return; // Tile is not allowed on DO CONURRENT + return; // Tile is not allowed on DO CONCURRENT } for (const parser::DoConstruct *loop{&*outer}; loop && tileArgNb > 0; --tileArgNb) { diff --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp --- a/flang/lib/Semantics/check-allocate.cpp +++ b/flang/lib/Semantics/check-allocate.cpp @@ -31,6 +31,8 @@ bool gotTypeSpec{false}; bool gotSource{false}; bool gotMold{false}; + bool gotStream{false}; + bool gotPinned{false}; }; class AllocationCheckerHelper { @@ -179,8 +181,22 @@ parserSourceExpr = &mold.v.value(); info.gotMold = true; }, - [](const parser::AllocOpt::Stream &) { /* CUDA coming */ }, - [](const parser::AllocOpt::Pinned &) { /* CUDA coming */ }, + [&](const parser::AllocOpt::Stream &stream) { // CUDA + if (info.gotStream) { + context.Say( + "STREAM may not be duplicated in a ALLOCATE statement"_err_en_US); + stopCheckingAllocate = true; + } + info.gotStream = true; + }, + [&](const parser::AllocOpt::Pinned &pinned) { // CUDA + if (info.gotPinned) { + context.Say( + "PINNED may not be duplicated in a ALLOCATE statement"_err_en_US); + stopCheckingAllocate = true; + } + info.gotPinned = true; + }, }, allocOpt.u); } @@ -569,12 +585,13 @@ return false; } context.CheckIndexVarRedefine(name_); + const Scope &subpScope{ + GetProgramUnitContaining(context.FindScope(name_.source))}; if (allocateObject_.typedExpr && allocateObject_.typedExpr->v) { - if (auto whyNot{ - WhyNotDefinable(name_.source, context.FindScope(name_.source), - {DefinabilityFlag::PointerDefinition, - DefinabilityFlag::AcceptAllocatable}, - *allocateObject_.typedExpr->v)}) { + if (auto whyNot{WhyNotDefinable(name_.source, subpScope, + {DefinabilityFlag::PointerDefinition, + DefinabilityFlag::AcceptAllocatable}, + *allocateObject_.typedExpr->v)}) { context .Say(name_.source, "Name in ALLOCATE statement is not definable"_err_en_US) diff --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h new file mode 100644 --- /dev/null +++ b/flang/lib/Semantics/check-cuda.h @@ -0,0 +1,50 @@ +//===-- lib/Semantics/check-cuda.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_SEMANTICS_CHECK_CUDA_H_ +#define FORTRAN_SEMANTICS_CHECK_CUDA_H_ + +#include "flang/Semantics/semantics.h" +#include + +namespace Fortran::parser { +struct Program; +class Messages; +struct Name; +class CharBlock; +struct ExecutionPartConstruct; +struct ExecutableConstruct; +struct ActionStmt; +struct IfConstruct; +struct CUFKernelDoConstruct; +struct SubroutineSubprogram; +struct FunctionSubprogram; +struct SeparateModuleSubprogram; +} // namespace Fortran::parser + +namespace Fortran::semantics { + +class SemanticsContext; + +class CUDAChecker : public virtual BaseChecker { +public: + explicit CUDAChecker(SemanticsContext &c) : context_{c} {} + void Enter(const parser::SubroutineSubprogram &); + void Enter(const parser::FunctionSubprogram &); + void Enter(const parser::SeparateModuleSubprogram &); + void Enter(const parser::CUFKernelDoConstruct &); + +private: + SemanticsContext &context_; +}; + +bool CanonicalizeCUDA(parser::Program &); + +} // namespace Fortran::semantics + +#endif // FORTRAN_SEMANTICS_CHECK_CUDA_H_ diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Semantics/check-cuda.cpp @@ -0,0 +1,416 @@ +//===-- lib/Semantics/check-cuda.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "check-cuda.h" +#include "flang/Common/template.h" +#include "flang/Evaluate/fold.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Parser/parse-tree-visitor.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" +#include "flang/Semantics/expression.h" +#include "flang/Semantics/symbol.h" + +// Once labeled DO constructs have been canonicalized and their parse subtrees +// transformed into parser::DoConstructs, scan the parser::Blocks of the program +// and merge adjacent CUFKernelDoConstructs and DoConstructs whenever the +// CUFKernelDoConstruct doesn't already have an embedded DoConstruct. Also +// emit errors about improper or missing DoConstructs. + +namespace Fortran::parser { +struct Mutator { + template bool Pre(A &) { return true; } + template void Post(A &) {} + bool Pre(Block &); +}; + +bool Mutator::Pre(Block &block) { + for (auto iter{block.begin()}; iter != block.end(); ++iter) { + if (auto *kernel{Unwrap(*iter)}) { + auto &nested{std::get>(kernel->t)}; + if (!nested) { + if (auto next{iter}; ++next != block.end()) { + if (auto *doConstruct{Unwrap(*next)}) { + nested = std::move(*doConstruct); + block.erase(next); + } + } + } + } else { + Walk(*iter, *this); + } + } + return false; +} +} // namespace Fortran::parser + +namespace Fortran::semantics { + +bool CanonicalizeCUDA(parser::Program &program) { + parser::Mutator mutator; + parser::Walk(program, mutator); + return true; +} + +using MaybeMsg = std::optional; + +// Traverses an evaluate::Expr<> in search of unsupported operations +// on the device. + +struct DeviceExprChecker + : public evaluate::AnyTraverse { + using Result = MaybeMsg; + using Base = evaluate::AnyTraverse; + DeviceExprChecker() : Base(*this) {} + using Base::operator(); + Result operator()(const evaluate::ProcedureDesignator &x) const { + if (const Symbol * sym{x.GetInterfaceSymbol()}) { + const auto *subp{ + sym->GetUltimate().detailsIf()}; + if (subp) { + if (auto attrs{subp->cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::HostDevice || + *attrs == common::CUDASubprogramAttrs::Device) { + return {}; + } + } + } + } else if (x.GetSpecificIntrinsic()) { + // TODO(CUDA): Check for unsupported intrinsics here + return {}; + } + return parser::MessageFormattedText( + "'%s' may not be called in device code"_err_en_US, x.GetName()); + } +}; + +template static MaybeMsg CheckUnwrappedExpr(const A &x) { + if (const auto *expr{parser::Unwrap(x)}) { + return DeviceExprChecker{}(expr->typedExpr); + } + return {}; +} + +template +static void CheckUnwrappedExpr( + SemanticsContext &context, SourceName at, const A &x) { + if (const auto *expr{parser::Unwrap(x)}) { + if (auto msg{DeviceExprChecker{}(expr->typedExpr)}) { + context.Say(at, std::move(*msg)); + } + } +} + +template struct ActionStmtChecker { + template static MaybeMsg WhyNotOk(const A &x) { + if constexpr (ConstraintTrait) { + return WhyNotOk(x.thing); + } else if constexpr (WrapperTrait) { + return WhyNotOk(x.v); + } else if constexpr (UnionTrait) { + return WhyNotOk(x.u); + } else if constexpr (TupleTrait) { + return WhyNotOk(x.t); + } else { + return parser::MessageFormattedText{ + "Statement may not appear in device code"_err_en_US}; + } + } + template + static MaybeMsg WhyNotOk(const common::Indirection &x) { + return WhyNotOk(x.value()); + } + template + static MaybeMsg WhyNotOk(const std::variant &x) { + return common::visit([](const auto &x) { return WhyNotOk(x); }, x); + } + template + static MaybeMsg WhyNotOk(const std::tuple &x) { + if constexpr (J == sizeof...(As)) { + return {}; + } else if (auto msg{WhyNotOk(std::get(x))}) { + return msg; + } else { + return WhyNotOk<(J + 1)>(x); + } + } + template static MaybeMsg WhyNotOk(const std::list &x) { + for (const auto &y : x) { + if (MaybeMsg result{WhyNotOk(y)}) { + return result; + } + } + return {}; + } + template static MaybeMsg WhyNotOk(const std::optional &x) { + if (x) { + return WhyNotOk(*x); + } else { + return {}; + } + } + template + static MaybeMsg WhyNotOk(const parser::UnlabeledStatement &x) { + return WhyNotOk(x.statement); + } + template + static MaybeMsg WhyNotOk(const parser::Statement &x) { + return WhyNotOk(x.statement); + } + static MaybeMsg WhyNotOk(const parser::AllocateStmt &) { + return {}; // AllocateObjects are checked elsewhere + } + static MaybeMsg WhyNotOk(const parser::AllocateCoarraySpec &) { + return parser::MessageFormattedText( + "A coarray may not be allocated on the device"_err_en_US); + } + static MaybeMsg WhyNotOk(const parser::DeallocateStmt &) { + return {}; // AllocateObjects are checked elsewhere + } + static MaybeMsg WhyNotOk(const parser::AssignmentStmt &x) { + return DeviceExprChecker{}(x.typedAssignment); + } + static MaybeMsg WhyNotOk(const parser::CallStmt &x) { + return DeviceExprChecker{}(x.typedCall); + } + static MaybeMsg WhyNotOk(const parser::ContinueStmt &) { return {}; } + static MaybeMsg WhyNotOk(const parser::IfStmt &x) { + if (auto result{ + CheckUnwrappedExpr(std::get(x.t))}) { + return result; + } + return WhyNotOk( + std::get>(x.t) + .statement); + } + static MaybeMsg WhyNotOk(const parser::NullifyStmt &x) { + for (const auto &y : x.v) { + if (MaybeMsg result{DeviceExprChecker{}(y.typedExpr)}) { + return result; + } + } + return {}; + } + static MaybeMsg WhyNotOk(const parser::PointerAssignmentStmt &x) { + return DeviceExprChecker{}(x.typedAssignment); + } +}; + +template class DeviceContextChecker { +public: + explicit DeviceContextChecker(SemanticsContext &c) : context_{c} {} + void CheckSubprogram(const parser::Name &name, const parser::Block &body) { + if (name.symbol) { + const auto *subp{ + name.symbol->GetUltimate().detailsIf()}; + if (subp && subp->moduleInterface()) { + subp = subp->moduleInterface() + ->GetUltimate() + .detailsIf(); + } + if (subp && + subp->cudaSubprogramAttrs().value_or( + common::CUDASubprogramAttrs::Host) != + common::CUDASubprogramAttrs::Host) { + Check(body); + } + } + } + void Check(const parser::Block &block) { + for (const auto &epc : block) { + Check(epc); + } + } + +private: + void Check(const parser::ExecutionPartConstruct &epc) { + common::visit( + common::visitors{ + [&](const parser::ExecutableConstruct &x) { Check(x); }, + [&](const parser::Statement> + &x) { + context_.Say(x.source, + "Device code may not contain an ENTRY statement"_err_en_US); + }, + [](const parser::Statement> + &) {}, + [](const parser::Statement> + &) {}, + [](const parser::Statement< + common::Indirection> &) {}, + [](const parser::ErrorRecovery &) {}, + }, + epc.u); + } + void Check(const parser::ExecutableConstruct &ec) { + common::visit( + common::visitors{ + [&](const parser::Statement &stmt) { + Check(stmt.statement, stmt.source); + }, + [&](const common::Indirection &x) { + if (const std::optional &control{ + x.value().GetLoopControl()}) { + common::visit([&](const auto &y) { Check(y); }, control->u); + } + Check(std::get(x.value().t)); + }, + [&](const common::Indirection &x) { + Check(std::get(x.value().t)); + }, + [&](const common::Indirection &x) { + Check(x.value()); + }, + [&](const auto &x) { + if (auto source{parser::GetSource(x)}) { + context_.Say(*source, + "Statement may not appear in device code"_err_en_US); + } + }, + }, + ec.u); + } + void Check(const parser::ActionStmt &stmt, const parser::CharBlock &source) { + common::visit( + common::visitors{ + [&](const auto &x) { + if (auto msg{ActionStmtChecker::WhyNotOk(x)}) { + context_.Say(source, std::move(*msg)); + } + }, + }, + stmt.u); + } + void Check(const parser::IfConstruct &ic) { + const auto &ifS{std::get>(ic.t)}; + CheckUnwrappedExpr(context_, ifS.source, + std::get(ifS.statement.t)); + Check(std::get(ic.t)); + for (const auto &eib : + std::get>(ic.t)) { + const auto &eIfS{std::get>(eib.t)}; + CheckUnwrappedExpr(context_, eIfS.source, + std::get(eIfS.statement.t)); + Check(std::get(eib.t)); + } + if (const auto &eb{ + std::get>(ic.t)}) { + Check(std::get(eb->t)); + } + } + void Check(const parser::LoopControl::Bounds &bounds) { + Check(bounds.lower); + Check(bounds.upper); + if (bounds.step) { + Check(*bounds.step); + } + } + void Check(const parser::LoopControl::Concurrent &x) { + const auto &header{std::get(x.t)}; + for (const auto &cc : + std::get>(header.t)) { + Check(std::get<1>(cc.t)); + Check(std::get<2>(cc.t)); + if (const auto &step{ + std::get>(cc.t)}) { + Check(*step); + } + } + if (const auto &mask{ + std::get>(header.t)}) { + Check(*mask); + } + } + void Check(const parser::ScalarLogicalExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::ScalarIntExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::ScalarExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::Expr &expr) { + if (MaybeMsg msg{DeviceExprChecker{}(expr.typedExpr)}) { + context_.Say(expr.source, std::move(*msg)); + } + } + + SemanticsContext &context_; +}; + +void CUDAChecker::Enter(const parser::SubroutineSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get( + std::get>(x.t).statement.t), + std::get(x.t).v); +} + +void CUDAChecker::Enter(const parser::FunctionSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get( + std::get>(x.t).statement.t), + std::get(x.t).v); +} + +void CUDAChecker::Enter(const parser::SeparateModuleSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get>(x.t).statement.v, + std::get(x.t).v); +} + +// !$CUF KERNEL DO semantic checks + +static int DoConstructTightNesting( + const parser::DoConstruct *doConstruct, const parser::Block *&innerBlock) { + if (!doConstruct || !doConstruct->IsDoNormal()) { + return 0; + } + innerBlock = &std::get(doConstruct->t); + if (innerBlock->size() == 1) { + if (const auto *execConstruct{ + std::get_if(&innerBlock->front().u)}) { + if (const auto *next{ + std::get_if>( + &execConstruct->u)}) { + return 1 + DoConstructTightNesting(&next->value(), innerBlock); + } + } + } + return 1; +} + +void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) { + auto source{std::get(x.t).source}; + const auto &directive{std::get(x.t)}; + std::int64_t depth{1}; + if (auto expr{AnalyzeExpr(context_, + std::get>( + directive.t))}) { + depth = evaluate::ToInt64(expr).value_or(0); + if (depth <= 0) { + context_.Say(source, + "!$CUF KERNEL DO (%jd): loop nesting depth must be positive"_err_en_US, + std::intmax_t{depth}); + depth = 1; + } + } + const parser::DoConstruct *doConstruct{common::GetPtrFromOptional( + std::get>(x.t))}; + const parser::Block *innerBlock{nullptr}; + if (DoConstructTightNesting(doConstruct, innerBlock) < depth) { + context_.Say(source, + "!$CUF KERNEL DO (%jd) must be followed by a DO construct with tightly nested outer levels of counted DO loops"_err_en_US, + std::intmax_t{depth}); + } + if (innerBlock) { + DeviceContextChecker{context_}.Check(*innerBlock); + } +} + +} // namespace Fortran::semantics diff --git a/flang/lib/Semantics/check-deallocate.cpp b/flang/lib/Semantics/check-deallocate.cpp --- a/flang/lib/Semantics/check-deallocate.cpp +++ b/flang/lib/Semantics/check-deallocate.cpp @@ -19,10 +19,13 @@ void DeallocateChecker::Leave(const parser::DeallocateStmt &deallocateStmt) { for (const parser::AllocateObject &allocateObject : std::get>(deallocateStmt.t)) { + parser::CharBlock source; + const Symbol *symbol{nullptr}; common::visit( common::visitors{ [&](const parser::Name &name) { - auto const *symbol{name.symbol}; + source = name.source; + symbol = name.symbol; if (context_.HasError(symbol)) { // already reported an error } else if (!IsVariableName(*symbol)) { @@ -58,9 +61,10 @@ [&](const parser::StructureComponent &structureComponent) { // Only perform structureComponent checks if it was successfully // analyzed by expression analysis. + source = structureComponent.component.source; + symbol = structureComponent.component.symbol; if (const auto *expr{GetExpr(context_, allocateObject)}) { - if (const Symbol *symbol{structureComponent.component.symbol}) { - auto source{structureComponent.component.source}; + if (symbol) { if (!IsAllocatableOrPointer(*symbol)) { // C932 context_.Say(source, "Component in DEALLOCATE statement must have the ALLOCATABLE or POINTER attribute"_err_en_US); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2740,7 +2740,7 @@ Say(source, "'%s' already has another CUDA data attribute ('%s')"_err_en_US, symbol.name(), - common::EnumToString(*object->cudaDataAttr()).substr()); + std::string{common::EnumToString(*object->cudaDataAttr())}.c_str()); } else { object->set_cudaDataAttr(attr); } @@ -7700,13 +7700,11 @@ void ResolveNamesVisitor::UseCUDABuiltinNames() { if (FindCUDADeviceContext(&currScope())) { - if (const Scope * CUDABuiltins{context().GetCUDABuiltinsScope()}) { - for (const auto &[name, symbol] : *CUDABuiltins) { - if (!FindInScope(name)) { - auto &localSymbol{MakeSymbol(name)}; - localSymbol.set_details(UseDetails{name, *symbol}); - localSymbol.flags() = symbol->flags(); - } + for (const auto &[name, symbol] : context().GetCUDABuiltinsScope()) { + if (!FindInScope(name)) { + auto &localSymbol{MakeSymbol(name)}; + localSymbol.set_details(UseDetails{name, *symbol}); + localSymbol.flags() = symbol->flags(); } } } diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -16,6 +16,7 @@ #include "check-arithmeticif.h" #include "check-case.h" #include "check-coarray.h" +#include "check-cuda.h" #include "check-data.h" #include "check-deallocate.h" #include "check-declarations.h" @@ -69,12 +70,13 @@ // children are visited, Leave is called after. No two checkers may have the // same Enter or Leave function. Each checker must be constructible from // SemanticsContext and have BaseChecker as a virtual base class. -template class SemanticsVisitor : public virtual C... { +template +class SemanticsVisitor : public virtual BaseChecker, public virtual C... { public: - using C::Enter...; - using C::Leave...; using BaseChecker::Enter; using BaseChecker::Leave; + using C::Enter...; + using C::Leave...; SemanticsVisitor(SemanticsContext &context) : C{context}..., context_{context} {} @@ -158,12 +160,14 @@ }; using StatementSemanticsPass1 = ExprChecker; -using StatementSemanticsPass2 = SemanticsVisitor; +using StatementSemanticsPass2 = SemanticsVisitor; +using StatementSemanticsPass3 = + SemanticsVisitor; static bool PerformStatementSemantics( SemanticsContext &context, parser::Program &program) { @@ -174,6 +178,11 @@ StatementSemanticsPass1{context}.Walk(program); StatementSemanticsPass2 pass2{context}; pass2.Walk(program); + if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC) || + context.languageFeatures().IsEnabled(common::LanguageFeature::OpenMP) || + context.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) { + StatementSemanticsPass3{context}.Walk(program); + } if (!context.AnyFatalError()) { pass2.CompileDataInitializationsIntoInitializers(); } @@ -476,11 +485,12 @@ } } -const Scope *SemanticsContext::GetCUDABuiltinsScope() { - if (!CUDABuiltinsScope_) { - CUDABuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); +const Scope &SemanticsContext::GetCUDABuiltinsScope() { + if (!cudaBuiltinsScope_) { + cudaBuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); + CHECK(cudaBuiltinsScope_.value() != nullptr); } - return *CUDABuiltinsScope_; + return **cudaBuiltinsScope_; } void SemanticsContext::UsePPCFortranBuiltinsModule() { @@ -525,6 +535,7 @@ parser::CanonicalizeDo(program_) && // force line break CanonicalizeAcc(context_.messages(), program_) && CanonicalizeOmp(context_.messages(), program_) && + CanonicalizeCUDA(program_) && PerformStatementSemantics(context_, program_) && ModFileWriter{context_}.WriteAll(); } @@ -566,7 +577,7 @@ if (scope.derivedTypeSpec()) { os << " instantiation of " << *scope.derivedTypeSpec(); } - os << '\n'; + os << " sourceRange=" << scope.sourceRange().size() << " bytes\n"; ++indent; for (const auto &pair : scope) { const auto &symbol{*pair.second}; diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90 --- a/flang/module/__fortran_builtins.f90 +++ b/flang/module/__fortran_builtins.f90 @@ -75,4 +75,23 @@ intrinsic :: __builtin_compiler_options, __builtin_compiler_version + interface operator(==) + module procedure __builtin_c_ptr_eq + end interface + interface operator(/=) + module procedure __builtin_c_ptr_eq + end interface + +contains + + elemental logical function __builtin_c_ptr_eq(x, y) + type(__builtin_c_ptr), intent(in) :: x, y + __builtin_c_ptr_eq = x%__address == y%__address + end function + + elemental logical function __builtin_c_ptr_ne(x, y) + type(__builtin_c_ptr), intent(in) :: x, y + __builtin_c_ptr_ne = x%__address /= y%__address + end function + end module diff --git a/flang/module/iso_c_binding.f90 b/flang/module/iso_c_binding.f90 --- a/flang/module/iso_c_binding.f90 +++ b/flang/module/iso_c_binding.f90 @@ -15,7 +15,8 @@ c_ptr => __builtin_c_ptr, & c_funptr => __builtin_c_funptr, & c_sizeof => sizeof, & - c_loc => __builtin_c_loc + c_loc => __builtin_c_loc, & + operator(==), operator(/=) type(c_ptr), parameter :: c_null_ptr = c_ptr(0) type(c_funptr), parameter :: c_null_funptr = c_funptr(0) diff --git a/flang/test/Driver/compiler_options.f90 b/flang/test/Driver/compiler_options.f90 --- a/flang/test/Driver/compiler_options.f90 +++ b/flang/test/Driver/compiler_options.f90 @@ -1,6 +1,6 @@ ! RUN: %flang -S -emit-llvm -o - %s | FileCheck %s ! Test communication of COMPILER_OPTIONS from flang-new to flang-new -fc1. -! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = internal constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90" +! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = {{[a-z]+}} constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90" program main use ISO_FORTRAN_ENV, only: compiler_options implicit none diff --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF --- a/flang/test/Parser/cuf-sanity-unparse.CUF +++ b/flang/test/Parser/cuf-sanity-unparse.CUF @@ -34,9 +34,9 @@ !CHECK: !$CUF KERNEL DO <<<1_4,(2_4,3_4),STREAM=1_4>>> !CHECK: DO j=1_4,10_4 !CHECK: END DO -!CHECK: CALL globalsub<<<1_4,2_4>>> -!CHECK: CALL globalsub<<<1_4,2_4,3_4>>> -!CHECK: CALL globalsub<<<1_4,2_4,3_4,4_4>>> +!CHECK: CALL globalsub<<<1_4,2_4>>>() +!CHECK: CALL globalsub<<<1_4,2_4,3_4>>>() +!CHECK: CALL globalsub<<<1_4,2_4,3_4,4_4>>>() !CHECK: ALLOCATE(pa(32_4), STREAM=1_4, PINNED=ispinned) !CHECK: END SUBROUTINE !CHECK: END MODULE diff --git a/flang/test/Semantics/cuf04.cuf b/flang/test/Semantics/cuf04.cuf new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/cuf04.cuf @@ -0,0 +1,24 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! CUDA Fortran section 2.5.6 restrictions +module m + contains + attributes(device) subroutine devsubr(n) + integer, intent(in) :: n + !WARNING: 'x1' should not have the SAVE attribute or initialization in a device subprogram + real, save :: x1 + !WARNING: 'x2' should not have the SAVE attribute or initialization in a device subprogram + real :: x2 = 1. + !ERROR: Device subprogram 'devsubr' cannot call itself + if (n > 0) call devsubr(n-1) + end subroutine + attributes(global) subroutine globsubr + end subroutine + subroutine boring + end subroutine + subroutine test + !ERROR: 'globsubr' is a kernel subroutine and must be called with kernel launch parameters in chevrons + call globsubr + !ERROR: Kernel launch parameters in chevrons may not be used unless calling a kernel subroutine + call boring<<<1,2>>> + end subroutine +end module diff --git a/flang/test/Semantics/cuf05.cuf b/flang/test/Semantics/cuf05.cuf new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/cuf05.cuf @@ -0,0 +1,19 @@ +! RUN: %flang_fc1 -fdebug-dump-symbols %s 2>&1 | FileCheck --dump-input-context=500 %s +!CHECK: Global scope: size=0 alignment=1 sourceRange=0 bytes +!CHECK: IntrinsicModules scope: size=0 alignment=1 sourceRange=0 bytes +!CHECK: Module scope: __fortran_builtins +!CHECK: Module scope: __cuda_builtins size=0 alignment=1 +!CHECK: Module scope: __fortran_type_info +!CHECK: Module scope: m size=0 alignment=1 +!CHECK: Subprogram scope: devsubr size=0 alignment=1 +module m + implicit none + contains + attributes(device) subroutine devsubr() + !CHECK: blockdim: Use from blockdim in __cuda_builtins + !CHECK: blockidx: Use from blockidx in __cuda_builtins + !CHECK: griddim: Use from griddim in __cuda_builtins + !CHECK: threadidx: Use from threadidx in __cuda_builtins + !CHECK: warpsize: Use from warpsize in __cuda_builtins + end subroutine +end module diff --git a/flang/test/Semantics/cuf06.cuf b/flang/test/Semantics/cuf06.cuf new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/cuf06.cuf @@ -0,0 +1,15 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +module m + use, intrinsic :: __fortran_builtins, only: __builtin_dim3 + contains + attributes(global) subroutine kernel + end subroutine + subroutine test + call kernel<<< 1, 32 >>> ! ok + call kernel<<< __builtin_dim3(1,1), __builtin_dim3(32,1,1) >>> ! ok + !ERROR: Kernel launch grid parameter must be either integer or TYPE(dim3) + call kernel<<< 1.d0, 32 >>> + !ERROR: Kernel launch block parameter must be either integer or TYPE(dim3) + call kernel<<< 1, "abc" >>> + end +end module diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/cuf09.cuf @@ -0,0 +1,76 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +module m + contains + attributes(device) subroutine devsub + !ERROR: Statement may not appear in device code + !$cuf kernel do <<< 1, 2 >>> + do k=1,10 + end do + end +end + +program main + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do while (.false.) + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do + exit + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do concurrent (j=1:10) + end do + !$cuf kernel do <<< 1, 2 >>> + do 1 j=1,10 +1 continue ! ok + !$cuf kernel do <<< 1, 2 >>> + do j=1,10 + end do ! ok + !$cuf kernel do <<< 1, 2 >>> + do j=1,10 + !ERROR: Statement may not appear in device code + !$cuf kernel do <<< 1, 2 >>> + do k=1,10 + end do + end do + !ERROR: !$CUF KERNEL DO (-1): loop nesting depth must be positive + !$cuf kernel do (-1) <<< 1, 2 >>> + do j=1,10 + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + continue + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + end do + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + continue + end do + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + do k=1,10 + end do + continue + end do + !$cuf kernel do <<< 1, 2 >>> + do j = 1, 10 + !ERROR: 'foo' may not be called in device code + call foo + !ERROR: 'bar' may not be called in device code + x = bar() + !ERROR: 'ifunc' may not be called in device code + if (ifunc() /= 0) continue + !ERROR: 'ifunc' may not be called in device code + if (ifunc() /= 0) then + !ERROR: 'ifunc' may not be called in device code + else if (ifunc() /= 1) then + end if + end do +end diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -10,6 +10,7 @@ "__fortran_type_info" "__fortran_ppc_types" "__fortran_ppc_intrinsics" + "__cuda_builtins" "ieee_arithmetic" "ieee_exceptions" "ieee_features" diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#ifndef __clang__ // 16.0.3 lacks + #include "gtest/gtest.h" #include "flang/Runtime/time-intrinsic.h" #include @@ -166,3 +168,4 @@ EXPECT_LE(minutes, 59); } } +#endif // __clang__