diff --git a/flang/include/flang/Parser/characters.h b/flang/include/flang/Parser/characters.h --- a/flang/include/flang/Parser/characters.h +++ b/flang/include/flang/Parser/characters.h @@ -151,6 +151,33 @@ } } +// True for listed punctuation, identifier starts, digits; no blanks/line ends. +inline constexpr bool IsValidFortranTokenCharacter(char ch) { + switch (ch) { + case '"': + case '%': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '/': + case ':': + case ';': + case '<': + case '=': + case '>': + case '[': + case ']': + return true; + default: + return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch); + } +} + struct EncodedCharacter { static constexpr int maxEncodingBytes{6}; char buffer[maxEncodingBytes]; diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -184,7 +184,8 @@ case LineClassification::Kind::PreprocessorDirective: Say(preprocessed->GetProvenanceRange(), "Preprocessed line resembles a preprocessor directive"_en_US); - preprocessed->ToLowerCase().Emit(cooked_); + preprocessed->ToLowerCase().CheckBadFortranCharacters(messages_).Emit( + cooked_); break; case LineClassification::Kind::CompilerDirective: if (preprocessed->HasRedundantBlanks()) { @@ -193,7 +194,9 @@ NormalizeCompilerDirectiveCommentMarker(*preprocessed); preprocessed->ToLowerCase(); SourceFormChange(preprocessed->ToString()); - preprocessed->ClipComment(true /* skip first ! */).Emit(cooked_); + preprocessed->ClipComment(true /* skip first ! 
*/) + .CheckBadFortranCharacters(messages_) + .Emit(cooked_); break; case LineClassification::Kind::Source: if (inFixedForm_) { @@ -205,7 +208,10 @@ preprocessed->RemoveRedundantBlanks(); } } - preprocessed->ToLowerCase().ClipComment().Emit(cooked_); + preprocessed->ToLowerCase() + .ClipComment() + .CheckBadFortranCharacters(messages_) + .Emit(cooked_); break; } } else { @@ -213,7 +219,7 @@ if (line.kind == LineClassification::Kind::CompilerDirective) { SourceFormChange(tokens.ToString()); } - tokens.Emit(cooked_); + tokens.CheckBadFortranCharacters(messages_).Emit(cooked_); } if (omitNewline_) { omitNewline_ = false; diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h --- a/flang/lib/Parser/token-sequence.h +++ b/flang/lib/Parser/token-sequence.h @@ -27,6 +27,8 @@ namespace Fortran::parser { +class Messages; + // Buffers a contiguous sequence of characters that has been partitioned into // a sequence of preprocessing tokens with provenances. class TokenSequence { @@ -115,6 +117,7 @@ TokenSequence &RemoveBlanks(std::size_t firstChar = 0); TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0); TokenSequence &ClipComment(bool skipFirst = false); + const TokenSequence &CheckBadFortranCharacters(Messages &) const; void Emit(CookedSource &) const; void Dump(llvm::raw_ostream &) const; diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp --- a/flang/lib/Parser/token-sequence.cpp +++ b/flang/lib/Parser/token-sequence.cpp @@ -8,6 +8,7 @@ #include "token-sequence.h" #include "flang/Parser/characters.h" +#include "flang/Parser/message.h" #include "llvm/Support/raw_ostream.h" namespace Fortran::parser { @@ -310,4 +311,25 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const { return GetIntervalProvenanceRange(0, start_.size()); } + +const TokenSequence &TokenSequence::CheckBadFortranCharacters( + Messages &messages) const { + std::size_t tokens{SizeInTokens()}; + for (std::size_t j{0}; j < 
tokens; ++j) { + CharBlock token{TokenAt(j)}; + char ch{token.FirstNonBlank()}; + if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) { + if (ch == '!' && j == 0) { + // a leading '!' on the first token is allowed, as in !dir$ + } else if (ch < ' ' || ch >= '\x7f') { // not printable ASCII: show hex + messages.Say(GetTokenProvenanceRange(j), + "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff); + } else { // printable, but not valid in a Fortran token + messages.Say(GetTokenProvenanceRange(j), + "bad character ('%c') in Fortran token"_err_en_US, ch); + } + } + } + return *this; +} } // namespace Fortran::parser diff --git a/flang/test/Preprocessing/pp130.F90 b/flang/test/Preprocessing/pp130.F90 --- a/flang/test/Preprocessing/pp130.F90 +++ b/flang/test/Preprocessing/pp130.F90 @@ -1,5 +1,5 @@ -! RUN: %f18 -E %s 2>&1 | FileCheck %s -! CHECK: j = j + & +! RUN: (%f18 -E %s 2>&1 || true) | FileCheck %s +! CHECK: error: bad character ('&') in Fortran token ! #define KWM &, use for continuation w/o pasting (ifort and nag seem to continue #define) #define KWM &