Index: flang/include/flang/Parser/source.h =================================================================== --- flang/include/flang/Parser/source.h +++ flang/include/flang/Parser/source.h @@ -16,9 +16,12 @@ #include "characters.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include #include +#include #include +#include #include #include #include @@ -36,15 +39,17 @@ class SourceFile; struct SourcePosition { - const SourceFile &file; + const SourceFile &sourceFile; + const std::string &path; // may not be sourceFile.path() when #line present int line, column; + int trueLineNumber; }; class SourceFile { public: explicit SourceFile(Encoding e) : encoding_{e} {} ~SourceFile(); - std::string path() const { return path_; } + const std::string &path() const { return path_; } llvm::ArrayRef content() const { return buf_->getBuffer().slice(bom_end_, buf_end_ - bom_end_); } @@ -55,12 +60,19 @@ bool Open(std::string path, llvm::raw_ostream &error); bool ReadStandardInput(llvm::raw_ostream &error); void Close(); - SourcePosition FindOffsetLineAndColumn(std::size_t) const; + SourcePosition GetSourcePosition(std::size_t) const; std::size_t GetLineStartOffset(int lineNumber) const { return lineStart_.at(lineNumber - 1); } + llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: + struct SourcePositionOrigin { + const std::string &path; + int line; + }; + + void ParseLineDirective(const char *, const std::string *&linePath); void ReadFile(); void IdentifyPayload(); void RecordLineStarts(); @@ -71,6 +83,8 @@ std::size_t bom_end_{0}; std::size_t buf_end_; Encoding encoding_; + std::set distinctPaths_; + std::map origins_; }; } // namespace Fortran::parser #endif // FORTRAN_PARSER_SOURCE_H_ Index: flang/lib/Frontend/FrontendActions.cpp =================================================================== --- flang/lib/Frontend/FrontendActions.cpp +++ flang/lib/Frontend/FrontendActions.cpp @@ -606,10 +606,10 @@ } llvm::outs() << "Found symbol name: " << symbol->name().ToString() << "\n"; - llvm::outs() << symbol->name().ToString() << ": " - << sourceInfo->first.file.path() << ", " - << sourceInfo->first.line << ", " << sourceInfo->first.column - << "-" << sourceInfo->second.column << "\n"; + llvm::outs() << symbol->name().ToString() << ": " << sourceInfo->first.path + << ", " << sourceInfo->first.line << ", " + << sourceInfo->first.column << "-" << sourceInfo->second.column + << "\n"; } void GetSymbolsSourcesAction::executeAction() { Index: flang/lib/Lower/Bridge.cpp =================================================================== --- flang/lib/Lower/Bridge.cpp +++ flang/lib/Lower/Bridge.cpp @@ -716,7 +716,7 @@ loc = cooked->GetSourcePositionRange(block)) { // loc is a pair (begin, end); use the beginning position Fortran::parser::SourcePosition &filePos = loc->first; - llvm::SmallString<256> filePath(filePos.file.path()); + llvm::SmallString<256> filePath(filePos.path); llvm::sys::fs::make_absolute(filePath); llvm::sys::path::remove_dots(filePath); return mlir::FileLineColLoc::get(&getMLIRContext(), filePath.str(), Index: flang/lib/Parser/parsing.cpp =================================================================== --- flang/lib/Parser/parsing.cpp +++ flang/lib/Parser/parsing.cpp @@ -32,7 +32,7 @@ std::string buf; llvm::raw_string_ostream fileError{buf}; - const SourceFile *sourceFile; + const SourceFile *sourceFile{nullptr}; if (path == "-") { sourceFile = allSources.ReadStandardInput(fileError); } else if (options.isModuleFile) { @@ -109,7 +109,7 @@ void Parsing::EmitPreprocessedSource( llvm::raw_ostream &out, bool lineDirectives) const { - const SourceFile *sourceFile{nullptr}; + const std::string *sourcePath{nullptr}; int sourceLine{0}; int column{1}; bool inDirective{false}; @@ -162,8 +162,8 @@ ? allSources.GetSourcePosition(provenance->start()) : std::nullopt}; if (lineDirectives && column == 1 && position) { - if (&position->file != sourceFile) { - out << "#line \"" << position->file.path() << "\" " << position->line + if (&position->path != sourcePath) { + out << "#line \"" << position->path << "\" " << position->line << '\n'; } else if (position->line != sourceLine) { if (sourceLine < position->line && @@ -178,7 +178,7 @@ out << "#line " << position->line << '\n'; } } - sourceFile = &position->file; + sourcePath = &position->path; sourceLine = position->line; } if (column > 72) { Index: flang/lib/Parser/preprocessor.cpp =================================================================== --- flang/lib/Parser/preprocessor.cpp +++ flang/lib/Parser/preprocessor.cpp @@ -444,7 +444,7 @@ nameToken = dir.TokenAt(j); } if (dirName == "line") { - // #line is ignored + // #line was already handled in SourceFile::RecordLineStarts() } else if (dirName == "define") { if (nameToken.empty()) { prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), Index: flang/lib/Parser/prescan.h =================================================================== --- flang/lib/Parser/prescan.h +++ flang/lib/Parser/prescan.h @@ -204,7 +204,7 @@ const char *nextLine_{nullptr}; // next line to process; <= limit_ const char *directiveSentinel_{nullptr}; // current compiler directive - // This data members are state for processing the source line containing + // These data members are state for processing the source line containing // "at_", which goes to up to the newline character before "nextLine_". const char *at_{nullptr}; // next character to process; < nextLine_ int column_{1}; // card image column position of next character Index: flang/lib/Parser/prescan.cpp =================================================================== --- flang/lib/Parser/prescan.cpp +++ flang/lib/Parser/prescan.cpp @@ -349,7 +349,7 @@ if (!start || !end) { return; } - if (&start->file == &end->file && start->line == end->line) { + if (&start->sourceFile == &end->sourceFile && start->line == end->line) { return; // no continuation } j += 3; @@ -377,9 +377,11 @@ auto endOfPrefixPos{ allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; - if (endOfPrefixPos && next && &endOfPrefixPos->file == &start->file && + if (endOfPrefixPos && next && + &endOfPrefixPos->sourceFile == &start->sourceFile && endOfPrefixPos->line == start->line && - (&next->file != &start->file || next->line != start->line)) { + (&next->sourceFile != &start->sourceFile || + next->line != start->line)) { Say(range, "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); } Index: flang/lib/Parser/provenance.cpp =================================================================== --- flang/lib/Parser/provenance.cpp +++ flang/lib/Parser/provenance.cpp @@ -10,6 +10,7 @@ #include "flang/Common/idioms.h" #include "llvm/Support/raw_ostream.h" #include +#include #include namespace Fortran::parser { @@ -251,15 +252,14 @@ common::visit( common::visitors{ [&](const Inclusion &inc) { - o << inc.source.path(); std::size_t offset{origin.covers.MemberOffset(range->start())}; - SourcePosition pos{inc.source.FindOffsetLineAndColumn(offset)}; - o << ':' << pos.line << ':' << pos.column << ": "; + SourcePosition pos{inc.source.GetSourcePosition(offset)}; + o << pos.path << ':' << pos.line << ':' << pos.column << ": "; EmitPrefix(o, color, prefix, this->getShowColors()); o << message << '\n'; if (echoSourceLine) { const char *text{inc.source.content().data() + - inc.source.GetLineStartOffset(pos.line)}; + inc.source.GetLineStartOffset(pos.trueLineNumber)}; o << " "; for (const char *p{text}; *p != '\n'; ++p) { o << *p; @@ -274,7 +274,7 @@ auto last{range->start() + range->size() - 1}; if (&MapToOrigin(last) == &origin) { auto endOffset{origin.covers.MemberOffset(last)}; - auto endPos{inc.source.FindOffsetLineAndColumn(endOffset)}; + auto endPos{inc.source.GetSourcePosition(endOffset)}; if (pos.line == endPos.line) { for (int j{pos.column}; j < endPos.column; ++j) { o << '^'; @@ -351,7 +351,7 @@ common::visitors{ [&](const Inclusion &inc) -> std::optional { std::size_t offset{origin.covers.MemberOffset(prov)}; - return inc.source.FindOffsetLineAndColumn(offset); + return inc.source.GetSourcePosition(offset); }, [&](const Macro &) { return GetSourcePosition(origin.replaces.start()); @@ -373,14 +373,15 @@ } std::string AllSources::GetPath(Provenance at) const { - const SourceFile *source{GetSourceFile(at)}; - return source ? source->path() : ""s; + std::size_t offset{0}; + const SourceFile *source{GetSourceFile(at, &offset)}; + return source ? source->GetSourcePosition(offset).path : ""s; } int AllSources::GetLineNumber(Provenance at) const { std::size_t offset{0}; const SourceFile *source{GetSourceFile(at, &offset)}; - return source ? source->FindOffsetLineAndColumn(offset).line : 0; + return source ? source->GetSourcePosition(offset).line : 0; } Provenance AllSources::CompilerInsertionProvenance(char ch) { @@ -529,6 +530,7 @@ o << "AllSources range_ "; DumpRange(o, range_); o << '\n'; + std::set sources; for (const Origin &m : origin_) { o << " "; DumpRange(o, m.covers); @@ -539,6 +541,7 @@ o << "module "; } o << "file " << inc.source.path(); + sources.emplace(&inc.source); }, [&](const Macro &mac) { o << "macro " << mac.expansion; }, [&](const CompilerInsertion &ins) { @@ -557,6 +560,9 @@ } o << '\n'; } + for (const SourceFile *sf : sources) { + sf->Dump(o); + } return o; } Index: flang/lib/Parser/source.cpp =================================================================== --- flang/lib/Parser/source.cpp +++ flang/lib/Parser/source.cpp @@ -9,34 +9,91 @@ #include "flang/Parser/source.h" #include "flang/Common/idioms.h" #include "flang/Parser/char-buffer.h" +#include "flang/Parser/characters.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include #include namespace Fortran::parser { SourceFile::~SourceFile() { Close(); } -static std::vector FindLineStarts(llvm::StringRef source) { - std::vector result; - if (source.size() > 0) { - CHECK(source.back() == '\n' && "missing ultimate newline"); - std::size_t at{0}; - do { - result.push_back(at); - at = source.find('\n', at) + 1; - } while (at < source.size()); - result.shrink_to_fit(); +void SourceFile::ParseLineDirective( + const char *p, const std::string *&linePath) { + for (; *p == ' ' || *p == '\t'; ++p) { + } + if (*p == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e') { + p += 4; + } else if (*p != '"' && !IsDecimalDigit(*p)) { + return; + } + const std::string *newLinePath{nullptr}; + std::optional lineNumber; + while (true) { + if (*p == '"') { + auto start{++p}; + for (; *p != '"' && *p != '\n'; ++p) { + } + if (*p == '"') { + newLinePath = &*distinctPaths_.emplace(start, p - start).first; + ++p; + } else { + break; + } + } else if (IsDecimalDigit(*p)) { + int ln{0}; + for (; IsDecimalDigit(*p); ++p) { + ln = 10 * ln + *p - '0'; + } + if (!lineNumber) { + lineNumber = ln; + } else { + // ignore column number + } + } else if (*p == ' ' || *p == '\t') { + ++p; + } else { + break; + } + } + if (lineNumber) { + if (newLinePath) { + linePath = newLinePath; + } + // Map the next line's true number to its path/line alias. + origins_.emplace( + lineStart_.size() + 1, SourcePositionOrigin{*linePath, *lineNumber}); } - return result; } void SourceFile::RecordLineStarts() { - lineStart_ = FindLineStarts({content().data(), bytes()}); + if (std::size_t chars{bytes()}; chars > 0) { + origins_.emplace(1, SourcePositionOrigin{path_, 1}); + const char *source{content().data()}; + CHECK(source[chars - 1] == '\n' && "missing ultimate newline"); + const std::string *linePath{&path_}; + std::size_t at{0}; + do { // "at" is always at the beginning of a source line + lineStart_.push_back(at); + for (; source[at] == ' ' || source[at] == '\t'; ++at) { + } + if (source[at] == '#') { + ParseLineDirective(source + at + 1, linePath); + } + // Fast-forward to the beginning of the next line + at = reinterpret_cast( + std::memchr(source + at, '\n', chars - at)) - + source + 1; + } while (at < chars); + CHECK(at == chars); + lineStart_.shrink_to_fit(); + } } // Check for a Unicode byte order mark (BOM). @@ -157,14 +214,34 @@ void SourceFile::Close() { path_.clear(); buf_.reset(); + distinctPaths_.clear(); + origins_.clear(); } -SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { +SourcePosition SourceFile::GetSourcePosition(std::size_t at) const { CHECK(at < bytes()); + auto it{llvm::upper_bound(lineStart_, at)}; + auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1}; + auto ub{origins_.upper_bound(trueLineNumber)}; + auto column{static_cast(at - lineStart_[trueLineNumber - 1] + 1)}; + if (ub == origins_.begin()) { + return {*this, path_, static_cast(trueLineNumber), column, + static_cast(trueLineNumber)}; + } else { + --ub; + const SourcePositionOrigin &origin{ub->second}; + auto lineNumber{ + trueLineNumber - ub->first + static_cast(origin.line)}; + return {*this, origin.path, static_cast(lineNumber), column, + static_cast(trueLineNumber)}; + } +} - auto it = llvm::upper_bound(lineStart_, at); - auto low = std::distance(lineStart_.begin(), it - 1); - return {*this, static_cast(low + 1), - static_cast(at - lineStart_[low] + 1)}; +llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const { + o << "SourceFile '" << path_ << "'\n"; + for (const auto &[at, spo] : origins_) { + o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n'; + } + return o; } } // namespace Fortran::parser Index: flang/lib/Semantics/semantics.cpp =================================================================== --- flang/lib/Semantics/semantics.cpp +++ flang/lib/Semantics/semantics.cpp @@ -557,9 +557,9 @@ for (const auto &pair : symbols) { const Symbol &symbol{pair.second}; if (auto sourceInfo{allCooked.GetSourcePositionRange(symbol.name())}) { - os << symbol.name().ToString() << ": " << sourceInfo->first.file.path() - << ", " << sourceInfo->first.line << ", " << sourceInfo->first.column - << "-" << sourceInfo->second.column << "\n"; + os << symbol.name().ToString() << ": " << sourceInfo->first.path << ", " + << sourceInfo->first.line << ", " << sourceInfo->first.column << "-" + << sourceInfo->second.column << "\n"; } else if (symbol.has()) { os << symbol.name().ToString() << ": " << symbol.GetUltimate().owner().symbol()->name().ToString() << "\n"; Index: flang/test/Driver/debug-provenance.f90 =================================================================== --- flang/test/Driver/debug-provenance.f90 +++ flang/test/Driver/debug-provenance.f90 @@ -9,6 +9,8 @@ ! CHECK-NEXT: [3..3] (1 bytes) -> compiler '\'(0x5c) ! CHECK-NEXT: [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) -> file {{.*[/\\]}}debug-provenance.f90 ! CHECK-NEXT: [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) -> compiler '(after end of source)' +! CHECK-NEXT: SourceFile '{{.*[/\\]}}debug-provenance.f90' +! CHECK-NEXT: origin_[1] -> '{{.*[/\\]}}debug-provenance.f90' 1 ! CHECK-NEXT: CookedSource::provenanceMap_: ! CHECK-NEXT: offsets [{{[0-9]*}}..{{[0-9]*}}] -> provenances [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) ! CHECK-NEXT: CookedSource::invertedMap_: Index: flang/test/Parser/line-directive.f90 =================================================================== --- /dev/null +++ flang/test/Parser/line-directive.f90 @@ -0,0 +1,18 @@ +! RUN: %flang_fc1 -E %s 2>&1 | FileCheck %s +!CHECK: #line "{{.*}}/line-directive.f90" 3 +subroutine s + implicit none + a = 1. +#line 100 +!CHECK: #line 101 + b = 2. +#line "sourceFile.cobol" 200 +!CHECK: #line "sourceFile.cobol" 201 + c = 3. +# 300 +!CHECK: #line 301 + d = 4. +# "/dev/random" 400 +!CHECK: #line "/dev/random" 401 + e = 5. +end Index: flang/test/Semantics/line-directive.f90 =================================================================== --- /dev/null +++ flang/test/Semantics/line-directive.f90 @@ -0,0 +1,18 @@ +!RUN: not %flang -fsyntax-only %s 2>&1 | FileCheck %s +subroutine s + implicit none +!CHECK: line-directive.f90:5:3: error: No explicit type declared for 'a' + a = 1. +#line 100 +!CHECK: line-directive.f90:101:3: error: No explicit type declared for 'b' + b = 2. +#line "sourceFile.cobol" 200 +!CHECK: sourceFile.cobol:201:3: error: No explicit type declared for 'c' + c = 3. +# 300 +!CHECK: sourceFile.cobol:301:3: error: No explicit type declared for 'd' + d = 4. +# "/dev/random" 400 +!CHECK: random:401:3: error: No explicit type declared for 'e' + e = 5. +end