// Patch summary (flang): honor `#line` and `# <number> "file"` preprocessing
// directives, which the preprocessor previously ignored.
//
//  - Parser/source.h, Parser/source.cpp: SourcePosition's `file` member is
//    replaced by `sourceFile` and `path` references and gains
//    `trueLineNumber`. SourceFile::FindOffsetLineAndColumn is renamed
//    GetSourcePosition and now consults a new `origins_` map (keyed by true
//    line number, holding a path and a line) that SourceFile::LineDirective
//    fills in. SourceFile::SavePath stores directive paths in
//    `distinctPaths_`; SourceFile::Dump prints the recorded origins.
//    RecordLineStarts seeds `origins_` with {path_, 1} for line 1.
//  - Parser/preprocessor.h, Parser/preprocessor.cpp: new
//    Preprocessor::LineDirective walks the directive's tokens (a quoted path
//    and a decimal line number; any later number is ignored), reports bad
//    tokens, and records the mapping starting at `pos->trueLineNumber + 1`,
//    i.e. the line after the directive.
//  - Parser/provenance.cpp, Parser/parsing.cpp, Parser/prescan.cpp,
//    Lower/Bridge.cpp, Frontend/FrontendActions.cpp, Semantics/semantics.cpp,
//    examples/FlangOmpReport: callers move from `file.path()` to `path` /
//    `sourceFile`. Diagnostic source echo uses `trueLineNumber` to find the
//    line text, while the printed location uses the #line-adjusted values.
//  - Tests: new Parser/line-directive.f90 and Semantics/line-directive.f90;
//    Driver/debug-provenance.f90 expects the new SourceFile dump lines.
//
// NOTE(review): in this copy of the patch the line breaks are collapsed and
// text inside angle brackets appears to have been stripped (bare `#include`,
// `common::Reference` / `std::set` / `std::map` / `std::optional` with no
// arguments, `const_cast(` / `static_cast(` / `reinterpret_cast(` with no
// type). Presumably an extraction artifact -- it will not apply or compile
// as-is; verify against the upstream commit before use.
// NOTE(review): in Preprocessor::LineDirective the validity test
// `nln / 10 == ln && nln % 10 == c - '0'` runs after `10 * ln + c - '0'` has
// already been evaluated in `int`; for a long enough digit string that
// arithmetic can itself overflow before the test is reached. Consider
// bounding `ln` before multiplying -- confirm with the author.
diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -180,8 +180,8 @@ clauseStrings[curConstruct].begin(), clauseStrings[curConstruct].end()); SourcePosition s{getLocation(*curConstruct)}; - LogRecord r{s.file.path(), s.line, getName(*curConstruct), - clauseStrings[curConstruct]}; + LogRecord r{ + s.path, s.line, getName(*curConstruct), clauseStrings[curConstruct]}; constructClauses.push_back(r); auto it = clauseStrings.find(curConstruct); diff --git a/flang/include/flang/Parser/source.h b/flang/include/flang/Parser/source.h --- a/flang/include/flang/Parser/source.h +++ b/flang/include/flang/Parser/source.h @@ -15,10 +15,14 @@ // - A Unicode byte order mark is recognized if present. #include "characters.h" +#include "flang/Common/reference.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include #include +#include #include +#include #include #include #include @@ -36,15 +40,18 @@ class SourceFile; struct SourcePosition { - const SourceFile &file; + common::Reference sourceFile; + common::Reference + path; // may not be sourceFile.path() when #line present int line, column; + int trueLineNumber; }; class SourceFile { public: explicit SourceFile(Encoding e) : encoding_{e} {} ~SourceFile(); - std::string path() const { return path_; } + const std::string &path() const { return path_; } llvm::ArrayRef content() const { return buf_->getBuffer().slice(bom_end_, buf_end_ - bom_end_); } @@ -55,12 +62,20 @@ bool Open(std::string path, llvm::raw_ostream &error); bool ReadStandardInput(llvm::raw_ostream &error); void Close(); - SourcePosition FindOffsetLineAndColumn(std::size_t) const; + SourcePosition GetSourcePosition(std::size_t) const; std::size_t GetLineStartOffset(int lineNumber) const { return lineStart_.at(lineNumber - 1); } + const 
std::string &SavePath(std::string &&); + void LineDirective(int trueLineNumber, const std::string &, int); + llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: + struct SourcePositionOrigin { + const std::string &path; + int line; + }; + void ReadFile(); void IdentifyPayload(); void RecordLineStarts(); @@ -71,6 +86,8 @@ std::size_t bom_end_{0}; std::size_t buf_end_; Encoding encoding_; + std::set distinctPaths_; + std::map origins_; }; } // namespace Fortran::parser #endif // FORTRAN_PARSER_SOURCE_H_ diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -606,10 +606,10 @@ } llvm::outs() << "Found symbol name: " << symbol->name().ToString() << "\n"; - llvm::outs() << symbol->name().ToString() << ": " - << sourceInfo->first.file.path() << ", " - << sourceInfo->first.line << ", " << sourceInfo->first.column - << "-" << sourceInfo->second.column << "\n"; + llvm::outs() << symbol->name().ToString() << ": " << sourceInfo->first.path + << ", " << sourceInfo->first.line << ", " + << sourceInfo->first.column << "-" << sourceInfo->second.column + << "\n"; } void GetSymbolsSourcesAction::executeAction() { diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -716,7 +716,7 @@ loc = cooked->GetSourcePositionRange(block)) { // loc is a pair (begin, end); use the beginning position Fortran::parser::SourcePosition &filePos = loc->first; - llvm::SmallString<256> filePath(filePos.file.path()); + llvm::SmallString<256> filePath(*filePos.path); llvm::sys::fs::make_absolute(filePath); llvm::sys::path::remove_dots(filePath); return mlir::FileLineColLoc::get(&getMLIRContext(), filePath.str(), diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -32,7 +32,7 @@ std::string 
buf; llvm::raw_string_ostream fileError{buf}; - const SourceFile *sourceFile; + const SourceFile *sourceFile{nullptr}; if (path == "-") { sourceFile = allSources.ReadStandardInput(fileError); } else if (options.isModuleFile) { @@ -109,7 +109,7 @@ void Parsing::EmitPreprocessedSource( llvm::raw_ostream &out, bool lineDirectives) const { - const SourceFile *sourceFile{nullptr}; + const std::string *sourcePath{nullptr}; int sourceLine{0}; int column{1}; bool inDirective{false}; @@ -162,8 +162,8 @@ ? allSources.GetSourcePosition(provenance->start()) : std::nullopt}; if (lineDirectives && column == 1 && position) { - if (&position->file != sourceFile) { - out << "#line \"" << position->file.path() << "\" " << position->line + if (&*position->path != sourcePath) { + out << "#line \"" << *position->path << "\" " << position->line << '\n'; } else if (position->line != sourceLine) { if (sourceLine < position->line && @@ -178,7 +178,7 @@ out << "#line " << position->line << '\n'; } } - sourceFile = &position->file; + sourcePath = &*position->path; sourceLine = position->line; } if (column > 72) { diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h --- a/flang/lib/Parser/preprocessor.h +++ b/flang/lib/Parser/preprocessor.h @@ -90,6 +90,7 @@ const std::string &, IsElseActive, Prescanner &, ProvenanceRange); bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first, std::size_t exprTokens, Prescanner &); + void LineDirective(const TokenSequence &, std::size_t, Prescanner &); AllSources &allSources_; std::list names_; diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -434,7 +434,8 @@ return; } if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { - return; // treat like #line, ignore it + LineDirective(dir, j, prescanner); + return; } std::size_t dirOffset{j}; std::string 
dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; @@ -444,7 +445,7 @@ nameToken = dir.TokenAt(j); } if (dirName == "line") { - // #line is ignored + LineDirective(dir, j, prescanner); } else if (dirName == "define") { if (nameToken.empty()) { prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), @@ -1122,4 +1123,57 @@ } return result; } + +void Preprocessor::LineDirective( + const TokenSequence &dir, std::size_t j, Prescanner &prescanner) { + std::size_t tokens{dir.SizeInTokens()}; + const std::string *linePath{nullptr}; + std::optional lineNumber; + SourceFile *sourceFile{nullptr}; + std::optional pos; + for (; j < tokens; j = dir.SkipBlanks(j + 1)) { + std::string tstr{dir.TokenAt(j).ToString()}; + Provenance provenance{dir.GetTokenProvenance(j)}; + if (!pos) { + pos = allSources_.GetSourcePosition(provenance); + } + if (!sourceFile && pos) { + sourceFile = const_cast(&*pos->sourceFile); + } + if (tstr.front() == '"' && tstr.back() == '"') { + tstr = tstr.substr(1, tstr.size() - 2); + if (!tstr.empty() && sourceFile) { + linePath = &sourceFile->SavePath(std::move(tstr)); + } + } else if (IsDecimalDigit(tstr[0])) { + if (!lineNumber) { // ignore later column number + int ln{0}; + for (char c : tstr) { + if (IsDecimalDigit(c)) { + int nln{10 * ln + c - '0'}; + if (nln / 10 == ln && nln % 10 == c - '0') { + ln = nln; + continue; + } + } + prescanner.Say(provenance, + "bad line number '%s' in #line directive"_err_en_US, tstr); + return; + } + lineNumber = ln; + } + } else { + prescanner.Say( + provenance, "bad token '%s' in #line directive"_err_en_US, tstr); + return; + } + } + if (lineNumber && sourceFile) { + CHECK(pos); + if (!linePath) { + linePath = &*pos->path; + } + sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber); + } +} } // namespace Fortran::parser diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -204,7 +204,7 
@@ const char *nextLine_{nullptr}; // next line to process; <= limit_ const char *directiveSentinel_{nullptr}; // current compiler directive - // This data members are state for processing the source line containing + // These data members are state for processing the source line containing // "at_", which goes to up to the newline character before "nextLine_". const char *at_{nullptr}; // next character to process; < nextLine_ int column_{1}; // card image column position of next character diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -349,7 +349,7 @@ if (!start || !end) { return; } - if (&start->file == &end->file && start->line == end->line) { + if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { return; // no continuation } j += 3; @@ -377,9 +377,11 @@ auto endOfPrefixPos{ allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; - if (endOfPrefixPos && next && &endOfPrefixPos->file == &start->file && + if (endOfPrefixPos && next && + &*endOfPrefixPos->sourceFile == &*start->sourceFile && endOfPrefixPos->line == start->line && - (&next->file != &start->file || next->line != start->line)) { + (&*next->sourceFile != &*start->sourceFile || + next->line != start->line)) { Say(range, "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); } diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -10,6 +10,7 @@ #include "flang/Common/idioms.h" #include "llvm/Support/raw_ostream.h" #include +#include #include namespace Fortran::parser { @@ -251,15 +252,14 @@ common::visit( common::visitors{ [&](const Inclusion &inc) { - o << inc.source.path(); std::size_t offset{origin.covers.MemberOffset(range->start())}; - 
SourcePosition pos{inc.source.FindOffsetLineAndColumn(offset)}; - o << ':' << pos.line << ':' << pos.column << ": "; + SourcePosition pos{inc.source.GetSourcePosition(offset)}; + o << pos.path << ':' << pos.line << ':' << pos.column << ": "; EmitPrefix(o, color, prefix, this->getShowColors()); o << message << '\n'; if (echoSourceLine) { const char *text{inc.source.content().data() + - inc.source.GetLineStartOffset(pos.line)}; + inc.source.GetLineStartOffset(pos.trueLineNumber)}; o << " "; for (const char *p{text}; *p != '\n'; ++p) { o << *p; @@ -274,7 +274,7 @@ auto last{range->start() + range->size() - 1}; if (&MapToOrigin(last) == &origin) { auto endOffset{origin.covers.MemberOffset(last)}; - auto endPos{inc.source.FindOffsetLineAndColumn(endOffset)}; + auto endPos{inc.source.GetSourcePosition(endOffset)}; if (pos.line == endPos.line) { for (int j{pos.column}; j < endPos.column; ++j) { o << '^'; @@ -351,7 +351,7 @@ common::visitors{ [&](const Inclusion &inc) -> std::optional { std::size_t offset{origin.covers.MemberOffset(prov)}; - return inc.source.FindOffsetLineAndColumn(offset); + return inc.source.GetSourcePosition(offset); }, [&](const Macro &) { return GetSourcePosition(origin.replaces.start()); @@ -373,14 +373,15 @@ } std::string AllSources::GetPath(Provenance at) const { - const SourceFile *source{GetSourceFile(at)}; - return source ? source->path() : ""s; + std::size_t offset{0}; + const SourceFile *source{GetSourceFile(at, &offset)}; + return source ? *source->GetSourcePosition(offset).path : ""s; } int AllSources::GetLineNumber(Provenance at) const { std::size_t offset{0}; const SourceFile *source{GetSourceFile(at, &offset)}; - return source ? source->FindOffsetLineAndColumn(offset).line : 0; + return source ? 
source->GetSourcePosition(offset).line : 0; } Provenance AllSources::CompilerInsertionProvenance(char ch) { @@ -529,6 +530,7 @@ o << "AllSources range_ "; DumpRange(o, range_); o << '\n'; + std::set sources; for (const Origin &m : origin_) { o << " "; DumpRange(o, m.covers); @@ -539,6 +541,7 @@ o << "module "; } o << "file " << inc.source.path(); + sources.emplace(&inc.source); }, [&](const Macro &mac) { o << "macro " << mac.expansion; }, [&](const CompilerInsertion &ins) { @@ -557,6 +560,9 @@ } o << '\n'; } + for (const SourceFile *sf : sources) { + sf->Dump(o); + } return o; } diff --git a/flang/lib/Parser/source.cpp b/flang/lib/Parser/source.cpp --- a/flang/lib/Parser/source.cpp +++ b/flang/lib/Parser/source.cpp @@ -9,34 +9,36 @@ #include "flang/Parser/source.h" #include "flang/Common/idioms.h" #include "flang/Parser/char-buffer.h" +#include "flang/Parser/characters.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include #include namespace Fortran::parser { SourceFile::~SourceFile() { Close(); } -static std::vector FindLineStarts(llvm::StringRef source) { - std::vector result; - if (source.size() > 0) { - CHECK(source.back() == '\n' && "missing ultimate newline"); +void SourceFile::RecordLineStarts() { + if (std::size_t chars{bytes()}; chars > 0) { + origins_.emplace(1, SourcePositionOrigin{path_, 1}); + const char *source{content().data()}; + CHECK(source[chars - 1] == '\n' && "missing ultimate newline"); std::size_t at{0}; - do { - result.push_back(at); - at = source.find('\n', at) + 1; - } while (at < source.size()); - result.shrink_to_fit(); + do { // "at" is always at the beginning of a source line + lineStart_.push_back(at); + at = reinterpret_cast( + std::memchr(source + at, '\n', chars - at)) - + source + 1; + } while (at < chars); + CHECK(at == chars); + lineStart_.shrink_to_fit(); } - return result; -} - -void 
SourceFile::RecordLineStarts() { - lineStart_ = FindLineStarts({content().data(), bytes()}); } // Check for a Unicode byte order mark (BOM). @@ -157,14 +159,43 @@ void SourceFile::Close() { path_.clear(); buf_.reset(); + distinctPaths_.clear(); + origins_.clear(); } -SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { +SourcePosition SourceFile::GetSourcePosition(std::size_t at) const { CHECK(at < bytes()); + auto it{llvm::upper_bound(lineStart_, at)}; + auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1}; + auto ub{origins_.upper_bound(trueLineNumber)}; + auto column{static_cast(at - lineStart_[trueLineNumber - 1] + 1)}; + if (ub == origins_.begin()) { + return {*this, path_, static_cast(trueLineNumber), column, + static_cast(trueLineNumber)}; + } else { + --ub; + const SourcePositionOrigin &origin{ub->second}; + auto lineNumber{ + trueLineNumber - ub->first + static_cast(origin.line)}; + return {*this, origin.path, static_cast(lineNumber), column, + static_cast(trueLineNumber)}; + } +} - auto it = llvm::upper_bound(lineStart_, at); - auto low = std::distance(lineStart_.begin(), it - 1); - return {*this, static_cast(low + 1), - static_cast(at - lineStart_[low] + 1)}; +const std::string &SourceFile::SavePath(std::string &&path) { + return *distinctPaths_.emplace(std::move(path)).first; +} + +void SourceFile::LineDirective( + int trueLineNumber, const std::string &path, int lineNumber) { + origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber}); +} + +llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const { + o << "SourceFile '" << path_ << "'\n"; + for (const auto &[at, spo] : origins_) { + o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n'; + } + return o; } } // namespace Fortran::parser diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -557,9 +557,9 @@ for 
(const auto &pair : symbols) { const Symbol &symbol{pair.second}; if (auto sourceInfo{allCooked.GetSourcePositionRange(symbol.name())}) { - os << symbol.name().ToString() << ": " << sourceInfo->first.file.path() - << ", " << sourceInfo->first.line << ", " << sourceInfo->first.column - << "-" << sourceInfo->second.column << "\n"; + os << symbol.name().ToString() << ": " << sourceInfo->first.path << ", " + << sourceInfo->first.line << ", " << sourceInfo->first.column << "-" + << sourceInfo->second.column << "\n"; } else if (symbol.has()) { os << symbol.name().ToString() << ": " << symbol.GetUltimate().owner().symbol()->name().ToString() << "\n"; diff --git a/flang/test/Driver/debug-provenance.f90 b/flang/test/Driver/debug-provenance.f90 --- a/flang/test/Driver/debug-provenance.f90 +++ b/flang/test/Driver/debug-provenance.f90 @@ -9,6 +9,8 @@ ! CHECK-NEXT: [3..3] (1 bytes) -> compiler '\'(0x5c) ! CHECK-NEXT: [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) -> file {{.*[/\\]}}debug-provenance.f90 ! CHECK-NEXT: [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) -> compiler '(after end of source)' +! CHECK-NEXT: SourceFile '{{.*[/\\]}}debug-provenance.f90' +! CHECK-NEXT: origin_[1] -> '{{.*[/\\]}}debug-provenance.f90' 1 ! CHECK-NEXT: CookedSource::provenanceMap_: ! CHECK-NEXT: offsets [{{[0-9]*}}..{{[0-9]*}}] -> provenances [{{[0-9]*}}..{{[0-9]*}}] ({{[0-9]*}} bytes) ! CHECK-NEXT: CookedSource::invertedMap_: diff --git a/flang/test/Parser/line-directive.f90 b/flang/test/Parser/line-directive.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Parser/line-directive.f90 @@ -0,0 +1,18 @@ +! RUN: %flang_fc1 -E %s 2>&1 | FileCheck %s +!CHECK: #line "{{.*[/\\]}}line-directive.f90" 3 +subroutine s + implicit none + a = 1. +#line 100 +!CHECK: #line 101 + b = 2. +#line "sourceFile.cobol" 200 +!CHECK: #line "sourceFile.cobol" 201 + c = 3. +# 300 +!CHECK: #line 301 + d = 4. +# "/dev/random" 400 +!CHECK: #line "/dev/random" 401 + e = 5. 
+end diff --git a/flang/test/Semantics/line-directive.f90 b/flang/test/Semantics/line-directive.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/line-directive.f90 @@ -0,0 +1,18 @@ +!RUN: not %flang -fsyntax-only %s 2>&1 | FileCheck %s +subroutine s + implicit none +!CHECK: line-directive.f90:5:3: error: No explicit type declared for 'a' + a = 1. +#line 100 +!CHECK: line-directive.f90:101:3: error: No explicit type declared for 'b' + b = 2. +#line "sourceFile.cobol" 200 +!CHECK: sourceFile.cobol:201:3: error: No explicit type declared for 'c' + c = 3. +# 300 +!CHECK: sourceFile.cobol:301:3: error: No explicit type declared for 'd' + d = 4. +# "/dev/random" 400 +!CHECK: random:401:3: error: No explicit type declared for 'e' + e = 5. +end