diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -128,6 +128,10 @@ bool HasLeadingEmptyMacro; + // NewLinePtr - A pointer to new line character '\n' being + // lexed. For '\r\n', it also points to '\n'. + const char *NewLinePtr; + // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -67,6 +67,7 @@ class CommentHandler; class DirectoryEntry; class DirectoryLookup; +class EmptylineHandler; class ExternalPreprocessorSource; class FileEntry; class FileManager; @@ -226,6 +227,9 @@ /// True if we are pre-expanding macro arguments. bool InMacroArgPreExpansion; + // True if we are currently parsing function body. + bool ParsingFunctionBody; + /// Mapping/lookup information for all identifiers in /// the program, including program keywords. mutable IdentifierTable Identifiers; @@ -256,6 +260,9 @@ /// with this preprocessor. std::vector CommentHandlers; + /// Empty line handler. + EmptylineHandler *Emptyline = nullptr; + /// True if we want to ignore EOF token and continue later on (thus /// avoid tearing the Lexer and etc. down). bool IncrementalProcessing = false; @@ -949,6 +956,9 @@ return NumDirectives; } + void setParsingFunctionBody(bool parsing) { ParsingFunctionBody = parsing; } + bool isParsingFunctionBody() const { return ParsingFunctionBody; } + /// True if we are currently preprocessing a #if or #elif directive bool isParsingIfOrElifDirective() const { return ParsingIfOrElifDirective; @@ -1219,6 +1229,11 @@ /// Install empty handlers for all pragmas (making them ignored). void IgnorePragmas(); + /// Set empty line handler.
+ void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } + + EmptylineHandler *getEmptylineHandler() const { return Emptyline; } + /// Add the specified comment handler to the preprocessor. void addCommentHandler(CommentHandler *Handler); @@ -2390,6 +2405,16 @@ virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; }; +/// Abstract base class that describes a handler that will receive +/// source ranges for empty lines encountered in the source file. +class EmptylineHandler { +public: + virtual ~EmptylineHandler(); + + // The handler handles empty lines. + virtual void HandleEmptyline(SourceRange Range) = 0; +}; + /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry; diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -45,22 +45,29 @@ /// Stores additional source code information like skipped ranges which /// is required by the coverage mapping generator and is obtained from /// the preprocessor. -class CoverageSourceInfo : public PPCallbacks, public CommentHandler { +class CoverageSourceInfo : public PPCallbacks, + public CommentHandler, + public EmptylineHandler { // A vector of skipped source ranges and PrevTokLoc with NextTokLoc. std::vector SkippedRanges; - bool AfterComment = false; + + SourceManager &SourceMgr; public: // Location of the token parsed before HandleComment is called. This is // updated every time Preprocessor::Lex lexes a new token. SourceLocation PrevTokLoc; - // The location of token before comment. 
- SourceLocation BeforeCommentLoc; + + CoverageSourceInfo(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} std::vector &getSkippedRanges() { return SkippedRanges; } + void AddSkippedRange(SourceRange Range); + void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override; + void HandleEmptyline(SourceRange Range) override; + bool HandleComment(Preprocessor &PP, SourceRange Range) override; void updateNextTokLoc(SourceLocation Loc); diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -37,9 +37,11 @@ CoverageSourceInfo * CoverageMappingModuleGen::setUpCoverageCallbacks(Preprocessor &PP) { - CoverageSourceInfo *CoverageInfo = new CoverageSourceInfo(); + CoverageSourceInfo *CoverageInfo = + new CoverageSourceInfo(PP.getSourceManager()); PP.addPPCallbacks(std::unique_ptr(CoverageInfo)); PP.addCommentHandler(CoverageInfo); + PP.setEmptylineHandler(CoverageInfo); PP.setPreprocessToken(true); PP.setTokenWatcher([CoverageInfo](clang::Token Tok) { // Update previous token location. 
@@ -50,21 +52,32 @@ return CoverageInfo; } +void CoverageSourceInfo::AddSkippedRange(SourceRange Range) { + if (!SkippedRanges.empty() && PrevTokLoc == SkippedRanges.back().PrevTokLoc && + SourceMgr.isWrittenInSameFile(SkippedRanges.back().Range.getEnd(), + Range.getBegin())) + SkippedRanges.back().Range.setEnd(Range.getEnd()); + else + SkippedRanges.push_back({Range, PrevTokLoc}); +} + void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) { - SkippedRanges.push_back({Range}); + AddSkippedRange(Range); +} + +void CoverageSourceInfo::HandleEmptyline(SourceRange Range) { + AddSkippedRange(Range); } bool CoverageSourceInfo::HandleComment(Preprocessor &PP, SourceRange Range) { - SkippedRanges.push_back({Range, PrevTokLoc}); - AfterComment = true; + if (PP.isParsingFunctionBody()) + AddSkippedRange(Range); return false; } void CoverageSourceInfo::updateNextTokLoc(SourceLocation Loc) { - if (AfterComment) { + if (!SkippedRanges.empty() && SkippedRanges.back().NextTokLoc.isInvalid()) SkippedRanges.back().NextTokLoc = Loc; - AfterComment = false; - } } namespace { @@ -311,24 +324,24 @@ SourceLocation PrevTokLoc, SourceLocation NextTokLoc) { SpellingRegion SR{SM, LocStart, LocEnd}; - // If Range begin location is invalid, it's not a comment region. 
- if (PrevTokLoc.isInvalid()) - return SR; - unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); - unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); - SpellingRegion newSR(SR); - if (SM.isWrittenInSameFile(LocStart, PrevTokLoc) && - SR.LineStart == PrevTokLine) { - newSR.LineStart = SR.LineStart + 1; - newSR.ColumnStart = 1; + if (PrevTokLoc.isValid()) { + unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); + if (SM.isWrittenInSameFile(LocStart, PrevTokLoc) && + SR.LineStart == PrevTokLine) { + SR.LineStart = SR.LineStart + 1; + SR.ColumnStart = 1; + } } - if (SM.isWrittenInSameFile(LocEnd, NextTokLoc) && - SR.LineEnd == NextTokLine) { - newSR.LineEnd = SR.LineEnd - 1; - newSR.ColumnEnd = SR.ColumnStart + 1; + if (NextTokLoc.isValid()) { + unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); + if (SM.isWrittenInSameFile(LocEnd, NextTokLoc) && + SR.LineEnd == NextTokLine) { + SR.LineEnd = SR.LineEnd - 1; + SR.ColumnEnd = SR.ColumnStart + 1; + } } - if (newSR.isInSourceOrder()) - return newSR; + if (SR.isInSourceOrder()) + return SR; return None; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -125,6 +125,8 @@ // Default to not keeping comments. ExtendedTokenMode = 0; + + NewLinePtr = nullptr; } /// Lexer constructor - Create a new lexer object for the specified buffer @@ -2197,6 +2199,13 @@ unsigned char Char = *CurPtr; + const char *lastNewLine = nullptr; + if (SawNewline) { + lastNewLine = CurPtr - 1; + if (!NewLinePtr) + NewLinePtr = CurPtr - 1; + } + // Skip consecutive spaces efficiently. while (true) { // Skip horizontal whitespace very aggressively. @@ -2214,6 +2223,9 @@ } // OK, but handle newline. + lastNewLine = *CurPtr == '\n' ? 
CurPtr : lastNewLine; + if (!NewLinePtr && *CurPtr == '\n') + NewLinePtr = CurPtr; SawNewline = true; Char = *++CurPtr; } @@ -2237,6 +2249,13 @@ if (SawNewline) { Result.setFlag(Token::StartOfLine); TokAtPhysicalStartOfLine = true; + + if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP && + PP->isParsingFunctionBody()) { + if (auto *Handler = PP->getEmptylineHandler()) + Handler->HandleEmptyline(SourceRange(getSourceLocation(NewLinePtr + 1), + getSourceLocation(lastNewLine))); + } } BufferPtr = CurPtr; @@ -2377,7 +2396,7 @@ // contribute to another token), it isn't needed for correctness. Note that // this is ok even in KeepWhitespaceMode, because we would have returned the /// comment above in that mode. - ++CurPtr; + NewLinePtr = CurPtr++; // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); @@ -3211,6 +3230,9 @@ char Char = getAndAdvanceChar(CurPtr, Result); tok::TokenKind Kind; + if (!isVerticalWhitespace(Char)) + NewLinePtr = nullptr; + switch (Char) { case 0: // Null. // Found end of file? @@ -3265,6 +3287,7 @@ // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; IsAtPhysicalStartOfLine = true; + NewLinePtr = CurPtr - 1; Kind = tok::eod; break; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -109,6 +109,7 @@ PragmasEnabled = true; ParsingIfOrElifDirective = false; PreprocessedOutput = false; + ParsingFunctionBody = false; // We haven't read anything from the external source. 
ReadMacrosFromExternalSource = false; @@ -1417,6 +1418,8 @@ CommentHandler::~CommentHandler() = default; +EmptylineHandler::~EmptylineHandler() = default; + CodeCompletionHandler::~CodeCompletionHandler() = default; void Preprocessor::createPreprocessingRecord() { diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -2236,11 +2236,17 @@ Sema::PragmaStackSentinelRAII PragmaStackSentinel(Actions, "InternalPragmaState", IsCXXMethod); + // Remember the enclosing state so a nested body does not clear the flag. + const bool WasParsingFunctionBody = PP.isParsingFunctionBody(); + PP.setParsingFunctionBody(true); + // Do not enter a scope for the brace, as the arguments are in the same scope // (the function body) as the body itself. Instead, just read the statement // list and put it into a CompoundStmt for safe keeping. StmtResult FnBody(ParseCompoundStatementBody()); + PP.setParsingFunctionBody(WasParsingFunctionBody); + // If the function body could not be parsed, make a bogus compoundstmt. if (FnBody.isInvalid()) { Sema::CompoundScopeRAII CompoundScope(Actions); diff --git a/compiler-rt/test/profile/coverage_emptylines.cpp b/compiler-rt/test/profile/coverage_emptylines.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/profile/coverage_emptylines.cpp @@ -0,0 +1,61 @@ +// Remove comments first. 
+// RUN: sed 's/[ \t]*\/\/.*//' %s > %t.stripped.cpp +// RUN: %clangxx_profgen -fcoverage-mapping -o %t %t.stripped.cpp +// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-cov show %t -instr-profile %t.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s + + +int main() { // CHECK: [[# @LINE]]| 1|int main() { + int x = 0; // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + x = 1; // CHECK-NEXT: [[# @LINE]]| 1| + if (x) // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + x // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + = // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + 0; // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + if (x) // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + x = 1; // CHECK-NEXT: [[# @LINE]]| 0| + // CHECK-NEXT: [[# @LINE]]| | + #ifdef UNDEFINED // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + int y = 0; // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + y = 1; // CHECK-NEXT: [[# @LINE]]| | + if (y) // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + y // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + = // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + 0; // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + #endif // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + #define DEFINED 1 // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + #ifdef DEFINED // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + int y = 0; // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + y = 1; // CHECK-NEXT: [[# @LINE]]| 1| + if (y) // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + y // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + = // CHECK-NEXT: [[# @LINE]]| 1| 
+ // CHECK-NEXT: [[# @LINE]]| | + // CHECK-NEXT: [[# @LINE]]| | + 0; // CHECK-NEXT: [[# @LINE]]| 1| + #endif // CHECK-NEXT: [[# @LINE]]| 1| + // CHECK-NEXT: [[# @LINE]]| | + return 0; // CHECK-NEXT: [[# @LINE]]| 1| +} // CHECK-NEXT: [[# @LINE]]| 1| \ No newline at end of file diff --git a/compiler-rt/test/profile/instrprof-set-file-object-merging.c b/compiler-rt/test/profile/instrprof-set-file-object-merging.c --- a/compiler-rt/test/profile/instrprof-set-file-object-merging.c +++ b/compiler-rt/test/profile/instrprof-set-file-object-merging.c @@ -31,13 +31,13 @@ // CHECK: 14| 2|int main(int argc, const char *argv[]) { // CHECK: 15| 2| if (argc < 2) // CHECK: 16| 0| return 1; -// CHECK: 17| 2| +// CHECK: 17| | // CHECK: 18| 2| FILE *F = fopen(argv[1], "r+b"); // CHECK: 19| 2| if (!F) { // CHECK: 20| | // File might not exist, try opening with truncation // CHECK: 21| 1| F = fopen(argv[1], "w+b"); // CHECK: 22| 1| } // CHECK: 23| 2| __llvm_profile_set_file_object(F, 1); -// CHECK: 24| 2| +// CHECK: 24| | // CHECK: 25| 2| return 0; // CHECK: 26| 2|} diff --git a/compiler-rt/test/profile/instrprof-set-file-object.c b/compiler-rt/test/profile/instrprof-set-file-object.c --- a/compiler-rt/test/profile/instrprof-set-file-object.c +++ b/compiler-rt/test/profile/instrprof-set-file-object.c @@ -24,7 +24,7 @@ // CHECK: 12| 1|int main(int argc, const char *argv[]) { // CHECK: 13| 1| if (argc < 2) // CHECK: 14| 0| return 1; -// CHECK: 15| 1| +// CHECK: 15| | // CHECK: 16| 1| FILE *F = fopen(argv[1], "w+b"); // CHECK: 17| 1| __llvm_profile_set_file_object(F, 0); // CHECK: 18| 1| return 0; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -488,8 +488,11 @@ const bool Skipped = (CR.index() + 1) == Regions.size(); startSegment(ActiveRegions.empty() ? 
CR.value() : *ActiveRegions.back(), CurStartLoc, !GapRegion, Skipped); + // Start a segment with the last active region's count, if there is one. + if (!ActiveRegions.empty()) + startSegment(*ActiveRegions.back(), CurStartLoc, false); continue; } if (CR.index() + 1 == Regions.size() || CurStartLoc != Regions[CR.index() + 1].startLoc()) { // Emit a segment if the next region doesn't start at the same location @@ -586,7 +589,7 @@ for (unsigned I = 1, E = Segments.size(); I < E; ++I) { const auto &L = Segments[I - 1]; const auto &R = Segments[I]; - if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) { + if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col <= R.Col)) { LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col << " followed by " << R.Line << ":" << R.Col << "\n"); assert(false && "Coverage segments not unique or sorted");