Index: lib/AST/RawCommentList.cpp
===================================================================
--- lib/AST/RawCommentList.cpp
+++ lib/AST/RawCommentList.cpp
@@ -64,10 +64,43 @@
 }
 } // unnamed namespace
 
+/// \brief Determines whether only whitespace precedes offset `P` on its line
+/// in `Buffer`.
+/// \param Buffer The buffer to search in.
+/// \param P The offset from the beginning of `Buffer` to start from.
+/// \return true if every character from the closest line ending before `P`
+/// (or the beginning of `Buffer`) up to `P - 1` is a space, tab, form feed or
+/// vertical tab; false as soon as any other character is found.
+static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
+  // Scan backwards from `P` until we see a linefeed or carriage return.
+  for (unsigned I = P; I != 0; --I) {
+    switch (Buffer[I - 1]) {
+    default:
+      return false;
+    case ' ':
+    case '\t':
+    case '\f':
+    case '\v':
+      break;
+    case '\r':
+    case '\n':
+      return true;
+    }
+  }
+  // We hit the beginning of the buffer without seeing a non-blank character.
+  return true;
+}
+
+/// Returns true if `K` is RCK_OrdinaryBCPL or RCK_OrdinaryC.
+static bool isOrdinaryKind(RawComment::CommentKind K) {
+  return (K == RawComment::RCK_OrdinaryBCPL) ||
+         (K == RawComment::RCK_OrdinaryC);
+}
+
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                        bool Merged, bool ParseAllComments) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAttached(false), IsAlmostTrailingComment(false),
+    IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
     ParseAllComments(ParseAllComments) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
@@ -75,17 +108,34 @@
     return;
   }
 
+  // Guess comment kind (needed by both the merged and non-merged paths below).
+  std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
+
+  // An ordinary comment preceded by non-whitespace on its line is trailing.
+  if (ParseAllComments && isOrdinaryKind(K.first)) {
+    FileID BeginFileID;
+    unsigned BeginOffset;
+    std::tie(BeginFileID, BeginOffset) =
+        SourceMgr.getDecomposedLoc(Range.getBegin());
+    if (BeginOffset != 0) {
+      bool Invalid = false;
+      const char *Buffer =
+          SourceMgr.getBufferData(BeginFileID, &Invalid).data();
+      IsTrailingComment |=
+          (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
+    }
+  }
+
   if (!Merged) {
-    // Guess comment kind.
-    std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
     Kind = K.first;
-    IsTrailingComment = K.second;
+    IsTrailingComment |= K.second;
 
     IsAlmostTrailingComment = RawText.startswith("//<") ||
                                  RawText.startswith("/*<");
   } else {
     Kind = RCK_Merged;
-    IsTrailingComment = mergedCommentIsTrailingComment(RawText);
+    IsTrailingComment =
+        IsTrailingComment || mergedCommentIsTrailingComment(RawText);
   }
 }
 
@@ -239,9 +289,17 @@
     const RawComment &C2 = RC;
 
     // Merge comments only if there is only whitespace between them.
-    // Can't merge trailing and non-trailing comments.
+    // Can't merge trailing and non-trailing comments unless the second is
+    // non-trailing ordinary, as in the case:
+    //   int x; // documents x
+    //          // more text
+    // versus:
+    //   int x; // documents x
+    //   int y; // documents y
     // Merge comments if they are on same or consecutive lines.
-    if (C1.isTrailingComment() == C2.isTrailingComment() &&
+    if ((C1.isTrailingComment() == C2.isTrailingComment() ||
+         (C1.isTrailingComment() && !C2.isTrailingComment() &&
+          isOrdinaryKind(C2.getKind()))) &&
         onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
                               /*MaxNewlinesAllowed=*/1)) {
       SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
Index: test/Index/parse-all-comments.c
===================================================================
--- test/Index/parse-all-comments.c
+++ test/Index/parse-all-comments.c
@@ -33,6 +33,32 @@
 // WITH EMPTY LINE
 void multi_line_comment_empty_line(int);
 
+int notdoxy7; // Not a Doxygen juxtaposed comment. notdoxy7 NOT_DOXYGEN
+int notdoxy8; // Not a Doxygen juxtaposed comment. notdoxy8 NOT_DOXYGEN
+
+int trdoxy9; /// A Doxygen non-trailing comment. trdoxyA IS_DOXYGEN_SINGLE
+int trdoxyA;
+
+int trdoxyB; // Not a Doxygen trailing comment. PART_ONE
+             // It's a multiline one too. trdoxyB NOT_DOXYGEN
+int trdoxyC;
+
+int trdoxyD; // Not a Doxygen trailing comment. trdoxyD NOT_DOXYGEN
+             /// This comment doesn't get merged. trdoxyE IS_DOXYGEN
+int trdoxyE;
+
+int trdoxyF; /// A Doxygen non-trailing comment that gets dropped on the floor.
+             // This comment will also be dropped.
+int trdoxyG; // This one won't. trdoxyG NOT_DOXYGEN
+
+int trdoxyH; ///< A Doxygen trailing comment. PART_ONE
+             // This one gets merged with it. trdoxyH SOME_DOXYGEN
+int trdoxyI; // This one doesn't. trdoxyI NOT_DOXYGEN
+
+int trdoxyJ; // Not a Doxygen trailing comment. PART_ONE
+             ///< This one gets merged with it. trdoxyJ SOME_DOXYGEN
+int trdoxyK; // This one doesn't. trdoxyK NOT_DOXYGEN
+
 #endif
 
 // RUN: rm -rf %t
@@ -60,3 +86,17 @@
 // CHECK: parse-all-comments.c:22:6: FunctionDecl=isdoxy6:{{.*}} isdoxy6 IS_DOXYGEN_SINGLE
 // CHECK: parse-all-comments.c:29:6: FunctionDecl=multi_line_comment_plus_ordinary:{{.*}} BLOCK_ORDINARY_COMMENT {{.*}} ORDINARY COMMENT {{.*}} IS_DOXYGEN_START {{.*}} IS_DOXYGEN_END
 // CHECK: parse-all-comments.c:34:6: FunctionDecl=multi_line_comment_empty_line:{{.*}} MULTILINE COMMENT{{.*}}\n{{.*}}\n{{.*}} WITH EMPTY LINE
+// CHECK: parse-all-comments.c:36:5: VarDecl=notdoxy7:{{.*}} notdoxy7 NOT_DOXYGEN
+// CHECK: parse-all-comments.c:37:5: VarDecl=notdoxy8:{{.*}} notdoxy8 NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:39:5: VarDecl=trdoxy9:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:40:5: VarDecl=trdoxyA:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:42:5: VarDecl=trdoxyB:{{.*}} PART_ONE {{.*}} trdoxyB NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:44:5: VarDecl=trdoxyC:{{.*}} trdoxyB NOT_DOXYGEN
+// CHECK: parse-all-comments.c:46:5: VarDecl=trdoxyD:{{.*}} trdoxyD NOT_DOXYGEN
+// CHECK: parse-all-comments.c:48:5: VarDecl=trdoxyE:{{.*}} trdoxyE IS_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:50:5: VarDecl=trdoxyF:{{.*}} RawComment
+// CHECK: parse-all-comments.c:52:5: VarDecl=trdoxyG:{{.*}} trdoxyG NOT_DOXYGEN
+// CHECK: parse-all-comments.c:54:5: VarDecl=trdoxyH:{{.*}} PART_ONE {{.*}} trdoxyH SOME_DOXYGEN
+// CHECK: parse-all-comments.c:56:5: VarDecl=trdoxyI:{{.*}} trdoxyI NOT_DOXYGEN
+// CHECK: parse-all-comments.c:58:5: VarDecl=trdoxyJ:{{.*}} PART_ONE {{.*}} trdoxyJ SOME_DOXYGEN
+// CHECK: parse-all-comments.c:60:5: VarDecl=trdoxyK:{{.*}} trdoxyK NOT_DOXYGEN