Summary: with ParseAllComments enabled, an ordinary comment (// or /* */)
that has non-whitespace text before it on its own line is marked as a
trailing comment. A trailing comment may additionally be merged with a
following non-trailing ordinary comment when both start on the same column.
test/Index/parse-all-comments.c gains declarations and CHECK lines covering
the merged and non-merged cases.

Index: lib/AST/RawCommentList.cpp
===================================================================
--- lib/AST/RawCommentList.cpp
+++ lib/AST/RawCommentList.cpp
@@ -15,6 +15,7 @@
 #include "clang/AST/CommentLexer.h"
 #include "clang/AST/CommentParser.h"
 #include "clang/AST/CommentSema.h"
+#include "clang/Basic/CharInfo.h"
 #include "llvm/ADT/STLExtras.h"
 
 using namespace clang;
@@ -62,12 +63,53 @@
 bool mergedCommentIsTrailingComment(StringRef Comment) {
   return (Comment.size() > 3) && (Comment[3] == '<');
 }
+
+/// Returns true if R1 and R2 both have valid locations that start on the same
+/// column.
+bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
+                               const RawComment &R2) {
+  SourceLocation L1 = R1.getLocStart();
+  SourceLocation L2 = R2.getLocStart();
+  bool Invalid = false;
+  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
+  if (!Invalid) {
+    unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
+    return !Invalid && (C1 == C2);
+  }
+  return false;
+}
 } // unnamed namespace
 
+/// \brief Determines whether there is only whitespace in `Buffer` between `P`
+/// and the previous line.
+/// \param Buffer The buffer to search in.
+/// \param P The offset from the beginning of `Buffer` to start from.
+/// \return true if all of the characters in `Buffer` ranging from the closest
+/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
+/// are whitespace.
+static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
+  // Search backwards until we see linefeed or carriage return.
+  for (unsigned I = P; I != 0; --I) {
+    char C = Buffer[I - 1];
+    if (isVerticalWhitespace(C))
+      return true;
+    if (!isHorizontalWhitespace(C))
+      return false;
+  }
+  // We hit the beginning of the buffer.
+  return true;
+}
+
+/// Returns whether `K` is an ordinary comment kind.
+static bool isOrdinaryKind(RawComment::CommentKind K) {
+  return (K == RawComment::RCK_OrdinaryBCPL) ||
+         (K == RawComment::RCK_OrdinaryC);
+}
+
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                        bool Merged, bool ParseAllComments) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAttached(false), IsAlmostTrailingComment(false),
+    IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
     ParseAllComments(ParseAllComments) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
@@ -75,17 +117,34 @@
     return;
   }
 
+  // Guess comment kind.
+  std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
+
+  // Guess whether an ordinary comment is trailing.
+  if (ParseAllComments && isOrdinaryKind(K.first)) {
+    FileID BeginFileID;
+    unsigned BeginOffset;
+    std::tie(BeginFileID, BeginOffset) =
+        SourceMgr.getDecomposedLoc(Range.getBegin());
+    if (BeginOffset != 0) {
+      bool Invalid = false;
+      const char *Buffer =
+          SourceMgr.getBufferData(BeginFileID, &Invalid).data();
+      IsTrailingComment |=
+          (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
+    }
+  }
+
   if (!Merged) {
-    // Guess comment kind.
-    std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
     Kind = K.first;
-    IsTrailingComment = K.second;
+    IsTrailingComment |= K.second;
 
     IsAlmostTrailingComment = RawText.startswith("//<") ||
                               RawText.startswith("/*<");
   } else {
     Kind = RCK_Merged;
-    IsTrailingComment = mergedCommentIsTrailingComment(RawText);
+    IsTrailingComment =
+        IsTrailingComment || mergedCommentIsTrailingComment(RawText);
   }
 }
 
@@ -239,9 +298,22 @@
   const RawComment &C2 = RC;
 
   // Merge comments only if there is only whitespace between them.
-  // Can't merge trailing and non-trailing comments.
+  // Can't merge trailing and non-trailing comments unless the second is
+  // non-trailing ordinary in the same column, as in the case:
+  //   int x; // documents x
+  //          // more text
+  // versus:
+  //   int x; // documents x
+  //   int y; // documents y
+  // or:
+  //   int x; // documents x
+  //          // documents y
+  //   int y;
   // Merge comments if they are on same or consecutive lines.
-  if (C1.isTrailingComment() == C2.isTrailingComment() &&
+  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
+       (C1.isTrailingComment() && !C2.isTrailingComment() &&
+        isOrdinaryKind(C2.getKind()) &&
+        commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
                             /*MaxNewlinesAllowed=*/1)) {
     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
Index: test/Index/parse-all-comments.c
===================================================================
--- test/Index/parse-all-comments.c
+++ test/Index/parse-all-comments.c
@@ -33,6 +33,41 @@
 // WITH EMPTY LINE
 void multi_line_comment_empty_line(int);
 
+int notdoxy7; // Not a Doxygen juxtaposed comment. notdoxy7 NOT_DOXYGEN
+int notdoxy8; // Not a Doxygen juxtaposed comment. notdoxy8 NOT_DOXYGEN
+
+int trdoxy9; /// A Doxygen non-trailing comment. trdoxyA IS_DOXYGEN_SINGLE
+int trdoxyA;
+
+int trdoxyB; // Not a Doxygen trailing comment. PART_ONE
+             // It's a multiline one too. trdoxyB NOT_DOXYGEN
+int trdoxyC;
+
+int trdoxyD; // Not a Doxygen trailing comment. trdoxyD NOT_DOXYGEN
+             /// This comment doesn't get merged. trdoxyE IS_DOXYGEN
+int trdoxyE;
+
+int trdoxyF; /// A Doxygen non-trailing comment that gets dropped on the floor.
+             // This comment will also be dropped.
+int trdoxyG; // This one won't. trdoxyG NOT_DOXYGEN
+
+int trdoxyH; ///< A Doxygen trailing comment. PART_ONE
+             // This one gets merged with it. trdoxyH SOME_DOXYGEN
+int trdoxyI; // This one doesn't. trdoxyI NOT_DOXYGEN
+
+int trdoxyJ; // Not a Doxygen trailing comment. PART_ONE
+             ///< This one gets merged with it. trdoxyJ SOME_DOXYGEN
+int trdoxyK; // This one doesn't. trdoxyK NOT_DOXYGEN
+
+int trdoxyL; // Not a Doxygen trailing comment. trdoxyL NOT_DOXYGEN
+// This one shouldn't get merged. trdoxyM NOT_DOXYGEN
+int trdoxyM;
+
+int trdoxyN; ///< A Doxygen trailing comment. trdoxyN IS_DOXYGEN
+  // This one shouldn't get merged. trdoxyO NOT_DOXYGEN
+int trdoxyO;
+
+
 #endif
 
 // RUN: rm -rf %t
@@ -60,3 +95,21 @@
 // CHECK: parse-all-comments.c:22:6: FunctionDecl=isdoxy6:{{.*}} isdoxy6 IS_DOXYGEN_SINGLE
 // CHECK: parse-all-comments.c:29:6: FunctionDecl=multi_line_comment_plus_ordinary:{{.*}} BLOCK_ORDINARY_COMMENT {{.*}} ORDINARY COMMENT {{.*}} IS_DOXYGEN_START {{.*}} IS_DOXYGEN_END
 // CHECK: parse-all-comments.c:34:6: FunctionDecl=multi_line_comment_empty_line:{{.*}} MULTILINE COMMENT{{.*}}\n{{.*}}\n{{.*}} WITH EMPTY LINE
+// CHECK: parse-all-comments.c:36:5: VarDecl=notdoxy7:{{.*}} notdoxy7 NOT_DOXYGEN
+// CHECK: parse-all-comments.c:37:5: VarDecl=notdoxy8:{{.*}} notdoxy8 NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:39:5: VarDecl=trdoxy9:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:40:5: VarDecl=trdoxyA:{{.*}} trdoxyA IS_DOXYGEN_SINGLE
+// CHECK: parse-all-comments.c:42:5: VarDecl=trdoxyB:{{.*}} PART_ONE {{.*}} trdoxyB NOT_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:44:5: VarDecl=trdoxyC:{{.*}} trdoxyB NOT_DOXYGEN
+// CHECK: parse-all-comments.c:46:5: VarDecl=trdoxyD:{{.*}} trdoxyD NOT_DOXYGEN
+// CHECK: parse-all-comments.c:48:5: VarDecl=trdoxyE:{{.*}} trdoxyE IS_DOXYGEN
+// CHECK-NOT: parse-all-comments.c:50:5: VarDecl=trdoxyF:{{.*}} RawComment
+// CHECK: parse-all-comments.c:52:5: VarDecl=trdoxyG:{{.*}} trdoxyG NOT_DOXYGEN
+// CHECK: parse-all-comments.c:54:5: VarDecl=trdoxyH:{{.*}} PART_ONE {{.*}} trdoxyH SOME_DOXYGEN
+// CHECK: parse-all-comments.c:56:5: VarDecl=trdoxyI:{{.*}} trdoxyI NOT_DOXYGEN
+// CHECK: parse-all-comments.c:58:5: VarDecl=trdoxyJ:{{.*}} PART_ONE {{.*}} trdoxyJ SOME_DOXYGEN
+// CHECK: parse-all-comments.c:60:5: VarDecl=trdoxyK:{{.*}} trdoxyK NOT_DOXYGEN
+// CHECK: parse-all-comments.c:62:5: VarDecl=trdoxyL:{{.*}} trdoxyL NOT_DOXYGEN
+// CHECK: parse-all-comments.c:64:5: VarDecl=trdoxyM:{{.*}} trdoxyM NOT_DOXYGEN
+// CHECK: parse-all-comments.c:66:5: VarDecl=trdoxyN:{{.*}} trdoxyN IS_DOXYGEN
+// CHECK: parse-all-comments.c:68:5: VarDecl=trdoxyO:{{.*}} trdoxyO NOT_DOXYGEN