Index: lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
===================================================================
--- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -22,6 +22,11 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/LineIterator.h"
+#include "clang/AST/ASTContext.h"
+#include "llvm/Support/MD5.h"
+#include <sstream>
+
 using namespace clang;
 using namespace ento;
 using namespace markup;
@@ -285,6 +290,94 @@
   }
 }
 
+/// Returns the text of line \p Line of \p Buffer, where lines are numbered as
+/// reported by llvm::line_iterator::line_number().
+///
+/// Returns an empty string if \p Buffer is null or if the end of the buffer is
+/// reached before a line with that number is found.
+static std::string GetNthLineOfFile(llvm::MemoryBuffer *Buffer, int Line) {
+  if (!Buffer)
+    return "";
+
+  llvm::line_iterator LI(*Buffer, false);
+  for (; !LI.is_at_eof() && LI.line_number() != Line; ++LI)
+    ;
+
+  // The requested line does not exist; do not dereference an exhausted
+  // iterator.
+  if (LI.is_at_eof())
+    return "";
+
+  return LI->str();
+}
+
+/// Builds the normalized text of the source line containing \p L for use in
+/// the issue hash: the line is re-lexed with a raw Lexer, starting at its
+/// first non-whitespace character, and the spellings of the tokens are
+/// concatenated without the whitespace that separated them.
+///
+/// \param SM Source manager that owns the buffer \p L points into.
+/// \param L  Location whose expansion line is normalized.
+/// \param D  Declaration used only to obtain the language options for lexing.
+/// \returns the concatenated token text, or an empty string if no buffer is
+///          available for the line.
+static std::string NormalizeLine(const SourceManager *SM, FullSourceLoc &L,
+                                 const Decl *D) {
+  static const std::string whitespaces = " \t\n";
+
+  const LangOptions &Opts = D->getASTContext().getLangOpts();
+  std::string str = GetNthLineOfFile(SM->getBuffer(L.getFileID(), L),
+                                     L.getExpansionLineNumber());
+
+  // find_first_not_of() yields a 0-based offset, or npos when the line holds
+  // only whitespace, but translateLineCol() takes a 1-based column. Convert
+  // explicitly so that column 0 or a truncated npos is never passed on.
+  const std::string::size_type firstNonWs = str.find_first_not_of(whitespaces);
+  const unsigned col = firstNonWs == std::string::npos
+                           ? 1
+                           : static_cast<unsigned>(firstNonWs) + 1;
+
+  SourceLocation StartOfLine = SM->translateLineCol(
+      SM->getFileID(L), L.getExpansionLineNumber(), col);
+  llvm::MemoryBuffer *Buffer =
+      SM->getBuffer(SM->getFileID(StartOfLine), StartOfLine);
+  if (!Buffer)
+    return {};
+
+  const char *BufferPos = SM->getCharacterData(StartOfLine);
+
+  Token Token;
+  Lexer Lexer(SM->getLocForStartOfFile(SM->getFileID(StartOfLine)), Opts,
+              Buffer->getBufferStart(), BufferPos, Buffer->getBufferEnd());
+
+  // Append token spellings until a second token flagged as starting a line
+  // has been seen; that token and everything after it are left out.
+  size_t nextStart = 0;
+  std::ostringstream lineBuff;
+  while (!Lexer.LexFromRawLexer(Token) && nextStart < 2) {
+    if (Token.isAtStartOfLine() && nextStart++ > 0)
+      continue;
+    lineBuff << std::string(SM->getCharacterData(Token.getLocation()),
+                            Token.getLength());
+  }
+
+  return lineBuff.str();
+}
+
+/// Returns the MD5 digest of \p Content rendered as a string by
+/// llvm::MD5::stringifyResult().
+static llvm::SmallString<32> GetHashOfContent(StringRef Content) {
+  llvm::MD5 Hash;
+  llvm::MD5::MD5Result MD5Res;
+  llvm::SmallString<32> Res;
+
+  Hash.update(Content);
+  Hash.final(MD5Res);
+  llvm::MD5::stringifyResult(MD5Res, Res);
+
+  return Res;
+}
+
 void PlistDiagnostics::FlushDiagnosticsImpl(
                                     std::vector &Diags,
                                     FilesMade *filesMade) {
@@ -420,9 +513,12 @@
         EmitString(o, declName) << '\n';
       }
 
-      // Output the bug hash for issue unique-ing. Currently, it's just an
-      // offset from the beginning of the function.
-      if (const Stmt *Body = DeclWithIssue->getBody()) {
+      // Output the bug hash for issue unique-ing.
+      // Currently, it contains the following information:
+      //   1. column number
+      //   2. source line string after removing whitespace
+      //   3. bug type
+      if (DeclWithIssue->getBody()) {
 
         // If the bug uniqueing location exists, use it for the hash.
         // For example, this ensures that two leaks reported on the same line
@@ -433,19 +529,22 @@
         if (UPDLoc.isValid()) {
           FullSourceLoc UL(SM->getExpansionLoc(UPDLoc.asLocation()),
                            *SM);
-          FullSourceLoc UFunL(SM->getExpansionLoc(
-            D->getUniqueingDecl()->getBody()->getLocStart()), *SM);
           o << " issue_hash"
-            << UL.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
+            << GetHashOfContent(
+                 std::to_string(UL.getExpansionColumnNumber()) + "$" +
+                 ::NormalizeLine(SM, UL, DeclWithIssue) + "$" +
+                 D->getBugType().str())
             << "\n";
 
         // Otherwise, use the location on which the bug is reported.
         } else {
           FullSourceLoc L(SM->getExpansionLoc(D->getLocation().asLocation()),
                           *SM);
-          FullSourceLoc FunL(SM->getExpansionLoc(Body->getLocStart()), *SM);
           o << " issue_hash"
-            << L.getExpansionLineNumber() - FunL.getExpansionLineNumber()
+            << GetHashOfContent(
+                 std::to_string(L.getExpansionColumnNumber()) + "$" +
+                 ::NormalizeLine(SM, L, DeclWithIssue) + "$" +
+                 D->getBugType().str())
             << "\n";
         }