diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -42,8 +42,10 @@ void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState, SVal TaintedSVal) const; + static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); + public: - void checkLocation(SVal l, bool isLoad, const Stmt*S, + void checkLocation(SVal l, bool isLoad, const Stmt *S, CheckerContext &C) const; }; @@ -155,6 +157,15 @@ // memory access is within the extent of the base region. Since we // have some flexibility in defining the base region, we can achieve // various levels of conservatism in our buffer overflow checking. + + // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as + // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) + // and incomplete analysis of these leads to false positives. As even + // accurate reports would be confusing for the users, just disable reports + // from these macros: + if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) + return; + ProgramStateRef state = checkerContext.getState(); SValBuilder &svalBuilder = checkerContext.getSValBuilder(); @@ -267,6 +278,25 @@ checkerContext.emitReport(std::move(BR)); } +bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { + SourceLocation Loc = S->getBeginLoc(); + if (!Loc.isMacroID()) + return false; + + StringRef MacroName = Lexer::getImmediateMacroName( + Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); + + if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') + return false; + + return ((MacroName == "isalnum") || (MacroName == "isalpha") || + (MacroName == "isblank") || (MacroName == "isdigit") || + (MacroName == "isgraph") || (MacroName == "islower") || + (MacroName == "isnctrl") || (MacroName == "isprint") || + (MacroName == "ispunct") || (MacroName == "isspace") || + (MacroName == "isupper") || (MacroName == "isxdigit")); +} + #ifndef NDEBUG LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { dumpToStream(llvm::errs()); diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -156,6 +156,28 @@ Buffer[m] = 1; //expected-warning {{Out of bound memory access (index is tainted)}} } +extern const unsigned short int **__ctype_b_loc (void); +enum { _ISdigit = 2048 }; +# define isdigit(c) ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit) + +int isdigitImplFalsePositive(void) { + // If this code no longer produces a bug report, then consider removing the + // special case that disables buffer overflow reports coming from the isXXXXX + // macros in ctypes.h. + int c = getchar(); + return ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit); + //expected-warning@-1 {{Out of bound memory access (index is tainted)}} +} + +int isdigitSuppressed(void) { + // Same code as above, but reports are suppressed based on macro name: + int c = getchar(); + return isdigit(c); //no-warning +} + +// Some later tests use isdigit as a function, so we need to undef it: +#undef isdigit + void testUncontrolledFormatString(char **p) { char s[80]; fscanf(stdin, "%s", s);