diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -38,15 +38,6 @@ // Non-pure functions, for which only partial improvement over the default // behavior is expected, are modeled via check::PostCall, non-intrusively. // -// The following standard C functions are currently supported: -// -// fgetc getline isdigit isupper toascii -// fread isalnum isgraph isxdigit -// fwrite isalpha islower read -// getc isascii isprint write -// getchar isblank ispunct toupper -// getdelim iscntrl isspace tolower -// //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" @@ -384,7 +375,43 @@ }; /// The complete list of constraints that defines a single branch. - typedef std::vector ConstraintSet; + using ConstraintSet = std::vector; + + /// A single branch of a function summary. + /// + /// A branch is defined by a series of constraints - "assumptions" - + /// that together form a single possible outcome of invoking the function. + /// When static analyzer considers a branch, it tries to introduce + /// a child node in the Exploded Graph. The child node has to include + /// constraints that define the branch. If the constraints contradict + /// existing constraints in the state, the node is not created and the branch + /// is dropped; otherwise it's queued for future exploration. + /// The branch is accompanied by a note text that may be displayed + /// to the user when a bug is found on a path that takes this branch. + /// + /// For example, consider the branches in `isalpha(x)`: + /// Branch 1) + /// x is in range ['A', 'Z'] or in ['a', 'z'] + /// then the return value is not 0. (I.e. out-of-range [0, 0]) + /// and the note may say "Assuming the character is alphabetical" + /// Branch 2) + /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] + /// then the return value is 0 + /// and the note may say "Assuming the character is non-alphabetical". + class SummaryCase { + ConstraintSet Constraints; + StringRef Note; + + public: + SummaryCase(ConstraintSet &&Constraints, StringRef Note) + : Constraints(std::move(Constraints)), Note(Note) {} + + SummaryCase(const ConstraintSet &Constraints, StringRef Note) + : Constraints(Constraints), Note(Note) {} + + const ConstraintSet &getConstraints() const { return Constraints; } + StringRef getNote() const { return Note; } + }; using ArgTypes = std::vector>; using RetType = Optional; @@ -451,23 +478,12 @@ return T; } - using Cases = std::vector; + using SummaryCases = std::vector; /// A summary includes information about /// * function prototype (signature) /// * approach to invalidation, - /// * a list of branches - a list of list of ranges - - /// A branch represents a path in the exploded graph of a function (which - /// is a tree). So, a branch is a series of assumptions. In other words, - /// branches represent split states and additional assumptions on top of - /// the splitting assumption. - /// For example, consider the branches in `isalpha(x)` - /// Branch 1) - /// x is in range ['A', 'Z'] or in ['a', 'z'] - /// then the return value is not 0. (I.e. out-of-range [0, 0]) - /// Branch 2) - /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] - /// then the return value is 0. + /// * a list of branches - so, a list of list of ranges, /// * a list of argument constraints, that must be true on every branch. /// If these constraints are not satisfied that means a fatal error /// usually resulting in undefined behaviour. @@ -482,7 +498,7 @@ /// signature is matched. class Summary { const InvalidationKind InvalidationKd; - Cases CaseConstraints; + SummaryCases Cases; ConstraintSet ArgConstraints; // The function to which the summary applies. This is set after lookup and @@ -492,12 +508,12 @@ public: Summary(InvalidationKind InvalidationKd) : InvalidationKd(InvalidationKd) {} - Summary &Case(ConstraintSet &&CS) { - CaseConstraints.push_back(std::move(CS)); + Summary &Case(ConstraintSet &&CS, StringRef Note = "") { + Cases.push_back(SummaryCase(std::move(CS), Note)); return *this; } - Summary &Case(const ConstraintSet &CS) { - CaseConstraints.push_back(CS); + Summary &Case(const ConstraintSet &CS, StringRef Note = "") { + Cases.push_back(SummaryCase(CS, Note)); return *this; } Summary &ArgConstraint(ValueConstraintPtr VC) { @@ -508,7 +524,7 @@ } InvalidationKind getInvalidationKd() const { return InvalidationKd; } - const Cases &getCaseConstraints() const { return CaseConstraints; } + const SummaryCases &getCases() const { return Cases; } const ConstraintSet &getArgConstraints() const { return ArgConstraints; } QualType getArgType(ArgNo ArgN) const { @@ -530,8 +546,8 @@ // Once we know the exact type of the function then do validation check on // all the given constraints. bool validateByConstraints(const FunctionDecl *FD) const { - for (const ConstraintSet &Case : CaseConstraints) - for (const ValueConstraintPtr &Constraint : Case) + for (const SummaryCase &Case : Cases) + for (const ValueConstraintPtr &Constraint : Case.getConstraints()) if (!Constraint->checkValidity(FD)) return false; for (const ValueConstraintPtr &Constraint : ArgConstraints) @@ -842,18 +858,29 @@ // Now apply the constraints. const Summary &Summary = *FoundSummary; ProgramStateRef State = C.getState(); + const ExplodedNode *Node = C.getPredecessor(); // Apply case/branch specifications. - for (const ConstraintSet &Case : Summary.getCaseConstraints()) { + for (const SummaryCase &Case : Summary.getCases()) { ProgramStateRef NewState = State; - for (const ValueConstraintPtr &Constraint : Case) { + for (const ValueConstraintPtr &Constraint : Case.getConstraints()) { NewState = Constraint->apply(NewState, Call, Summary, C); if (!NewState) break; } - if (NewState && NewState != State) - C.addTransition(NewState); + if (NewState && NewState != State) { + StringRef Note = Case.getNote(); + const NoteTag *Tag = C.getNoteTag( + // Sorry couldn't help myself. + [Node, Note]() { + // Don't emit "Assuming..." note when we ended up + // knowing in advance which branch is taken. + return (Node->succ_size() > 1) ? Note.str() : ""; + }, + /*IsPrunable=*/true); + C.addTransition(NewState, Tag); + } } } @@ -1211,7 +1238,8 @@ // Boils down to isupper() or islower() or isdigit(). .Case({ArgumentCondition(0U, WithinRange, {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is alphanumeric") // The locale-specific range. // No post-condition. We are completely unaware of // locale-specific return values. @@ -1220,65 +1248,81 @@ {ArgumentCondition( 0U, OutOfRange, {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))}) + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is non-alphanumeric") .ArgConstraint(ArgumentCondition( 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); addToFunctionSummaryMap( "isalpha", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is alphabetical") // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) .Case({ArgumentCondition( 0U, OutOfRange, {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is non-alphabetical")); addToFunctionSummaryMap( "isascii", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is an ASCII character") .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not an ASCII character")); addToFunctionSummaryMap( "isblank", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a blank character") .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a blank character")); addToFunctionSummaryMap( "iscntrl", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a control character") .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a control character")); addToFunctionSummaryMap( "isdigit", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a digit") .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a digit")); addToFunctionSummaryMap( "isgraph", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character has graphical representation") + .Case( + {ArgumentCondition(0U, OutOfRange, Range(33, 126)), + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character does not have graphical representation")); addToFunctionSummaryMap( "islower", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) // Is certainly lowercase. .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a lowercase letter") // Is ascii but not lowercase. .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), ArgumentCondition(0U, OutOfRange, Range('a', 'z')), - ReturnValueCondition(WithinRange, SingleValue(0))}) + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a lowercase letter") // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) // Is not an unsigned char. @@ -1288,52 +1332,62 @@ "isprint", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is printable") .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is non-printable")); addToFunctionSummaryMap( "ispunct", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition( 0U, WithinRange, {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a punctuation mark") .Case({ArgumentCondition( 0U, OutOfRange, {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a punctuation mark")); addToFunctionSummaryMap( "isspace", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) // Space, '\f', '\n', '\r', '\t', '\v'. .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a whitespace character") // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) .Case({ArgumentCondition(0U, OutOfRange, {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a whitespace character")); addToFunctionSummaryMap( "isupper", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) // Is certainly uppercase. .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is an uppercase letter") // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) // Other. .Case({ArgumentCondition(0U, OutOfRange, {{'A', 'Z'}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not an uppercase letter")); addToFunctionSummaryMap( "isxdigit", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) + ReturnValueCondition(OutOfRange, SingleValue(0))}, + "Assuming the character is a hexadecimal digit") .Case({ArgumentCondition(0U, OutOfRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), - ReturnValueCondition(WithinRange, SingleValue(0))})); + ReturnValueCondition(WithinRange, SingleValue(0))}, + "Assuming the character is not a hexadecimal digit")); addToFunctionSummaryMap( "toupper", Signature(ArgTypes{IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) @@ -1435,12 +1489,14 @@ GetLineSummary); { - Summary GetenvSummary = Summary(NoEvalCall) - .ArgConstraint(NotNull(ArgNo(0))) - .Case({NotNull(Ret)}); + Summary GetenvSummary = + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .Case({NotNull(Ret)}, "Assuming the environment variable exists"); // In untrusted environments the envvar might not exist. if (!ShouldAssumeControlledEnvironment) - GetenvSummary.Case({NotNull(Ret)->negate()}); + GetenvSummary.Case({NotNull(Ret)->negate()}, + "Assuming the environment variable does not exist"); // char *getenv(const char *name); addToFunctionSummaryMap( diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -1695,6 +1695,13 @@ // Construct a new PathDiagnosticPiece. ProgramPoint P = N->getLocation(); + + // If this node already have a specialized note, it's probably better + // than our generic note. + // FIXME: This only looks for note tags, not for other ways to add a note. + if (isa_and_nonnull(P.getTag())) + return nullptr; + PathDiagnosticLocation L = PathDiagnosticLocation::create(P, BRC.getSourceManager()); if (!L.isValid()) diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -38,7 +38,8 @@ } void test_alnum_symbolic(int x) { - int ret = isalnum(x); + int ret = isalnum(x); // \ + // bugpath-note{{Assuming the character is non-alphanumeric}} (void)ret; clang_analyzer_eval(EOF <= x && x <= 255); // \ diff --git a/clang/test/Analysis/std-c-library-functions-path-notes.c b/clang/test/Analysis/std-c-library-functions-path-notes.c new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-path-notes.c @@ -0,0 +1,60 @@ +// RUN: %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=core,apiModeling \ +// RUN: -analyzer-output=text + +#define NULL ((void *)0) + +char *getenv(const char *); +int isalpha(int); +int isdigit(int); +int islower(int); + +char test_getenv() { + char *env = getenv("VAR"); // \ + // expected-note{{Assuming the environment variable does not exist}} \ + // expected-note{{'env' initialized here}} + + return env[0]; // \ + // expected-warning{{Array access (from variable 'env') results in a null pointer dereference}} \ + // expected-note {{Array access (from variable 'env') results in a null pointer dereference}} +} + +int test_isalpha(int *x, char c) { + if (isalpha(c)) {// \ + // expected-note{{Assuming the character is alphabetical}} \ + // expected-note{{Taking true branch}} + x = NULL; // \ + // expected-note{{Null pointer value stored to 'x'}} + } + + return *x; // \ + // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} \ + // expected-note {{Dereference of null pointer (loaded from variable 'x')}} +} + +int test_isdigit(int *x, char c) { + if (!isdigit(c)) {// \ + // expected-note{{Assuming the character is not a digit}} \ + // expected-note{{Taking true branch}} + x = NULL; // \ + // expected-note{{Null pointer value stored to 'x'}} + } + + return *x; // \ + // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} \ + // expected-note {{Dereference of null pointer (loaded from variable 'x')}} +} + +int test_islower(int *x) { + char c = 'c'; + // No "Assuming..." note. We aren't assuming anything. We *know*. + if (islower(c)) { // \ + // expected-note{{Taking true branch}} + x = NULL; // \ + // expected-note{{Null pointer value stored to 'x'}} + } + + return *x; // \ + // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} \ + // expected-note {{Dereference of null pointer (loaded from variable 'x')}} +}