Index: include/clang/StaticAnalyzer/Checkers/Checkers.td =================================================================== --- include/clang/StaticAnalyzer/Checkers/Checkers.td +++ include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -416,6 +416,10 @@ HelpText<"Check for proper usage of vfork">, DescFile<"VforkChecker.cpp">; +def StdLibraryFunctionsChecker : Checker<"StdLibraryFunctions">, + HelpText<"Improve modeling of standard library functions">, + DescFile<"StdLibraryFunctionsChecker.cpp">; + } // end "unix" let ParentPackage = UnixAlpha in { Index: lib/StaticAnalyzer/Checkers/CMakeLists.txt =================================================================== --- lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -68,6 +68,7 @@ ReturnUndefChecker.cpp SimpleStreamChecker.cpp StackAddrEscapeChecker.cpp + StdLibraryFunctionsChecker.cpp StreamChecker.cpp TaintTesterChecker.cpp TestAfterDivZeroChecker.cpp Index: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp =================================================================== --- /dev/null +++ lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -0,0 +1,1130 @@ +//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This checker improves modeling of a few simple library functions. +// It does not throw warnings. +// +// This checker provides a specification format - `FunctionSpecTy' - and +// contains descriptions of some library functions in this format. Each +// specification contains a list of branches for splitting the program state +// upon call, and range constraints on argument and return-value symbols that +// are satisfied on each branch. 
This spec can be expanded to include more +// items, like external effects of the function. +// +// The main difference between this approach and the body farms technique is +// in more explicit control over how many branches are produced. For example, +// consider standard C function `ispunct(int x)', which returns a non-zero value +// iff `x' is a punctuation character, that is, when `x' is in range +// ['!', '/'] U [':', '@'] U ['[', '\`'] U ['{', '~']. +// `FunctionSpecTy' provides only two branches for this function. However, any +// attempt to describe this range with if-statements in the body farm +// would result in many more branches. Because each branch needs to be analyzed +// independently, this significantly reduces performance. Additionally, +// once we consider a branch on which `x' is in range, say, ['!', '/'], +// we assume that such branch is an important separate path through the program, +// which may lead to false positives because considering this particular path +// was not consciously intended, and therefore it might have been unreachable. +// +// This checker uses eval::Call for modeling "pure" functions, for which +// their `FunctionSpecTy' is a precise model. This avoids unnecessary +// invalidation passes. Conflicts with other checkers are unlikely because +// if the function has no other effects, other checkers would probably never +// want to improve upon the modeling done by this checker. +// +// Non-"pure" functions, for which only partial improvement over the default +// behavior is expected, are modeled via check::PostCall, non-intrusively. 
+// +// The following standard C functions are currently supported: +// +// fgetc getline isdigit isupper +// fread isalnum isgraph isxdigit +// fwrite isalpha islower read +// getc isascii isprint write +// getchar isblank ispunct +// getdelim iscntrl isspace +// +//===----------------------------------------------------------------------===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace clang::ento; + +namespace { +class StdLibraryFunctionsChecker : public Checker { + /// Below is a series of typedefs necessary to define function specs. + /// We avoid nesting types here because each additional qualifier + /// would need to be repeated in every function spec. + struct FunctionSpecTy; + + /// Specify how much the analyzer engine should entrust modeling this function + /// to us. If it doesn't, it performs additional invalidations. + enum InvalidationKindTy { NoEvalCall, EvalCallAsPure }; + + /// A pair of ValueRangeKindTy and IntRangeVectorTy would describe a range + /// imposed on a particular argument or return value symbol. + /// + /// Given a range, should the argument stay inside or outside this range? + /// The special `ComparesToArgument' value indicates that we should + /// impose a constraint that involves other argument or return value symbols. + enum ValueRangeKindTy { OutOfRange, WithinRange, ComparesToArgument }; + + /// Normally, describes a single range constraint, e.g. {{0, 1}, {3, 4}} is + /// a non-negative integer, which is less than 5 and not equal to 2. For + /// `ComparesToArgument', holds information about how exactly to compare to + /// the argument. + typedef std::vector> IntRangeVectorTy; + + /// A reference to an argument or return value by its number. 
+ /// ArgNo in CallExpr and CallEvent is defined as unsigned, but + /// obviously uint32_t should be enough for all practical purposes. + typedef uint32_t ArgNoTy; + static const ArgNoTy Ret = std::numeric_limits::max(); + + /// Encapsulates a single range on a single symbol within a branch. + class ValueRange { + ArgNoTy ArgNo; // Argument to which we apply the range. + ValueRangeKindTy Kind; // Kind of range definition. + IntRangeVectorTy Args; // Polymorphic arguments: a list of segments, or, for ComparesToArgument, a single {opcode, other argument number} pair. + + public: + ValueRange(ArgNoTy ArgNo, ValueRangeKindTy Kind, + const IntRangeVectorTy &Args) + : ArgNo(ArgNo), Kind(Kind), Args(Args) {} + + ArgNoTy getArgNo() const { return ArgNo; } + ValueRangeKindTy getKind() const { return Kind; } + + BinaryOperator::Opcode getOpcode() const { + assert(Kind == ComparesToArgument); + assert(Args.size() == 1); + BinaryOperator::Opcode Op = + static_cast(Args[0].first); + assert(BinaryOperator::isComparisonOp(Op) && + "Only comparison ops are supported for ComparesToArgument"); + return Op; + } + + ArgNoTy getOtherArgNo() const { + assert(Kind == ComparesToArgument); + assert(Args.size() == 1); + return static_cast(Args[0].second); + } + + const IntRangeVectorTy &getRanges() const { + assert(Kind != ComparesToArgument); + return Args; + } + + // We avoid creating a virtual apply() method because + // it makes initializer lists harder to write. 
+ private: + ProgramStateRef applyAsOutOfRange(ProgramStateRef State, + const CallEvent &Call, + const FunctionSpecTy &Spec) const; + ProgramStateRef applyAsWithinRange(ProgramStateRef State, + const CallEvent &Call, + const FunctionSpecTy &Spec) const; + ProgramStateRef applyAsComparesToArgument(ProgramStateRef State, + const CallEvent &Call, + const FunctionSpecTy &Spec) const; + + public: + ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, + const FunctionSpecTy &Spec) const { + switch (Kind) { + case OutOfRange: + return applyAsOutOfRange(State, Call, Spec); + case WithinRange: + return applyAsWithinRange(State, Call, Spec); + case ComparesToArgument: + return applyAsComparesToArgument(State, Call, Spec); + } + llvm_unreachable("Unknown ValueRange kind!"); + } + }; + + /// The complete list of ranges that defines a single branch. + typedef std::vector ValueRangeSet; + + /// Includes information about the function prototype (which is necessary to + /// ensure we're modeling the right function and casting values properly), + /// the approach to invalidation, and a list of branches - essentially, a list + /// of lists of ranges - essentially, a list of lists of lists of segments. + struct FunctionSpecTy { + const std::vector ArgTypes; + const QualType RetType; + const InvalidationKindTy InvalidationKind; + const std::vector Ranges; + + private: + static void assertTypeSuitableForSpec(QualType T) { + assert(!T->isVoidType() && + "We should have had no significant void types in the spec"); + assert(T.isCanonical() && + "We should only have canonical types in the spec"); + // FIXME: lift this assert (but not the ones above!) + assert(T->isIntegralOrEnumerationType() && + "We only support integral ranges in the spec"); + } + + public: + QualType getArgType(ArgNoTy ArgNo) const { + QualType T = (ArgNo == Ret) ? 
RetType : ArgTypes[ArgNo]; + assertTypeSuitableForSpec(T); + return T; + } + + /// Try our best to figure out if the call expression is the call of + /// *the* library function to which this specification applies. + bool matchesCall(const CallExpr *CE) const; + }; + + // The map of all functions supported by the checker. It is initialized + // lazily, and it doesn't change after initialization. + typedef llvm::StringMap FunctionSpecMapTy; + mutable FunctionSpecMapTy FunctionSpecMap; + + // Auxiliary functions to support ArgNoTy within all structures + // in a unified manner. + static QualType getArgType(const FunctionSpecTy &Spec, ArgNoTy ArgNo) { + return Spec.getArgType(ArgNo); + } + static QualType getArgType(const CallEvent &Call, ArgNoTy ArgNo) { + return ArgNo == Ret ? Call.getResultType().getCanonicalType() + : Call.getArgExpr(ArgNo)->getType().getCanonicalType(); + } + static QualType getArgType(const CallExpr *CE, ArgNoTy ArgNo) { + return ArgNo == Ret ? CE->getType().getCanonicalType() + : CE->getArg(ArgNo)->getType().getCanonicalType(); + } + static SVal getArgSVal(const CallEvent &Call, ArgNoTy ArgNo) { + return ArgNo == Ret ? 
Call.getReturnValue() : Call.getArgSVal(ArgNo); + } + +public: + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + bool evalCall(const CallExpr *CE, CheckerContext &C) const; + +private: + Optional findFunctionSpec(const FunctionDecl *FD, + const CallExpr *CE, + CheckerContext &C) const; + + void initFunctionSpecs(BasicValueFactory &BVF) const; +}; +} // end of anonymous namespace + +ProgramStateRef +StdLibraryFunctionsChecker::ValueRange::applyAsOutOfRange( + ProgramStateRef State, const CallEvent &Call, + const FunctionSpecTy &Spec) const { + + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + ConstraintManager &CM = Mgr.getConstraintManager(); + QualType T = getArgType(Spec, getArgNo()); + SVal V = getArgSVal(Call, getArgNo()); + + if (auto N = V.getAs()) { + const IntRangeVectorTy &R = getRanges(); + size_t E = R.size(); + for (size_t I = 0; I != E; ++I) { + const llvm::APSInt &Min = BVF.getValue(R[I].first, T); + const llvm::APSInt &Max = BVF.getValue(R[I].second, T); + assert(Min <= Max); + State = CM.assumeWithinInclusiveRange(State, *N, Min, Max, false); + if (!State) + break; + } + } + + return State; +} + +ProgramStateRef +StdLibraryFunctionsChecker::ValueRange::applyAsWithinRange( + ProgramStateRef State, const CallEvent &Call, + const FunctionSpecTy &Spec) const { + + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + ConstraintManager &CM = Mgr.getConstraintManager(); + QualType T = getArgType(Spec, getArgNo()); + SVal V = getArgSVal(Call, getArgNo()); + + // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". + // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, + // and then cut away all holes in R one by one. 
+ if (auto N = V.getAs()) { + const IntRangeVectorTy &R = getRanges(); + size_t E = R.size(); + + const llvm::APSInt &MinusInf = BVF.getMinValue(T); + const llvm::APSInt &PlusInf = BVF.getMaxValue(T); + + const llvm::APSInt &Left = BVF.getValue(R[0].first - 1, T); + if (Left != PlusInf) { + assert(MinusInf <= Left); + State = CM.assumeWithinInclusiveRange(State, *N, MinusInf, Left, false); + if (!State) + return nullptr; + } + + const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1, T); + if (Right != MinusInf) { + assert(Right <= PlusInf); + State = CM.assumeWithinInclusiveRange(State, *N, Right, PlusInf, false); + if (!State) + return nullptr; + } + + for (size_t I = 1; I != E; ++I) { + const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1, T); + const llvm::APSInt &Max = BVF.getValue(R[I].first - 1, T); + assert(Min <= Max); + State = CM.assumeWithinInclusiveRange(State, *N, Min, Max, false); + if (!State) + return nullptr; + } + } + + return State; +} + +ProgramStateRef +StdLibraryFunctionsChecker::ValueRange::applyAsComparesToArgument( + ProgramStateRef State, const CallEvent &Call, + const FunctionSpecTy &Spec) const { + + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + QualType CondT = SVB.getConditionType(); + QualType T = getArgType(Spec, getArgNo()); + SVal V = getArgSVal(Call, getArgNo()); + + BinaryOperator::Opcode Op = getOpcode(); + ArgNoTy OtherArg = getOtherArgNo(); + SVal OtherV = getArgSVal(Call, OtherArg); + QualType OtherT = getArgType(Call, OtherArg); + // Note: we avoid integral promotion for comparison. 
+ OtherV = SVB.evalCast(OtherV, T, OtherT); + if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) + .getAs()) + State = State->assume(*CompV, true); + return State; +} + +void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + const FunctionDecl *FD = dyn_cast_or_null(Call.getDecl()); + if (!FD) + return; + + const CallExpr *CE = dyn_cast_or_null(Call.getOriginExpr()); + if (!CE) + return; + + Optional FoundSpec = findFunctionSpec(FD, CE, C); + if (!FoundSpec) + return; + + // Now apply ranges. + const FunctionSpecTy &Spec = *FoundSpec; + ProgramStateRef State = C.getState(); + + for (const auto &VRS: Spec.Ranges) { + ProgramStateRef NewState = State; + for (const auto &VR: VRS) { + NewState = VR.apply(NewState, Call, Spec); + if (!NewState) + break; + } + + if (NewState && NewState != State) + C.addTransition(NewState); + } +} + +bool StdLibraryFunctionsChecker::evalCall(const CallExpr *CE, + CheckerContext &C) const { + const FunctionDecl *FD = dyn_cast_or_null(CE->getCalleeDecl()); + if (!FD) + return false; + + Optional FoundSpec = findFunctionSpec(FD, CE, C); + if (!FoundSpec) + return false; + + const FunctionSpecTy &Spec = *FoundSpec; + switch (Spec.InvalidationKind) { + case EvalCallAsPure: { + ProgramStateRef State = C.getState(); + const LocationContext *LC = C.getLocationContext(); + SVal V = C.getSValBuilder().conjureSymbolVal( + CE, LC, CE->getType().getCanonicalType(), C.blockCount()); + State = State->BindExpr(CE, LC, V); + C.addTransition(State); + return true; + } + case NoEvalCall: + // Spec tells us to avoid performing eval::Call. The function is possibly + // evaluated by another checker, or evaluated conservatively. 
+ return false; + } + llvm_unreachable("Unknown invalidation kind!"); +} + +bool StdLibraryFunctionsChecker::FunctionSpecTy::matchesCall( + const CallExpr *CE) const { + // Check number of arguments: + if (CE->getNumArgs() != ArgTypes.size()) + return false; + + // Check return type if relevant: + if (!RetType.isNull() && RetType != CE->getType().getCanonicalType()) + return false; + + // Check argument types when relevant: + for (size_t I = 0, E = ArgTypes.size(); I != E; ++I) { + QualType FormalT = ArgTypes[I]; + // Null type marks irrelevant arguments. + if (FormalT.isNull()) + continue; + + assertTypeSuitableForSpec(FormalT); + + QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I); + assert(ActualT.isCanonical()); + if (ActualT != FormalT) + return false; + } + + return true; +} + +Optional +StdLibraryFunctionsChecker::findFunctionSpec(const FunctionDecl *FD, + const CallExpr *CE, + CheckerContext &C) const { + // Note: we cannot always obtain FD from CE + // (eg. virtual call, or call by pointer). + assert(CE); + + if (!FD) + return None; + + SValBuilder &SVB = C.getSValBuilder(); + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + initFunctionSpecs(BVF); + + std::string Name = FD->getQualifiedNameAsString(); + if (Name.empty() || !C.isCLibraryFunction(FD, Name)) + return None; + + auto FSMI = FunctionSpecMap.find(Name); + if (FSMI == FunctionSpecMap.end()) + return None; + + // Verify that function signature matches the spec in advance. + // Otherwise we might be modeling the wrong function. + // Strict checking is important because we will be conducting + // very integral-type-sensitive operations on arguments and + // return values. 
+ const FunctionSpecTy &Spec = FSMI->second; + if (!Spec.matchesCall(CE)) + return None; + + return Spec; +} + +void StdLibraryFunctionsChecker::initFunctionSpecs( + BasicValueFactory &BVF) const { + if (!FunctionSpecMap.empty()) + return; + + ASTContext &ACtx = BVF.getContext(); + + // These types are useful for writing specifications quickly. + // New specifications should probably introduce more types. + QualType Irrelevant; // A placeholder, whenever we do not care about the type. + QualType IntTy = ACtx.IntTy; + QualType SizeTy = ACtx.getSizeType(); + QualType SSizeTy = ACtx.getIntTypeForBitwidth(ACtx.getTypeSize(SizeTy), true); + + // Don't worry about truncation here, it'd be cast back to SIZE_MAX when used. + LLVM_ATTRIBUTE_UNUSED int64_t SizeMax = + BVF.getMaxValue(SizeTy).getLimitedValue(); + int64_t SSizeMax = + BVF.getMaxValue(SSizeTy).getLimitedValue(); + + // We are finally ready to define specifications for all supported functions. + // + // The signature needs to have the correct number of arguments. + // However, we insert `Irrelevant' when the type is insignificant. + // + // Argument ranges should always cover all variants. If the return value + // is completely unknown, omit it from the respective range set. + // + // All types in the spec need to be canonical. + // + // Every item in the list of range sets represents a particular + // execution path the analyzer would need to explore once + // the call is modeled - a new program state is constructed + // for every range set, and each range line in the range set + // corresponds to a specific constraint within this state. + // + // Upon comparing to another argument, the other argument is cast + // to the current argument's type. This avoids proper promotion but + // seems useful. For example, read() receives a size_t argument, + // and its return value, which is of type ssize_t, cannot be greater + // than this argument. 
If we made a promotion, and the size argument + // is equal to, say, 10, then we'd impose a range of [0, 10] on the + // return value; however, the correct range is [-1, 10]. + // + // Please update the list of functions in the header after editing! + // + // The format is as follows: + // + //{ "function name", + // { spec: + // { argument types list, ... }, + // return type, purity, { range set list: + // { range list: + // { argument index, within or out of, {{from, to}, ...} }, + // { argument index, compares to argument, {{how, which}} }, + // ... + // } + // } + // } + //} + +#define SPEC // Specification for a single function. +#define FOR_FUNCTION(x) #x // Function identifier name covered by the spec. +#define SPEC_DATA // Value of the specification. +#define ARGUMENT_TYPES // List of argument QualTypes. +#define RETURN_TYPE(x) x // Return type - a single QualType. +#define INVALIDATION_APPROACH(x) x // A single InvalidationKindTy. +#define BRANCHES // The list of branches. +#define BRANCH // A single branch for state splitting, a list of ranges. +#define RANGE // A range on specific argument, many ranges define a branch. +#define ARG_NO(x) x ## U // Argument to which the range applies, unsigned. +#define RET_VAL Ret // Use this instead of ARG_NO for applying to return value. +#define RANGE_KIND(x) x // A single ValueRangeKindTy. +#define SET // A set of possible values for the argument, list of segments. +#define SEG(x, y) { x, y } // A segment [x, y]. +#define POINT(x) SEG(x, x) // A point {x}. +#define U , // Union of sets, works on SEG and POINT within the SET clause. +#define IS_LESS_THAN(x) SET {{ BO_LE, x }} // Set for ComparesToArgument ranges. Despite the name, the comparison is BO_LE, i.e. "less than or equal to". + + FunctionSpecMap = { + // The isascii() family of functions. 
+ SPEC { + FOR_FUNCTION(isalnum), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Boils down to isupper() or islower() or isdigit() + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('0', '9') U SEG('A', 'Z') U SEG('a', 'z') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // The locale-specific range. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } + } + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('0', '9') U SEG('A', 'Z') + U SEG('a', 'z') U SEG(128, 255)} + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { FOR_FUNCTION(isalpha), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // isupper() or islower(). Note that 'Z' is less than 'a'. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('A', 'Z') U SEG('a', 'z') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // The locale-specific range. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } + }, + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('A', 'Z') U SEG('a', 'z'), SEG(128, 255) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { FOR_FUNCTION(isascii), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is ASCII. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(0, 127) } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // Is not ASCII. 
+ RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(0, 127) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(isblank), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is tab or space. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { POINT('\t') U POINT(' ') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { POINT('\t') U POINT(' ') } + }, + RANGE { RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(iscntrl), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // 0..31 or 127. Note that 32 is the space character, which the isprint() spec treats as printable, so it must not be listed here. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(0, 31) U POINT(127) } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + }, + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(0, 31) U POINT(127) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(isdigit), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is a digit. 
+ RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('0', '9') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + }, + }, + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('0', '9') } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + }, + } + } + } + }, + SPEC { + FOR_FUNCTION(isgraph), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(33, 126) } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(33, 126) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(islower), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is certainly lowercase. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('a', 'z') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET{ POINT(0) } + } + }, + BRANCH { // Is ascii but not lowercase. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(0, 127) } + }, + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('a', 'z') } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + }, + BRANCH { // The locale-specific range. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } + } + }, + BRANCH { // Is not an unsigned char. 
+ RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(0, 255) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(isprint), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(32, 126) } + }, + RANGE { + RET_VAL, + RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(32, 126) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(ispunct), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is a punctuation character. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('!', '/') U SEG(':', '@') + U SEG('[', '`') U SEG('{', '~') } }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('!', '/') U SEG(':', '@') + U SEG('[', '`') U SEG('{', '~') } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(isspace), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Space, '\f', '\n', '\r', '\t', '\v'. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(9, 13) U POINT(' ') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0)} + } + }, + BRANCH { // The locale-specific range. 
+ RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } + } + }, + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG(9, 13) U POINT(' ') U SEG(128, 255) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + }, + } + } + }, + SPEC { + FOR_FUNCTION(isupper), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE (IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { // Is certainly uppercase. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('A', 'Z') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { // The locale-specific range. + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } + } + }, + BRANCH { // Other. + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('A', 'Z') U SEG(128, 255) } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(isxdigit), + SPEC_DATA { + ARGUMENT_TYPES { IntTy }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure), + BRANCHES { + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG('0', '9') U SEG('A', 'F') U SEG('a', 'f') } + }, + RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } + } + }, + BRANCH { + RANGE { + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('0', '9'), SEG('A', 'F'), SEG('a', 'f') } + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(0) } + } + } + } + } + }, + + // The getc() family of functions that returns either a char or an EOF. + SPEC { + FOR_FUNCTION(getc), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { // FIXME: EOF is assumed to be defined as -1. 
+ RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { SEG(-1, 255) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(fgetc), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant }, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { // FIXME: EOF is assumed to be defined as -1. + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { SEG(-1, 255) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(getchar), + SPEC_DATA { + ARGUMENT_TYPES {}, + RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { // FIXME: EOF is assumed to be defined as -1. + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { SEG(-1, 255) } + } + } + } + } + }, + + // read()-like functions that never return more than buffer size. + SPEC { + FOR_FUNCTION(read), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, SizeTy }, + RETURN_TYPE(SSizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, RANGE_KIND(ComparesToArgument), + IS_LESS_THAN(ARG_NO(2)) + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { SEG(-1, SSizeMax) } + } + }, + } + } + }, + SPEC { + FOR_FUNCTION(write), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, SizeTy }, + RETURN_TYPE(SSizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, RANGE_KIND(ComparesToArgument), + IS_LESS_THAN(ARG_NO(2)) + }, + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { SEG(-1, SSizeMax) } + } + }, + } + } + }, + SPEC { FOR_FUNCTION(fread), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, SizeTy, Irrelevant }, + RETURN_TYPE(SizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, RANGE_KIND(ComparesToArgument), + IS_LESS_THAN(ARG_NO(2)) + } + } + } + } + }, + SPEC { + FOR_FUNCTION(fwrite), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, SizeTy, Irrelevant }, + RETURN_TYPE(SizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, 
RANGE_KIND(ComparesToArgument), + IS_LESS_THAN(ARG_NO(2)) + } + } + } + } + }, + + // getline()-like functions either fail or read at least the delimiter. + SPEC { + FOR_FUNCTION(getline), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, Irrelevant }, + RETURN_TYPE(SSizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(-1) U SEG(1, SSizeMax) } + } + } + } + } + }, + SPEC { + FOR_FUNCTION(getdelim), + SPEC_DATA { + ARGUMENT_TYPES { Irrelevant, Irrelevant, Irrelevant, Irrelevant }, + RETURN_TYPE(SSizeTy), + INVALIDATION_APPROACH(NoEvalCall), + BRANCHES { + BRANCH { + RANGE { + RET_VAL, RANGE_KIND(WithinRange), + SET { POINT(-1) U SEG(1, SSizeMax) } + } + } + } + } + } + }; +} + +void ento::registerStdLibraryFunctionsChecker(CheckerManager &mgr) { + mgr.registerChecker(); +} Index: test/Analysis/std-library-functions.c =================================================================== --- /dev/null +++ test/Analysis/std-library-functions.c @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); + +int glob; + +typedef struct FILE FILE; +#define EOF -1 + +int getc(FILE *); +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { + clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} + clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +int fgetc(FILE *); +void test_fgets(FILE *fp) { + clang_analyzer_eval(fgetc(fp) < 256); // expected-warning{{TRUE}} + clang_analyzer_eval(fgetc(fp) >= 0); // expected-warning{{UNKNOWN}} +} + + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t read(int, void *, size_t); +ssize_t write(int, const void *, size_t); +void test_read_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) { + clang_analyzer_eval(x <= 
10); // expected-warning{{TRUE}}
+    ssize_t y = read(fd, &glob, sizeof(glob));
+    if (y >= 0) {
+      clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{TRUE}}
+    } else {
+      // -1 wraps to a huge unsigned value when converted to size_t!
+      clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{FALSE}}
+    }
+  } else {
+    clang_analyzer_eval(x == -1); // expected-warning{{TRUE}}
+  }
+}
+
+size_t fread(void *, size_t, size_t, FILE *);
+size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict);
+void test_fread_fwrite(FILE *fp, int *buf) { // result should not exceed the item count passed
+  size_t x = fwrite(buf, sizeof(int), 10, fp);
+  clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}}
+  size_t y = fread(buf, sizeof(int), 10, fp);
+  clang_analyzer_eval(y <= 10); // expected-warning{{TRUE}}
+  size_t z = fwrite(buf, sizeof(int), y, fp);
+  // FIXME: should be TRUE once symbol-symbol constraint support is improved.
+  clang_analyzer_eval(z <= y); // expected-warning{{UNKNOWN}}
+}
+
+ssize_t getline(char **, size_t *, FILE *);
+void test_getline(FILE *fp) { // any result other than -1 should be non-zero
+  char *line = 0;
+  size_t n = 0;
+  ssize_t len;
+  while ((len = getline(&line, &n, fp)) != -1) {
+    clang_analyzer_eval(len == 0); // expected-warning{{FALSE}}
+  }
+}
+
+int isascii(int);
+void test_isascii(int x) { // constants fold, symbolic x is range-split, 'glob' survives the call
+  clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}}
+  if (isascii(x)) {
+    clang_analyzer_eval(x < 128); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}}
+  } else {
+    if (x > 42)
+      clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}}
+    else
+      clang_analyzer_eval(x < 0); // expected-warning{{TRUE}}
+  }
+  glob = 1;
+  isascii('a');
+  clang_analyzer_eval(glob); // expected-warning{{TRUE}}
+}
+
+int islower(int);
+void test_islower(int x) { // a symbolic x accepted by islower() cannot be below 'a'
+  clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}}
+  if (islower(x))
+    clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}}
+}
+
+int getchar(void);
+void test_getchar() { // non-EOF results of getchar() should lie in [0, 255]
+  int x = getchar();
+  if (x == EOF)
+    return;
+  clang_analyzer_eval(x < 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval(x < 256); // expected-warning{{TRUE}}
+}
+
+int isalpha(int);
+void test_isalpha() { // the result for 128 is expected to stay unknown
+  clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}}
+  clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}}
+}
+
+int isalnum(int);
+void test_alnum() { // constant arguments should evaluate to definite results
+  clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isalnum(')')); // expected-warning{{FALSE}}
+}
+
+int isblank(int);
+void test_isblank() { // tab and space count as blank; newline does not
+  clang_analyzer_eval(isblank('\t')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isblank(' ')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isblank('\n')); // expected-warning{{FALSE}}
+}
+
+int ispunct(int);
+void test_ispunct(int x) { // covers constants and the upper bound on a symbolic x
+  clang_analyzer_eval(ispunct(' ')); // expected-warning{{FALSE}}
+  clang_analyzer_eval(ispunct(-1)); // expected-warning{{FALSE}}
+  clang_analyzer_eval(ispunct('#')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ispunct('_')); // expected-warning{{TRUE}}
+  if (ispunct(x))
+    clang_analyzer_eval(x < 127); // expected-warning{{TRUE}}
+}
+
+int isupper(int);
+void test_isupper(int x) { // a symbolic x accepted by isupper() cannot be below 'A'
+  if (isupper(x))
+    clang_analyzer_eval(x < 'A'); // expected-warning{{FALSE}}
+}
+
+int isgraph(int);
+int isprint(int);
+void test_isgraph_isprint(int x) { // isgraph() on the char-converted value should imply isprint(x)
+  char y = x;
+  if (isgraph(y))
+    clang_analyzer_eval(isprint(x)); // expected-warning{{TRUE}}
+}
+
+int isdigit(int);
+void test_mixed_branches(int x) { // digits are graphic and non-blank; other ASCII values split
+  if (isdigit(x)) {
+    clang_analyzer_eval(isgraph(x)); // expected-warning{{TRUE}}
+    clang_analyzer_eval(isblank(x)); // expected-warning{{FALSE}}
+  } else if (isascii(x)) {
+    // isalnum() bifurcates here.
+    clang_analyzer_eval(isalnum(x)); // expected-warning{{TRUE}} // expected-warning{{FALSE}}
+    clang_analyzer_eval(isprint(x)); // expected-warning{{TRUE}} // expected-warning{{FALSE}}
+  }
+}
+
+int isspace(int);
+void test_isspace(int x) { // an ASCII x whose char value is ' ' should satisfy isspace()
+  if (!isascii(x))
+    return;
+  char y = x;
+  if (y == ' ')
+    clang_analyzer_eval(isspace(x)); // expected-warning{{TRUE}}
+}
+
+int isxdigit(int);
+void test_isxdigit(int x) { // upper-case hex digits should lie in ['A', 'F']
+  if (isxdigit(x) && isupper(x)) {
+    clang_analyzer_eval(x >= 'A'); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x <= 'F'); // expected-warning{{TRUE}}
+  }
+}
+
+void test_call_by_pointer() { // modeling should also apply to calls made through a function pointer
+  typedef int (*func)(int);
+  func f = isascii;
+  clang_analyzer_eval(f('A')); // expected-warning{{TRUE}}
+  f = ispunct;
+  clang_analyzer_eval(f('A')); // expected-warning{{FALSE}}
+}
Index: test/Analysis/std-library-functions.cpp
===================================================================
--- /dev/null
+++ test/Analysis/std-library-functions.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s
+
+// Test that we don't model functions with broken prototypes,
+// because they probably work differently as well.
+//
+// This test lives in a separate file because we wanted to test all functions
+// in the .c file; however, in C there are no overloads.
+
+void clang_analyzer_eval(bool);
+bool isalpha(char);
+
+void test() { // isalpha(char) has a mismatching prototype, so the result should stay unknown
+  clang_analyzer_eval(isalpha('A')); // no-crash // expected-warning{{UNKNOWN}}
+}