Index: include/clang/StaticAnalyzer/Checkers/Checkers.td =================================================================== --- include/clang/StaticAnalyzer/Checkers/Checkers.td +++ include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -799,6 +799,13 @@ def GenericTaintChecker : Checker<"TaintPropagation">, HelpText<"Generate taint information used by other checkers">, + CheckerOptions<[ + CmdLineOption, + ]>, Documentation; } // end "alpha.security.taint" Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -15,16 +15,18 @@ //===----------------------------------------------------------------------===// #include "Taint.h" -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "Yaml.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include -#include +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/YAMLTraits.h" +#include #include using namespace clang; @@ -44,14 +46,51 @@ void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; - void printState(raw_ostream &Out, ProgramStateRef State, - const char *NL, const char *Sep) const override; + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; -private: - static const unsigned InvalidArgIndex = UINT_MAX; + using ArgVector = SmallVector; + using SignedArgVector = SmallVector; + + enum class VariadicType { None, Src, Dst }; + + /// Used to parse the configuration file. + struct TaintConfiguration { + using NameArgsPair = std::pair; + + struct Propagation { + std::string Name; + ArgVector SrcArgs; + SignedArgVector DstArgs; + VariadicType VarType; + unsigned VarIndex; + }; + + std::vector Propagations; + std::vector Filters; + std::vector Sinks; + + TaintConfiguration() = default; + TaintConfiguration(const TaintConfiguration &) = delete; + TaintConfiguration(TaintConfiguration &&) = default; + TaintConfiguration &operator=(const TaintConfiguration &) = delete; + TaintConfiguration &operator=(TaintConfiguration &&) = default; + }; + + /// Convert SignedArgVector to ArgVector. + ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, + SignedArgVector Args); + + /// Parse the config. + void parseConfiguration(CheckerManager &Mgr, const std::string &Option, + TaintConfiguration &&Config); + + static const unsigned InvalidArgIndex{std::numeric_limits::max()}; /// Denotes the return vale. - static const unsigned ReturnValueIndex = UINT_MAX - 1; + static const unsigned ReturnValueIndex{std::numeric_limits::max() - + 1}; +private: mutable std::unique_ptr BT; void initBugType() const { if (!BT) @@ -97,8 +136,6 @@ bool generateReportIfTainted(const Expr *E, const char Msg[], CheckerContext &C) const; - using ArgVector = SmallVector; - /// A struct used to specify taint propagation rules for a function. /// /// If any of the possible taint source arguments is tainted, all of the @@ -109,8 +146,6 @@ /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { - enum class VariadicType { None, Src, Dst }; - using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, CheckerContext &C); @@ -131,8 +166,7 @@ : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), PropagationFunc(nullptr) {} - TaintPropagationRule(std::initializer_list &&Src, - std::initializer_list &&Dst, + TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, VariadicType Var = VariadicType::None, unsigned VarIndex = InvalidArgIndex, PropagationFuncType Func = nullptr) @@ -176,6 +210,19 @@ static bool postSocket(bool IsTainted, const CallExpr *CE, CheckerContext &C); }; + + using NameRuleMap = llvm::StringMap; + using NameArgMap = llvm::StringMap; + + /// Defines a map between the propagation function's name and + /// TaintPropagationRule. + NameRuleMap CustomPropagations; + + /// Defines a map between the filter function's name and filtering args. + NameArgMap CustomFilters; + + /// Defines a map between the sink function's name and sinking args. + NameArgMap CustomSinks; }; const unsigned GenericTaintChecker::ReturnValueIndex; @@ -193,15 +240,94 @@ "Untrusted data is used to specify the buffer size " "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " "for character data and the null terminator)"; - } // end of anonymous namespace +using TaintConfig = GenericTaintChecker::TaintConfiguration; + +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) + +namespace llvm { +namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &IO, TaintConfig &Config) { + IO.mapOptional("Propagations", Config.Propagations); + IO.mapOptional("Filters", Config.Filters); + IO.mapOptional("Sinks", Config.Sinks); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { + IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("SrcArgs", Propagation.SrcArgs); + IO.mapOptional("DstArgs", Propagation.DstArgs); + IO.mapOptional("VariadicType", Propagation.VarType, + GenericTaintChecker::VariadicType::None); + IO.mapOptional("VariadicIndex", Propagation.VarIndex, + GenericTaintChecker::InvalidArgIndex); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { + IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); + IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); + IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { + IO.mapRequired("Name", NameArg.first); + IO.mapRequired("Args", NameArg.second); + } +}; +} // namespace yaml +} // namespace llvm + /// A set which is used to pass information from call pre-visit instruction /// to the call post-visit. The values are unsigned integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( + CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { + ArgVector Result; + for (int Arg : Args) { + if (Arg == -1) + Result.push_back(ReturnValueIndex); + else if (Arg < -1) { + Result.push_back(InvalidArgIndex); + Mgr.reportInvalidCheckerOptionValue( + this, Option, + "an argument number for propagation rules greater or equal to -1"); + } else + Result.push_back(static_cast(Arg)); + } + return Result; +} + +void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, + const std::string &Option, + TaintConfiguration &&Config) { + for (auto &P : Config.Propagations) { + GenericTaintChecker::CustomPropagations.try_emplace( + P.Name, std::move(P.SrcArgs), + convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); + } + + for (auto &F : Config.Filters) { + GenericTaintChecker::CustomFilters.try_emplace(F.first, + std::move(F.second)); + } + + for (auto &S : Config.Sinks) { + GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + } +} + GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { @@ -218,7 +344,8 @@ .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) - .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar_unlocked", + TaintPropagationRule({}, {ReturnValueIndex})) .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) @@ -454,7 +581,7 @@ // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) break; } @@ -485,7 +612,7 @@ // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { const Expr *Arg = CE->getArg(i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); @@ -550,7 +677,7 @@ static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, - unsigned int &ArgNum) { + unsigned &ArgNum) { // Find if the function contains a format string argument. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, // vsnprintf, syslog, custom annotated functions. @@ -603,7 +730,7 @@ bool GenericTaintChecker::checkUncontrolledFormatString( const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. - unsigned int ArgNum = 0; + unsigned ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; @@ -676,8 +803,15 @@ generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); } -void ento::registerGenericTaintChecker(CheckerManager &mgr) { - mgr.registerChecker(); +void ento::registerGenericTaintChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker(); + std::string Option{"Config"}; + StringRef ConfigFile = + Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); + llvm::Optional Config = + getConfiguration(Mgr, Checker, Option, ConfigFile); + if (Config) + Checker->parseConfiguration(Mgr, Option, std::move(Config).getValue()); } bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { Index: lib/StaticAnalyzer/Checkers/Yaml.h =================================================================== --- /dev/null +++ lib/StaticAnalyzer/Checkers/Yaml.h @@ -0,0 +1,59 @@ +//== Yaml.h ---------------------------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines convenience functions for handling YAML configuration files +// for checkers/packages. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "llvm/Support/YAMLTraits.h" + +namespace clang { +namespace ento { + +/// Read the given file from the filesystem and parse it as a yaml file. The +/// template parameter must have a yaml MappingTraits. +/// Emit diagnostic error in case of any failure. +template +llvm::Optional getConfiguration(CheckerManager &Mgr, Checker *Chk, + StringRef Option, StringRef ConfigFile) { + if (ConfigFile.trim().empty()) + return None; + + llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get(); + llvm::ErrorOr> Buffer = + FS->getBufferForFile(ConfigFile.str()); + + if (std::error_code ec = Buffer.getError()) { + Mgr.reportInvalidCheckerOptionValue(Chk, Option, + "a valid filename instead of '" + + std::string(ConfigFile) + "'"); + return None; + } + + llvm::yaml::Input Input(Buffer.get()->getBuffer()); + T Config; + Input >> Config; + + if (std::error_code ec = Input.error()) { + Mgr.reportInvalidCheckerOptionValue(Chk, Option, + "a valid yaml file: " + ec.message()); + return None; + } + + return Config; +} + +} // namespace ento +} // namespace clang + +#endif // LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H Index: test/Analysis/Inputs/taint-generic-config-ill-formed.yaml =================================================================== --- /dev/null +++ test/Analysis/Inputs/taint-generic-config-ill-formed.yaml @@ -0,0 +1,4 @@ +Propagations: + - Name: mySource1 + DstArgs: [-1] + NotExist: 1 Index: test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml =================================================================== --- /dev/null +++ test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml @@ -0,0 +1,3 @@ +Propagations: + - Name: mySource1 + DstArgs: [-2] Index: test/Analysis/Inputs/taint-generic-config.yaml =================================================================== --- /dev/null +++ test/Analysis/Inputs/taint-generic-config.yaml @@ -0,0 +1,50 @@ +# A list of source/propagation function +Propagations: + # int x = mySource1(); // x is tainted + - Name: mySource1 + DstArgs: [-1] # Index for return value + + # int x; + # mySource2(&x); // x is tainted + - Name: mySource2 + DstArgs: [0] + + # int x, y; + # myScanf("%d %d", &x, &y); // x and y are tainted + - Name: myScanf + VariadicType: Dst + VariadicIndex: 1 + + # int x; // x is tainted + # int y; + # myPropagator(x, &y); // y is tainted + - Name: myPropagator + SrcArgs: [0] + DstArgs: [1] + + # constexpr unsigned size = 100; + # char buf[size]; + # int x, y; + # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted + # // the return value and the buf will be tainted + - Name: mySnprintf + SrcArgs: [1] + DstArgs: [0, -1] + VariadicType: Src + VariadicIndex: 3 + +# A list of filter functions +Filters: + # int x; // x is tainted + # myFilter(&x); // x is not tainted anymore + - Name: myFilter + Args: [0] + +# A list of sink functions +Sinks: + # int x, y; // x and y are tainted + # mySink(x, 0, 1); // It will warn + # mySink(0, 1, y); // It will warn + # mySink(0, x, 1); // It won't warn + - Name: mySink + Args: [0, 2] Index: test/Analysis/taint-generic.c =================================================================== --- test/Analysis/taint-generic.c +++ test/Analysis/taint-generic.c @@ -1,5 +1,49 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s -// RUN: %clang_analyze_cc1 -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s +// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \ +// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ +// RUN: -analyzer-config \ +// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml + +// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \ +// RUN: -DFILE_IS_STRUCT \ +// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ +// RUN: -analyzer-config \ +// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml + +// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-config \ +// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \ +// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE + +// CHECK-INVALID-FILE: (frontend): invalid input for checker option +// CHECK-INVALID-FILE-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-INVALID-FILE-SAME: that expects a valid filename instead of +// CHECK-INVALID-FILE-SAME: 'justguessit' + +// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-config \ +// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \ +// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-ILL-FORMED + +// CHECK-ILL-FORMED: (frontend): invalid input for checker option +// CHECK-ILL-FORMED-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-ILL-FORMED-SAME: that expects a valid yaml file: Invalid argument + +// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-config \ +// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \ +// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG + +// CHECK-INVALID-ARG: (frontend): invalid input for checker option +// CHECK-INVALID-ARG-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-INVALID-ARG-SAME: that expects an argument number for propagation +// CHECK-INVALID-ARG-SAME: rules greater or equal to -1 int scanf(const char *restrict format, ...); char *gets(char *str);