diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -24,9 +24,10 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Support/YAMLTraits.h" +#include #include +#include #include using namespace clang; @@ -56,10 +57,11 @@ /// Used to parse the configuration file. struct TaintConfiguration { - using NameArgsPair = std::pair; + using NameScopeArgs = std::tuple; struct Propagation { std::string Name; + std::string Scope; ArgVector SrcArgs; SignedArgVector DstArgs; VariadicType VarType; @@ -67,8 +69,8 @@ }; std::vector Propagations; - std::vector Filters; - std::vector Sinks; + std::vector Filters; + std::vector Sinks; TaintConfiguration() = default; TaintConfiguration(const TaintConfiguration &) = default; @@ -97,18 +99,49 @@ BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); } + struct FunctionData { + FunctionData() = delete; + FunctionData(const FunctionData &) = default; + FunctionData(FunctionData &&) = default; + FunctionData &operator=(const FunctionData &) = delete; + FunctionData &operator=(FunctionData &&) = delete; + + static Optional create(const CallExpr *CE, + const CheckerContext &C) { + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || (FDecl->getKind() != Decl::Function && + FDecl->getKind() != Decl::CXXMethod)) + return None; + + StringRef Name = C.getCalleeName(FDecl); + std::string FullName = FDecl->getQualifiedNameAsString(); + if (Name.empty() || FullName.empty()) + return None; + + return FunctionData{FDecl, Name, FullName}; + } + + bool isInScope(StringRef Scope) const { + return StringRef(FullName).startswith(Scope); + } + + const FunctionDecl *const FDecl; + const StringRef Name; + const std::string FullName; + }; + /// Catch taint related bugs. Check if tainted data is passed to a /// system call etc. Returns true on matching. - bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name, + bool checkPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Add taint sources on a pre-visit. Returns true on matching. - bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl, - StringRef Name, CheckerContext &C) const; + bool addSourcesPre(const CallExpr *CE, const FunctionData &FData, + CheckerContext &C) const; /// Mark filter's arguments not tainted on a pre-visit. Returns true on /// matching. - bool addFiltersPre(const CallExpr *CE, StringRef Name, + bool addFiltersPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Propagate taint generated at pre-visit. Returns true on matching. @@ -149,7 +182,7 @@ /// Check if tainted data is used as a custom sink's parameter. static constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; - bool checkCustomSinks(const CallExpr *CE, StringRef Name, + bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; /// Generate a report if the expression is tainted or points to tainted data. @@ -157,8 +190,17 @@ CheckerContext &C) const; struct TaintPropagationRule; - using NameRuleMap = llvm::StringMap; - using NameArgMap = llvm::StringMap; + template + using ConfigDataMap = + std::unordered_multimap>; + using NameRuleMap = ConfigDataMap; + using NameArgMap = ConfigDataMap; + + /// Find a function with the given name and scope. Returns the first match + /// or the end of the map. + template + static auto findFunctionInConfig(const ConfigDataMap &Map, + const FunctionData &FData); /// A struct used to specify taint propagation rules for a function. /// @@ -200,8 +242,7 @@ /// Get the propagation rule for a given function. static TaintPropagationRule getTaintPropagationRule(const NameRuleMap &CustomPropagations, - const FunctionDecl *FDecl, StringRef Name, - CheckerContext &C); + const FunctionData &FData, CheckerContext &C); void addSrcArg(unsigned A) { SrcArgs.push_back(A); } void addDstArg(unsigned A) { DstArgs.push_back(A); } @@ -236,14 +277,15 @@ CheckerContext &C); }; - /// Defines a map between the propagation function's name and - /// TaintPropagationRule. + /// Defines a map between the propagation function's name, scope + /// and TaintPropagationRule. NameRuleMap CustomPropagations; - /// Defines a map between the filter function's name and filtering args. + /// Defines a map between the filter function's name, scope and filtering + /// args. NameArgMap CustomFilters; - /// Defines a map between the sink function's name and sinking args. + /// Defines a map between the sink function's name, scope and sinking args. NameArgMap CustomSinks; }; @@ -260,7 +302,7 @@ using TaintConfig = GenericTaintChecker::TaintConfiguration; LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) -LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) namespace llvm { namespace yaml { @@ -275,6 +317,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("Scope", Propagation.Scope); IO.mapOptional("SrcArgs", Propagation.SrcArgs); IO.mapOptional("DstArgs", Propagation.DstArgs); IO.mapOptional("VariadicType", Propagation.VarType, @@ -292,10 +335,11 @@ } }; -template <> struct MappingTraits { - static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { - IO.mapRequired("Name", NameArg.first); - IO.mapRequired("Args", NameArg.second); +template <> struct MappingTraits { + static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { + IO.mapRequired("Name", std::get<0>(NSA)); + IO.mapOptional("Scope", std::get<1>(NSA)); + IO.mapRequired("Args", std::get<2>(NSA)); } }; } // namespace yaml @@ -328,31 +372,51 @@ const std::string &Option, TaintConfiguration &&Config) { for (auto &P : Config.Propagations) { - GenericTaintChecker::CustomPropagations.try_emplace( - P.Name, std::move(P.SrcArgs), - convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); + GenericTaintChecker::CustomPropagations.emplace( + P.Name, + std::make_pair(P.Scope, TaintPropagationRule{ + std::move(P.SrcArgs), + convertToArgVector(Mgr, Option, P.DstArgs), + P.VarType, P.VarIndex})); } for (auto &F : Config.Filters) { - GenericTaintChecker::CustomFilters.try_emplace(F.first, - std::move(F.second)); + GenericTaintChecker::CustomFilters.emplace( + std::get<0>(F), + std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); } for (auto &S : Config.Sinks) { - GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + GenericTaintChecker::CustomSinks.emplace( + std::get<0>(S), + std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); } } +template +auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap &Map, + const FunctionData &FData) { + auto Range = Map.equal_range(FData.Name); + auto It = + std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { + const auto &Value = Entry.second; + StringRef Scope = Value.first; + return Scope.empty() || FData.isInScope(Scope); + }); + return It != Range.second ? It : Map.end(); +} + GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( - const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, - StringRef Name, CheckerContext &C) { + const NameRuleMap &CustomPropagations, const FunctionData &FData, + CheckerContext &C) { // TODO: Currently, we might lose precision here: we always mark a return // value as tainted even if it's just a pointer, pointing to tainted data. // Check for exact name match for functions without builtin substitutes. + // Use qualified name, because these are C functions without namespace. TaintPropagationRule Rule = - llvm::StringSwitch(Name) + llvm::StringSwitch(FData.FullName) // Source functions // TODO: Add support for vfscanf & family. .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) @@ -397,6 +461,7 @@ // Check if it's one of the memory setting/copying functions. // This check is specialized but faster then calling isCLibraryFunction. + const FunctionDecl *FDecl = FData.FDecl; unsigned BId = 0; if ((BId = FDecl->getMemoryFunctionKind())) switch (BId) { @@ -440,35 +505,32 @@ // or smart memory copy: // - memccpy - copying until hitting a special character. - auto It = CustomPropagations.find(Name); - if (It != CustomPropagations.end()) - return It->getValue(); + auto It = findFunctionInConfig(CustomPropagations, FData); + if (It != CustomPropagations.end()) { + const auto &Value = It->second; + return Value.second; + } return TaintPropagationRule(); } void GenericTaintChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { - const FunctionDecl *FDecl = C.getCalleeDecl(CE); - // Check for non-global functions. - if (!FDecl || FDecl->getKind() != Decl::Function) - return; - - StringRef Name = C.getCalleeName(FDecl); - if (Name.empty()) + Optional FData = FunctionData::create(CE, C); + if (!FData) return; // Check for taintedness related errors first: system call, uncontrolled // format string, tainted buffer size. - if (checkPre(CE, FDecl, Name, C)) + if (checkPre(CE, *FData, C)) return; // Marks the function's arguments and/or return value tainted if it present in // the list. - if (addSourcesPre(CE, FDecl, Name, C)) + if (addSourcesPre(CE, *FData, C)) return; - addFiltersPre(CE, Name, C); + addFiltersPre(CE, *FData, C); } void GenericTaintChecker::checkPostStmt(const CallExpr *CE, @@ -484,12 +546,11 @@ } bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, - const FunctionDecl *FDecl, - StringRef Name, + const FunctionData &FData, CheckerContext &C) const { // First, try generating a propagation rule for this function. TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( - this->CustomPropagations, FDecl, Name, C); + this->CustomPropagations, FData, C); if (!Rule.isNull()) { ProgramStateRef State = Rule.process(CE, C); if (State) { @@ -500,14 +561,16 @@ return false; } -bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name, +bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, + const FunctionData &FData, CheckerContext &C) const { - auto It = CustomFilters.find(Name); + auto It = findFunctionInConfig(CustomFilters, FData); if (It == CustomFilters.end()) return false; ProgramStateRef State = C.getState(); - const ArgVector &Args = It->getValue(); + const auto &Value = It->second; + const ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { if (ArgNum >= CE->getNumArgs()) continue; @@ -564,19 +627,19 @@ } bool GenericTaintChecker::checkPre(const CallExpr *CE, - const FunctionDecl *FDecl, StringRef Name, + const FunctionData &FData, CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; - if (checkSystemCall(CE, Name, C)) + if (checkSystemCall(CE, FData.Name, C)) return true; - if (checkTaintedBufferSize(CE, FDecl, C)) + if (checkTaintedBufferSize(CE, FData.FDecl, C)) return true; - if (checkCustomSinks(CE, Name, C)) + if (checkCustomSinks(CE, FData, C)) return true; return false; @@ -595,7 +658,7 @@ QualType ArgTy = Arg->getType().getCanonicalType(); if (!ArgTy->isPointerType()) - return None; + return State->getSVal(*AddrLoc); QualType ValTy = ArgTy->getPointeeType(); @@ -848,13 +911,15 @@ generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); } -bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, +bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, + const FunctionData &FData, CheckerContext &C) const { - auto It = CustomSinks.find(Name); + auto It = findFunctionInConfig(CustomSinks, FData); if (It == CustomSinks.end()) return false; - const GenericTaintChecker::ArgVector &Args = It->getValue(); + const auto &Value = It->second; + const GenericTaintChecker::ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { if (ArgNum >= CE->getNumArgs()) continue; diff --git a/clang/test/Analysis/Inputs/taint-generic-config.yaml b/clang/test/Analysis/Inputs/taint-generic-config.yaml --- a/clang/test/Analysis/Inputs/taint-generic-config.yaml +++ b/clang/test/Analysis/Inputs/taint-generic-config.yaml @@ -9,12 +9,29 @@ - Name: mySource2 DstArgs: [0] + # int x = myNamespace::mySource3(); // x is tainted + - Name: mySource3 + Scope: "myNamespace::" + DstArgs: [-1] + + # int x = myAnotherNamespace::mySource3(); // x is tainted + - Name: mySource3 + Scope: "myAnotherNamespace::" + DstArgs: [-1] + # int x, y; # myScanf("%d %d", &x, &y); // x and y are tainted - Name: myScanf VariadicType: Dst VariadicIndex: 1 + # int x, y; + # Foo::myScanf("%d %d", &x, &y); // x and y are tainted + - Name: myMemberScanf + Scope: "Foo::" + VariadicType: Dst + VariadicIndex: 1 + # int x; // x is tainted # int y; # myPropagator(x, &y); // y is tainted @@ -40,6 +57,18 @@ - Name: isOutOfRange Args: [0] + # int x; // x is tainted + # myNamespace::isOutOfRange(&x); // x is not tainted anymore + - Name: isOutOfRange2 + Scope: "myNamespace::" + Args: [0] + + # int x; // x is tainted + # myAnotherNamespace::isOutOfRange(&x); // x is not tainted anymore + - Name: isOutOfRange2 + Scope: "myAnotherNamespace::" + Args: [0] + # A list of sink functions Sinks: # int x, y; // x and y are tainted @@ -48,3 +77,15 @@ # mySink(0, x, 1); // It won't warn - Name: mySink Args: [0, 2] + + # int x; // x is tainted + # myNamespace::mySink(x); // It will warn + - Name: mySink2 + Scope: "myNamespace::" + Args: [0] + + # int x; // x is tainted + # myAnotherNamespace::mySink(x); // It will warn + - Name: mySink2 + Scope: "myAnotherNamespace::" + Args: [0] diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/taint-generic.cpp @@ -0,0 +1,126 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s + +#define BUFSIZE 10 +int Buffer[BUFSIZE]; + +int scanf(const char*, ...); +int mySource1(); +int mySource3(); + +bool isOutOfRange2(const int*); + +void mySink2(int); + +// Test configuration +namespace myNamespace { + void scanf(const char*, ...); + void myScanf(const char*, ...); + int mySource3(); + + bool isOutOfRange(const int*); + bool isOutOfRange2(const int*); + + void mySink(int, int, int); + void mySink2(int); +} + +namespace myAnotherNamespace { + int mySource3(); + + bool isOutOfRange2(const int*); + + void mySink2(int); +} + +void testConfigurationNamespacePropagation1() { + int x; + // The built-in functions should be matched only for functions in + // the global namespace + myNamespace::scanf("%d", &x); + Buffer[x] = 1; // no-warning + + scanf("%d", &x); + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationNamespacePropagation2() { + int x = mySource3(); + Buffer[x] = 1; // no-warning + + int y = myNamespace::mySource3(); + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationNamespacePropagation3() { + int x = myAnotherNamespace::mySource3(); + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationNamespacePropagation4() { + int x; + // Configured functions without scope should match for all function. + myNamespace::myScanf("%d", &x); + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationNamespaceFilter1() { + int x = mySource1(); + if (myNamespace::isOutOfRange2(&x)) + return; + Buffer[x] = 1; // no-warning + + int y = mySource1(); + if (isOutOfRange2(&y)) + return; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationNamespaceFilter2() { + int x = mySource1(); + if (myAnotherNamespace::isOutOfRange2(&x)) + return; + Buffer[x] = 1; // no-warning +} + +void testConfigurationNamespaceFilter3() { + int x = mySource1(); + if (myNamespace::isOutOfRange(&x)) + return; + Buffer[x] = 1; // no-warning +} + +void testConfigurationNamespaceSink1() { + int x = mySource1(); + mySink2(x); // no-warning + + int y = mySource1(); + myNamespace::mySink2(y); + // expected-warning@-1 {{Untrusted data is passed to a user-defined sink}} +} + +void testConfigurationNamespaceSink2() { + int x = mySource1(); + myAnotherNamespace::mySink2(x); + // expected-warning@-1 {{Untrusted data is passed to a user-defined sink}} +} + +void testConfigurationNamespaceSink3() { + int x = mySource1(); + myNamespace::mySink(x, 0, 1); + // expected-warning@-1 {{Untrusted data is passed to a user-defined sink}} +} + +struct Foo { + void scanf(const char*, int*); + void myMemberScanf(const char*, int*); +}; + +void testConfigurationMemberFunc() { + int x; + Foo foo; + foo.scanf("%d", &x); + Buffer[x] = 1; // no-warning + + foo.myMemberScanf("%d", &x); + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +}