diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/Builtins.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" @@ -26,6 +27,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/YAMLTraits.h" #include @@ -38,7 +40,7 @@ using namespace ento; using namespace taint; -using llvm::ImmutableSet; +using llvm::ImmutableMap; namespace { @@ -416,13 +418,17 @@ } // namespace yaml } // namespace llvm -/// A set which is used to pass information from call pre-visit instruction -/// to the call post-visit. The values are signed integers, which are either + +using ArgIdxSValMap = ImmutableMap; +/// A map from argument indexes to the symbolic values. +/// It is used to pass information from call pre-visit instruction +/// to the call post-visit. The keys are signed integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which -/// points to data, which should be tainted on return. -REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, - ImmutableSet) -REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) +/// points to data, which should be tainted on return. The values of the map are +/// the symbolic values of the corresponding arguments *before* the call-event. +REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, ArgIdxSValMap) + +REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(ArgIdxSValFactory, ArgIdxTy, SVal) void GenericTaintRuleParser::validateArgVector(const std::string &Option, const ArgVecTy &Args) const { @@ -767,40 +773,88 @@ // checkPostStmt. ProgramStateRef State = C.getState(); const StackFrameContext *CurrentFrame = C.getStackFrame(); + ExplodedNode *Pred = C.getPredecessor(); // Depending on what was tainted at pre-visit, we determined a set of // arguments which should be tainted after the function returns. These are // stored in the state as TaintArgsOnPostVisit set. TaintArgsOnPostVisitTy TaintArgsMap = State->get(); - const ImmutableSet *TaintArgs = TaintArgsMap.lookup(CurrentFrame); + const ArgIdxSValMap *TaintArgs = TaintArgsMap.lookup(CurrentFrame); if (!TaintArgs) return; assert(!TaintArgs->isEmpty()); - LLVM_DEBUG(for (ArgIdxTy I - : *TaintArgs) { + LLVM_DEBUG(for (const auto& [ArgNum, PreCallSVal] : *TaintArgs) { llvm::dbgs() << "PostCall<"; Call.dump(llvm::dbgs()); - llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; + llvm::dbgs() << "> actually wants to taint arg index: " << ArgNum << '\n'; }); - for (ArgIdxTy ArgNum : *TaintArgs) { + struct Pack { + SVal PreValue; + SVal PostValue; + ArgIdxTy Index; + }; + + SmallVector ActualTaintedParams; + + for (const auto& [ArgNum, PreCallSVal] : *TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { State = addTaint(State, Call.getReturnValue()); + const NoteTag *Note = + C.getNoteTag([RetVal = Call.getReturnValue()]( + PathSensitiveBugReport &BR) -> std::string { + return BR.isInteresting(RetVal) ? "Returned tainted value" : ""; + }); + Pred = C.addTransition(State, Pred, Note); continue; } // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) + const SVal PostCallSVal = Call.getArgSVal(ArgNum); + if (auto V = getPointeeOf(C, PostCallSVal)) { + ActualTaintedParams.push_back({PreCallSVal, V.value(), ArgNum}); State = addTaint(State, *V); + } } // Clear up the taint info from the state. State = State->remove(CurrentFrame); - C.addTransition(State); + const NoteTag *Note = [&]() -> const NoteTag * { + if (ActualTaintedParams.empty()) + return nullptr; + return C.getNoteTag( + [ActualTaintedParams](PathSensitiveBugReport &BR) -> std::string { + const auto IsInteresting = [&BR](const auto &Param) -> bool { + return BR.isInteresting(Param.PostValue); + }; + + auto InterestingTaintedParams = + llvm::make_filter_range(ActualTaintedParams, IsInteresting); + + if (InterestingTaintedParams.empty()) + return ""; + + SmallVector MsgParts; + for (const auto &Param : InterestingTaintedParams) { + BR.markInteresting(Param.PreValue); + const auto ParamOrdinal = Param.Index + 1; + MsgParts.emplace_back((Twine(std::to_string(ParamOrdinal)) + + llvm::getOrdinalSuffix(ParamOrdinal)) + .str()); + } + + return (Twine("Propagated taint to the ") + + llvm::join(std::move(MsgParts), ", ") + + (MsgParts.size() == 1 ? " parameter" : " parameters")) + .str(); + }); + }(); + + C.addTransition(State, Pred, Note); } void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, @@ -861,15 +915,15 @@ }; /// Propagate taint where it is necessary. - auto &F = State->getStateManager().get_context(); - ImmutableSet Result = F.getEmptySet(); + auto &F = State->getStateManager().get_context(); + ArgIdxSValMap Result = F.getEmptyMap(); ForEachCallArg( [&](ArgIdxTy I, const Expr *E, SVal V) { if (PropDstArgs.contains(I)) { LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';); - Result = F.add(Result, I); + Result = F.add(Result, I, V); } // TODO: We should traverse all reachable memory regions via the @@ -881,7 +935,7 @@ Call.dump(llvm::dbgs()); llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; }); - Result = F.add(Result, I); + Result = F.add(Result, I, V); } }); @@ -908,7 +962,7 @@ if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto report = std::make_unique(BT, Msg, N); report->addRange(E->getSourceRange()); - report->addVisitor(std::make_unique(*TaintedSVal)); + report->markInteresting(*TaintedSVal); C.emitReport(std::move(report)); return true; } @@ -978,8 +1032,9 @@ return; ProgramStateRef State = C.getState(); - auto &F = State->getStateManager().get_context(); - ImmutableSet Result = F.add(F.getEmptySet(), ReturnValueIndex); + auto &F = State->getStateManager().get_context(); + ArgIdxSValMap Result = + F.add(F.getEmptyMap(), ReturnValueIndex, Call.getReturnValue()); State = State->set(C.getStackFrame(), Result); C.addTransition(State); } diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -11,21 +11,21 @@ void taintDiagnostic(void) { char buf[128]; - scanf("%s", buf); // expected-note {{Taint originated here}} + scanf("%s", buf); // expected-note {{Propagated taint to the 2nd parameter}} system(buf); // expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} } int taintDiagnosticOutOfBound(void) { int index; int Array[] = {1, 2, 3, 4, 5}; - scanf("%d", &index); // expected-note {{Taint originated here}} + scanf("%d", &index); // expected-note {{Taint originated here}} expected-note {{Propagated taint to the 2nd parameter}} return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}} // expected-note@-1 {{Out of bound memory access (index is tainted)}} } int taintDiagnosticDivZero(int operand) { scanf("%d", &operand); // expected-note {{Value assigned to 'operand'}} - // expected-note@-1 {{Taint originated here}} + // expected-note@-1 {{Taint originated here}} expected-note@-1 {{Propagated taint to the 2nd parameter}} return 10 / operand; // expected-warning {{Division by a tainted value, possibly zero}} // expected-note@-1 {{Division by a tainted value, possibly zero}} } @@ -33,15 +33,15 @@ void taintDiagnosticVLA(void) { int x; scanf("%d", &x); // expected-note {{Value assigned to 'x'}} - // expected-note@-1 {{Taint originated here}} + // expected-note@-1 {{Taint originated here}} expected-note@-1 {{Propagated taint to the 2nd parameter}} int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}} // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}} } void taintDiagnosticMalloc(int conj) { int x; - scanf("%d", &x); - // expected-note@-1 2 {{Taint originated here}} Once for malloc(tainted), once for BoundsV2. + scanf("%d", &x); // expected-note {{Taint originated here}} + // expected-note@-1 2 {{Propagated taint to the 2nd parameter}} Once for malloc(tainted), once for BoundsV2. int *p = (int *)malloc(x + conj); // Generic taint checker forbids tainted allocation. // expected-warning@-1 {{Untrusted data is used to specify the buffer size}}