diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -26,8 +26,16 @@ UnsafeBufferUsageHandler() = default; virtual ~UnsafeBufferUsageHandler() = default; + /// This analyses produces large fixits that are organized into lists + /// of primitive fixits (individual insertions/removals/replacements). + using FixItList = llvm::SmallVectorImpl; + /// Invoked when an unsafe operation over raw pointers is found. virtual void handleUnsafeOperation(const Stmt *Operation) = 0; + + /// Invoked when a fix is suggested against a variable. + virtual void handleFixableVariable(const VarDecl *Variable, + FixItList &&List) = 0; }; // This function invokes the analysis and allows the caller to react to it diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1393,3 +1393,6 @@ // Warnings and notes related to const_var_decl_type attribute checks def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">; + +// Warnings and fixes to support the "safe buffers" programming model. +def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11756,7 +11756,10 @@ "this builtin requires target: loongarch64">; // Unsafe buffer usage diagnostics. -def warn_unsafe_buffer_usage : Warning< +def warn_unsafe_buffer_expression : Warning< "unchecked operation on raw buffer in expression">, - InGroup>, DefaultIgnore; + InGroup, DefaultIgnore; +def warn_unsafe_buffer_variable : Warning< + "variable %0 participates in unchecked buffer operations">, + InGroup, DefaultIgnore; } // end of sema component. diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -14,6 +14,18 @@ using namespace clang; using namespace ast_matchers; +namespace { +// Because the analysis revolves around variables and their types, we'll need to +// track uses of variables (aka DeclRefExprs). +using DeclUseList = SmallVector; + +// Convenience typedef. +using FixItList = SmallVector; + +// Defined below. +class Strategy; +} // namespace + // Because we're dealing with raw pointers, let's define what we mean by that. static auto hasPointerType() { return anyOf(hasType(pointerType()), @@ -49,6 +61,18 @@ virtual bool isSafe() const = 0; virtual const Stmt *getBaseStmt() const = 0; + /// Returns the list of pointer-type variables on which this gadget performs + /// its operation. Typically, there's only one variable. This isn't a list + /// of all DeclRefExprs in the gadget's AST! + virtual DeclUseList getClaimedVarUseSites() const = 0; + + /// Returns a fixit that would fix the current gadget according to + /// the current strategy. Returns None if the fix cannot be produced; + /// returns an empty list if no fixes are necessary. + virtual std::optional getFixits(const Strategy &) const { + return std::nullopt; + } + virtual ~Gadget() = default; private: @@ -103,6 +127,16 @@ } const UnaryOperator *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { + SmallVector Uses; + if (const auto *DRE = + dyn_cast(Op->getSubExpr()->IgnoreParenImpCasts())) { + Uses.push_back(DRE); + } + + return std::move(Uses); + } }; /// A decrement of a pointer-type value is unsafe as it may run the pointer @@ -128,6 +162,15 @@ } const UnaryOperator *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast(Op->getSubExpr()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } }; /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as @@ -154,17 +197,115 @@ } const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast(ASE->getBase()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } }; } // namespace -// Scan the function and return a list of gadgets found with provided kits. -static GadgetList findGadgets(const Decl *D) { +namespace { +// An auxiliary tracking facility for the fixit analysis. It helps connect +// declarations to its and make sure we've covered all uses with our analysis +// before we try to fix the declaration. +class DeclUseTracker { + using UseSetTy = SmallSet; + using DefMapTy = DenseMap; + + // Allocate on the heap for easier move. + std::unique_ptr Uses{std::make_unique()}; + DefMapTy Defs{}; - class GadgetFinderCallback : public MatchFinder::MatchCallback { - GadgetList &Output; +public: + DeclUseTracker() = default; + DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. + DeclUseTracker(DeclUseTracker &&) = default; + + // Start tracking a freshly discovered DRE. + void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } + + // Stop tracking the DRE as it's been fully figured out. + void claimUse(const DeclRefExpr *DRE) { + assert(Uses->count(DRE) && + "DRE not found or claimed by multiple matchers!"); + Uses->erase(DRE); + } + + // A variable is unclaimed if at least one use is unclaimed. + bool hasUnclaimedUses(const VarDecl *VD) const { + // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? + return any_of(*Uses, [VD](const DeclRefExpr *DRE) { + return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); + }); + } + + void discoverDecl(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) { + if (const auto *VD = dyn_cast(D)) { + assert(Defs.count(VD) == 0 && "Definition already discovered!"); + Defs[VD] = DS; + } + } + } - public: - GadgetFinderCallback(GadgetList &Output) : Output(Output) {} + const DeclStmt *lookupDecl(const VarDecl *VD) const { + auto It = Defs.find(VD); + assert(It != Defs.end() && "Definition never discovered!"); + return It->second; + } +}; +} // namespace + +namespace { +// Strategy is a map from variables to the way we plan to emit fixes for +// these variables. It is figured out gradually by trying different fixes +// for different variables depending on gadgets in which these variables +// participate. +class Strategy { +public: + enum class Kind { + Wontfix, // We don't plan to emit a fixit for this variable. + Span, // We recommend replacing the variable with std::span. + Iterator, // We recommend replacing the variable with std::span::iterator. + Array, // We recommend replacing the variable with std::array. + Vector // We recommend replacing the variable with std::vector. + }; + +private: + using MapTy = llvm::DenseMap; + + MapTy Map; + +public: + Strategy() = default; + Strategy(const Strategy &) = delete; // Let's avoid copies. + Strategy(Strategy &&) = default; + + void set(const VarDecl *VD, Kind K) { + Map[VD] = K; + } + + Kind lookup(const VarDecl *VD) const { + auto I = Map.find(VD); + if (I == Map.end()) + return Kind::Wontfix; + + return I->second; + } +}; +} // namespace + +/// Scan the function and return a list of gadgets found with provided kits. +static std::pair findGadgets(const Decl *D) { + + struct GadgetFinderCallback : MatchFinder::MatchCallback { + GadgetList Gadgets; + DeclUseTracker Tracker; void run(const MatchFinder::MatchResult &Result) override { // In debug mode, assert that we've found exactly one gadget. @@ -176,12 +317,22 @@ #define NEXT ++numFound #endif + if (const auto *DRE = Result.Nodes.getNodeAs("any_dre")) { + Tracker.discoverUse(DRE); + NEXT; + } + + if (const auto *DS = Result.Nodes.getNodeAs("any_ds")) { + Tracker.discoverDecl(DS); + NEXT; + } + // Figure out which matcher we've found, and call the appropriate // subclass constructor. // FIXME: Can we do this more logarithmically? #define GADGET(name) \ if (Result.Nodes.getNodeAs(#name)) { \ - Output.push_back(std::make_unique(Result)); \ + Gadgets.push_back(std::make_unique(Result)); \ NEXT; \ } #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" @@ -191,9 +342,8 @@ } }; - GadgetList G; MatchFinder M; - GadgetFinderCallback CB(G); + GadgetFinderCallback CB; // clang-format off M.addMatcher( @@ -203,8 +353,13 @@ #define GADGET(x) \ x ## Gadget::matcher().bind(#x), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" - // FIXME: Is there a better way to avoid hanging comma? - unless(stmt()) + // In parallel, match all DeclRefExprs so that to find out + // whether there are any uncovered by gadgets. + declRefExpr(hasPointerType(), to(varDecl())).bind("any_dre"), + // Also match DeclStmts because we'll need them when fixing + // their underlying VarDecls that otherwise don't have + // any backreferences to DeclStmts. + declStmt().bind("any_ds") )) // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) // here, to make sure that the statement actually belongs to the @@ -219,15 +374,97 @@ M.match(*D->getBody(), D->getASTContext()); - return G; // NRVO! + // Gadgets "claim" variables they're responsible for. Once this loop finishes, + // the tracker will only track DREs that weren't claimed by any gadgets, + // i.e. not understood by the analysis. + for (const auto &G : CB.Gadgets) { + for (const auto *DRE : G->getClaimedVarUseSites()) { + CB.Tracker.claimUse(DRE); + } + } + + return {std::move(CB.Gadgets), std::move(CB.Tracker)}; } void clang::checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler) { assert(D && D->getBody()); - GadgetList Gadgets = findGadgets(D); + SmallSet WarnedDecls; + + auto [Gadgets, Tracker] = findGadgets(D); + + DenseMap> Map; + + // First, let's sort gadgets by variables. If some gadgets cover more than one + // variable, they'll appear more than once in the map. for (const auto &G : Gadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt()); + DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); + + // Populate the map. + bool Pushed = false; + for (const DeclRefExpr *DRE : ClaimedVarUseSites) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + Map[VD].push_back(G.get()); + Pushed = true; + } + } + + if (!Pushed && !G->isSafe()) { + // We won't return to this gadget later. Emit the warning right away. + Handler.handleUnsafeOperation(G->getBaseStmt()); + continue; + } + } + + Strategy S; + + for (const auto &[VD, VDGadgets] : Map) { + + // If the variable has no unsafe gadgets, skip it entirely. + if (!any_of(VDGadgets, [](const Gadget *G) { return !G->isSafe(); })) + continue; + + std::optional Fixes = std::nullopt; + + // Avoid suggesting fixes if not all uses of the variable are identified + // as known gadgets. + // FIXME: Support parameter variables as well. + if (!Tracker.hasUnclaimedUses(VD) && VD->isLocalVarDecl()) { + // Choose the appropriate strategy. FIXME: We should try different + // strategies. + S.set(VD, Strategy::Kind::Span); + + // Check if it works. + // FIXME: This isn't sufficient (or even correct) when a gadget has + // already produced a fixit for a different variable i.e. it was mentioned + // in the map twice (or more). In such case the correct thing to do is + // to undo the previous fix first, and then if we can't produce the new + // fix for both variables, revert to the old one. + Fixes = FixItList{}; + for (const Gadget *G : VDGadgets) { + std::optional F = G->getFixits(S); + if (!F) { + Fixes = std::nullopt; + break; + } + + for (auto &&Fixit: *F) + Fixes->push_back(std::move(Fixit)); + } + } + + if (Fixes) { + // If we reach this point, the strategy is applicable. + Handler.handleFixableVariable(VD, std::move(*Fixes)); + } else { + // The strategy has failed. Emit the warning without the fixit. + S.set(VD, Strategy::Kind::Wontfix); + for (const Gadget *G : VDGadgets) { + if (!G->isSafe()) { + Handler.handleUnsafeOperation(G->getBaseStmt()); + } + } + } } } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2150,9 +2150,18 @@ UnsafeBufferUsageReporter(Sema &S) : S(S) {} void handleUnsafeOperation(const Stmt *Operation) override { - S.Diag(Operation->getBeginLoc(), diag::warn_unsafe_buffer_usage) + S.Diag(Operation->getBeginLoc(), diag::warn_unsafe_buffer_expression) << Operation->getSourceRange(); } + + void handleFixableVariable(const VarDecl *Variable, + FixItList &&Fixes) override { + const auto &D = + S.Diag(Variable->getBeginLoc(), diag::warn_unsafe_buffer_variable); + D << Variable << Variable->getSourceRange(); + for (const auto &F: Fixes) + D << F; + } }; @@ -2449,7 +2458,8 @@ checkThrowInNonThrowingFunc(S, FD, AC); // Emit unsafe buffer usage warnings and fixits. - if (!Diags.isIgnored(diag::warn_unsafe_buffer_usage, D->getBeginLoc())) { + if (!Diags.isIgnored(diag::warn_unsafe_buffer_expression, D->getBeginLoc()) || + !Diags.isIgnored(diag::warn_unsafe_buffer_variable, D->getBeginLoc())) { UnsafeBufferUsageReporter R(S); checkUnsafeBufferUsage(D, R); }