Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def =================================================================== --- /dev/null +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -0,0 +1,26 @@ +//=- UnsafeBufferUsageGadgets.def - List of ways to use a buffer --*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef GADGET +#define GADGET(name) +#endif + +#ifndef UNSAFE_GADGET +#define UNSAFE_GADGET(name) GADGET(name) +#endif + +#ifndef SAFE_GADGET +#define SAFE_GADGET(name) GADGET(name) +#endif + +UNSAFE_GADGET(Increment) +UNSAFE_GADGET(Decrement) + +#undef SAFE_GADGET +#undef UNSAFE_GADGET +#undef GADGET Index: clang/lib/Analysis/UnsafeBufferUsage.cpp =================================================================== --- clang/lib/Analysis/UnsafeBufferUsage.cpp +++ clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -13,11 +13,144 @@ using namespace clang; using namespace ast_matchers; -namespace { -// TODO: Better abstractions over gadgets. -using GadgetList = std::vector; +// Because we're dealing with raw pointers, let's define what we mean by that. +static auto hasPointerType() { + return anyOf(hasType(pointerType()), + hasType(autoType(hasDeducedType( + hasUnqualifiedDesugaredType(pointerType()))))); } +namespace { +/// Gadget is an individual operation in the code that may be of interest to +/// this analysis. Each (non-abstract) subclass corresponds to a specific +/// rigid AST structure that constitutes an operation on a pointer-type object. +/// Discovery of a gadget in the code corresponds to claiming that we understand +/// what this part of code is doing well enough to potentially improve it. +/// Gadgets can be unsafe (immediately deserving a warning) or safe (not +/// deserving a warning per se, but affecting our decision-making process +/// nonetheless). +class Gadget { +public: + enum class Kind { +#define GADGET(x) x, +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#undef GADGETS + }; + + /// Determine if a kind is a safe kind. Slower than calling isSafe(). + static bool isSafeKind(Kind K) { + switch (K) { +#define UNSAFE_GADGET(x) \ + case Kind::x: +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#undef UNSAFE_GADGET + return false; + +#define SAFE_GADGET(x) \ + case Kind::x: +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#undef SAFE_GADGET + return true; + } + llvm_unreachable("Invalid gadget kind!"); + } + + /// Common type of ASTMatchers used for discovering gadgets. + /// Useful for implementing the static matcher() methods + /// that are expected from all non-abstract subclasses. + using Matcher = decltype(stmt()); + + Gadget(Kind K) : K(K) {} + + Kind getKind() const { return K; } + + virtual bool isSafe() const = 0; + virtual const Stmt *getBaseStmt() const = 0; + + virtual ~Gadget() {} + +private: + Kind K; +}; + +using GadgetList = std::vector>; + +/// Unsafe gadgets correspond to unsafe code patterns that warrants +/// an immediate warning. +class UnsafeGadget : public Gadget { +public: + UnsafeGadget(Kind K) : Gadget(K) { + assert(classof(this) && "Invalid unsafe gadget kind!"); + } + + static bool classof(const Gadget *G) { return !isSafeKind(G->getKind()); } + bool isSafe() const override { return false; } +}; + +/// Safe gadgets correspond to code patterns that aren't unsafe but need to be +/// properly recognized in order to emit correct warnings and fixes over unsafe +/// gadgets. For example, if a raw pointer-type variable is replaced by +/// a safe C++ container, every use of such variable may need to be +/// carefully considered and possibly updated. +class SafeGadget : public Gadget { +public: + SafeGadget(Kind K) : Gadget(K) { + assert(classof(this) && "Invalid safe gadget kind!"); + } + + static bool classof(const Gadget *G) { return isSafeKind(G->getKind()); } + bool isSafe() const override { return true; } +}; + +/// An increment of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. +class IncrementGadget : public UnsafeGadget { + const UnaryOperator *Op; + +public: + IncrementGadget(const MatchFinder::MatchResult &Result) + : UnsafeGadget(Kind::Increment), + Op(Result.Nodes.getNodeAs("op")) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Increment; + } + + static Matcher matcher() { + return stmt(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) + ).bind("op")); + } + + const Stmt *getBaseStmt() const override { return Op; } +}; + +/// A decrement of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. +class DecrementGadget : public UnsafeGadget { + const UnaryOperator *Op; + +public: + DecrementGadget(const MatchFinder::MatchResult &Result) + : UnsafeGadget(Kind::Decrement), + Op(Result.Nodes.getNodeAs("op")) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Decrement; + } + + static Matcher matcher() { + return stmt(unaryOperator( + hasOperatorName("--"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) + ).bind("op")); + } + + const Stmt *getBaseStmt() const override { return Op; } +}; +} // namespace + // Scan the function and return a list of gadgets found with provided kits. static GadgetList findGadgets(const Decl *D) { @@ -28,40 +161,45 @@ GadgetFinderCallback(GadgetList &Output) : Output(Output) {} void run(const MatchFinder::MatchResult &Result) override { - Output.push_back(Result.Nodes.getNodeAs("root_node")); + // Figure out which matcher we've found, and call the appropriate + // subclass constructor. + // FIXME: Can we do this more logarithmically? +#define GADGET(x) \ + if (Result.Nodes.getNodeAs(#x)) { \ + Output.push_back(std::make_unique(Result)); \ + return; \ + } +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#undef GADGET } }; GadgetList G; MatchFinder M; - - auto IncrementMatcher = unaryOperator( - hasOperatorName("++"), - hasUnaryOperand(hasType(pointerType())) - ); - auto DecrementMatcher = unaryOperator( - hasOperatorName("--"), - hasUnaryOperand(hasType(pointerType())) - ); - GadgetFinderCallback CB(G); + // clang-format off M.addMatcher( - stmt(forEachDescendant( - stmt( - anyOf( - IncrementMatcher, - DecrementMatcher - /* Fill me in! */ - ) - // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) - // here, to make sure that the statement actually belongs to the - // function and not to a nested function. However, forCallable uses - // ParentMap which can't be used before the AST is fully constructed. - // The original problem doesn't sound like it needs ParentMap though, - // maybe there's a more direct solution? - ).bind("root_node") - )), &CB); + stmt(forEachDescendant( + stmt(anyOf( + // Add Gadget::matcher() for every gadget in the registry. +#define GADGET(x) \ + x ## Gadget::matcher().bind(#x), +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#undef GADGET + // FIXME: Is there a better way to avoid hanging comma? + unless(stmt()) + )) + // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) + // here, to make sure that the statement actually belongs to the + // function and not to a nested function. However, forCallable uses + // ParentMap which can't be used before the AST is fully constructed. + // The original problem doesn't sound like it needs ParentMap though, + // maybe there's a more direct solution? + )), + &CB + ); + // clang-format on M.match(*D->getBody(), D->getASTContext()); @@ -72,8 +210,8 @@ UnsafeBufferUsageHandler &Handler) { assert(D && D->getBody()); - GadgetList G = findGadgets(D); - for (const Stmt *S : G) { - Handler.handleUnsafeOperation(S); + GadgetList Gadgets = findGadgets(D); + for (const auto &G : Gadgets) { + Handler.handleUnsafeOperation(G->getBaseStmt()); } }