Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -40,7 +40,7 @@ /// Returns the text indicating that the user needs to provide input there: virtual std::string - getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { + getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const { std::string s = std::string("<# "); s += HintTextToUser; s += " #>"; Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -30,7 +30,8 @@ WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) -FIXABLE_GADGET(ULCArraySubscript) +FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context +FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context #undef FIXABLE_GADGET #undef WARNING_GADGET Index: clang/lib/Analysis/UnsafeBufferUsage.cpp =================================================================== --- clang/lib/Analysis/UnsafeBufferUsage.cpp +++ clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,7 +9,9 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include #include @@ -110,6 +112,15 @@ bool Matches; }; +// Because we're dealing with raw pointers, let's define what we mean by that. +static auto hasPointerType() { + return hasType(hasCanonicalType(pointerType())); +} + +static auto hasArrayType() { + return hasType(hasCanonicalType(arrayType())); +} + AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) { const DynTypedMatcher &DTM = static_cast(innerMatcher); @@ -129,6 +140,29 @@ castSubExpr(innerMatcher)); // FIXME: add assignmentTo context... } + +// Returns a matcher that matches any expression `e` such that `InnerMatcher` +// matches `e` and `e` is in an Unspecified Pointer Context (UPC). +static internal::Matcher +isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) { + // A UPC can be + // 1. an argument of a function call (except the callee has [[unsafe_...]] + // attribute), or + // 2. the operand of a cast operation; or + // ... + auto CallArgMatcher = + callExpr(hasAnyArgument(allOf( + hasPointerType() /* array also decays to pointer type*/, + InnerMatcher)), + unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); + auto CastOperandMatcher = + explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral), + castSubExpr(allOf(hasPointerType(), InnerMatcher))); + + return stmt(anyOf(CallArgMatcher, CastOperandMatcher)); + // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we + // don't have to check that.) +} } // namespace clang::ast_matchers namespace { @@ -143,15 +177,6 @@ class Strategy; } // namespace -// Because we're dealing with raw pointers, let's define what we mean by that. -static auto hasPointerType() { - return hasType(hasCanonicalType(pointerType())); -} - -static auto hasArrayType() { - return hasType(hasCanonicalType(arrayType())); -} - namespace { /// Gadget is an individual operation in the code that may be of interest to /// this analysis. Each (non-abstract) subclass corresponds to a specific @@ -449,6 +474,50 @@ return {}; } }; + +// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer +// Context (see `isInUnspecifiedPointerContext`). +// Note here `[]` is the built-in subscript operator. +class UPCAddressofArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const UPCAddressofArraySubscriptTag = + "AddressofArraySubscriptUnderUPC"; + const UnaryOperator *Node; // the `&DRE[any]` node + +public: + UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs( + UPCAddressofArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UPCAddressofArraySubscript; + } + + static Matcher matcher() { + return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( + unaryOperator(hasOperatorName("&"), + hasUnaryOperand(arraySubscriptExpr( + hasBase(ignoringParenImpCasts(declRefExpr()))))) + .bind(UPCAddressofArraySubscriptTag))))); + } + + virtual std::optional getFixits(const Strategy &) const override; + + virtual const Stmt *getBaseStmt() const override { return Node; } + + virtual DeclUseList getClaimedVarUseSites() const override { + if (const auto *ArraySubst = + dyn_cast(Node->getSubExpr())) + if (const auto *DRE = + dyn_cast(ArraySubst->getBase()->IgnoreImpCasts())) { + return {DRE}; + } + return {}; + } +}; } // namespace namespace { @@ -711,6 +780,28 @@ return std::nullopt; } +static FixItList // forward declaration +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); + +std::optional +UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { + auto DREs = getClaimedVarUseSites(); + + if (DREs.size() == 1) + if (const auto *VD = dyn_cast(DREs.front()->getDecl())) { + switch (S.lookup(VD)) { + case Strategy::Kind::Span: + return fixUPCAddressofArraySubscriptWithSpan(Node); + case Strategy::Kind::Wontfix: + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + } + return std::nullopt; // something went wrong, no fix-it +} + // Return the text representation of the given `APInt Val`: static std::string getAPIntText(APInt Val) { SmallVector Txt; @@ -734,6 +825,21 @@ SM, LangOpts); } +// Similar to `getExprTextOr` except returns `std::nullopt` in case unable to +// get the text representation of `E`. +static std::optional getExprText(const Expr *E, + const SourceManager &SM, + const LangOptions &LangOpts) { + std::optional EndToken = + Lexer::findNextToken(E->getEndLoc().getLocWithOffset(-1), SM, LangOpts); + + if (!EndToken) + return std::nullopt; + return Lexer::getSourceText( + CharSourceRange::getCharRange(E->getBeginLoc(), EndToken->getEndLoc()), + SM, LangOpts); +} + // Return the source location of the last character of `Node`. template static std::optional getEndCharLoc(const NodeTy *Node, @@ -747,6 +853,34 @@ return LastToken->getEndLoc().getLocWithOffset(-1); } +// Generates fix-its replacing an expression of the form `&DRE[e]` with +// `DRE.data() + e`: +static FixItList +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { + if (const auto *ArraySub = dyn_cast(Node->getSubExpr())) + if (const auto *DRE = + dyn_cast(ArraySub->getBase()->IgnoreImpCasts())) { + // FIXME: this `getASTContext` call is costy, we should pass the + // ASTContext in: + const ASTContext &Ctx = DRE->getDecl()->getASTContext(); + const Expr *Idx = ArraySub->getIdx(); + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + SmallString<32> StrBuffer; + + if (auto DRETxt = getExprText(DRE, SM, LangOpts)) { + StrBuffer.append(*DRETxt); + StrBuffer.append(".data() + "); + if (auto IdxTxt = getExprText(Idx, SM, LangOpts)) { + StrBuffer.append(*IdxTxt); + return {FixItHint::CreateReplacement(Node->getSourceRange(), + StrBuffer.str())}; + } + } + } + return {}; // something went wrong. no fix-it +} + // For a non-null initializer `Init` of `T *` type, this function returns // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it // to output stream. Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +int f(unsigned long, void *); + +[[clang::unsafe_buffer_usage]] +int unsafe_f(unsigned long, void *); + +void address_to_integer(int x) { + int * p = new int[10]; + unsigned long n = (unsigned long) &p[5]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"p.data() + 5" + unsigned long m = (unsigned long) &p[x]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"p.data() + x" +} + +void call_argument(int x) { + int * p = new int[10]; + + f((unsigned long) &p[5], &p[x]); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"p.data() + 5" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"p.data() + x" +} + +void ignore_unsafe_calls(int x) { + int * p = new int[10]; + + unsafe_f((unsigned long) &p[5], + &p[x]); +} + +