Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -40,7 +40,7 @@ /// Returns the text indicating that the user needs to provide input there: virtual std::string - getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { + getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const { std::string s = std::string("<# "); s += HintTextToUser; s += " #>"; Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -30,7 +30,8 @@ WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) -FIXABLE_GADGET(ULCArraySubscript) +FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context +FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context #undef FIXABLE_GADGET #undef WARNING_GADGET Index: clang/lib/Analysis/UnsafeBufferUsage.cpp =================================================================== --- clang/lib/Analysis/UnsafeBufferUsage.cpp +++ clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -110,6 +110,15 @@ bool Matches; }; +// Because we're dealing with raw pointers, let's define what we mean by that. +static auto hasPointerType() { + return hasType(hasCanonicalType(pointerType())); +} + +static auto hasArrayType() { + return hasType(hasCanonicalType(arrayType())); +} + AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) { const DynTypedMatcher &DTM = static_cast(innerMatcher); @@ -129,6 +138,29 @@ castSubExpr(innerMatcher)); // FIXME: add assignmentTo context... } + +// Returns a matcher that matches any expression `e` such that `InnerMatcher` +// matches `e` and `e` is in an Unspecified Pointer Context (UPC). +static internal::Matcher +isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) { + // A UPC can be + // 1. an argument of a function call (except the callee has [[unsafe_...]] + // attribute), or + // 2. the operand of a cast operation; or + // ... + auto CallArgMatcher = + callExpr(hasAnyArgument(allOf( + hasPointerType() /* array also decays to pointer type*/, + InnerMatcher)), + unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); + auto CastOperandMatcher = + explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral), + castSubExpr(allOf(hasPointerType(), InnerMatcher))); + + return stmt(anyOf(CallArgMatcher, CastOperandMatcher)); + // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we + // don't have to check that.) +} } // namespace clang::ast_matchers namespace { @@ -143,15 +175,6 @@ class Strategy; } // namespace -// Because we're dealing with raw pointers, let's define what we mean by that. -static auto hasPointerType() { - return hasType(hasCanonicalType(pointerType())); -} - -static auto hasArrayType() { - return hasType(hasCanonicalType(arrayType())); -} - namespace { /// Gadget is an individual operation in the code that may be of interest to /// this analysis. Each (non-abstract) subclass corresponds to a specific @@ -449,6 +472,50 @@ return {}; } }; + +// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer +// Context (see `isInUnspecifiedPointerContext`). +// Note here `[]` is the built-in subscript operator. +class UPCAddressofArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const UPCAddressofArraySubscriptTag = + "AddressofArraySubscriptUnderUPC"; + const UnaryOperator *Node; // the `&DRE[any]` node + +public: + UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs( + UPCAddressofArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UPCAddressofArraySubscript; + } + + static Matcher matcher() { + return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( + unaryOperator(hasOperatorName("&"), + hasUnaryOperand(arraySubscriptExpr( + hasBase(ignoringParenImpCasts(declRefExpr()))))) + .bind(UPCAddressofArraySubscriptTag))))); + } + + virtual std::optional getFixits(const Strategy &) const override; + + virtual const Stmt *getBaseStmt() const override { return Node; } + + virtual DeclUseList getClaimedVarUseSites() const override { + if (const auto *ArraySubst = + dyn_cast(Node->getSubExpr())) + if (const auto *DRE = + dyn_cast(ArraySubst->getBase()->IgnoreImpCasts())) { + return {DRE}; + } + return {}; + } +}; } // namespace namespace { @@ -753,6 +820,28 @@ return std::nullopt; } +static std::optional // forward declaration +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); + +std::optional +UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { + auto DREs = getClaimedVarUseSites(); + + if (DREs.size() == 1) + if (const auto *VD = dyn_cast(DREs.front()->getDecl())) { + switch (S.lookup(VD)) { + case Strategy::Kind::Span: + return fixUPCAddressofArraySubscriptWithSpan(Node); + case Strategy::Kind::Wontfix: + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + } + return std::nullopt; // something went wrong, no fix-it +} + // Return the text representation of the given `APInt Val`: static std::string getAPIntText(APInt Val) { SmallVector Txt; @@ -786,6 +875,32 @@ LangOpts); } +// Generates fix-its replacing an expression of the form `&DRE[e]` with +// `(DRE.data() + e)`: +static std::optional +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { + if (const auto *ArraySub = dyn_cast(Node->getSubExpr())) + if (const auto *DRE = + dyn_cast(ArraySub->getBase()->IgnoreImpCasts())) { + // FIXME: this `getASTContext` call is costy, we should pass the + // ASTContext in: + const ASTContext &Ctx = DRE->getDecl()->getASTContext(); + const Expr *Idx = ArraySub->getIdx(); + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + SmallString<32> StrBuffer; + + StrBuffer.append("("); + StrBuffer.append(getExprText(DRE, SM, LangOpts)); + StrBuffer.append(".data() + "); + StrBuffer.append(getExprText(Idx, SM, LangOpts)); + StrBuffer.append(")"); + return FixItList{FixItHint::CreateReplacement(Node->getSourceRange(), + StrBuffer.str())}; + } + return std::nullopt; // something went wrong. no fix-it +} + // For a non-null initializer `Init` of `T *` type, this function returns // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it // to output stream. Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +int f(unsigned long, void *); + +[[clang::unsafe_buffer_usage]] +int unsafe_f(unsigned long, void *); + +void address_to_integer(int x) { + int * p = new int[10]; + unsigned long n = (unsigned long) &p[5]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)" + unsigned long m = (unsigned long) &p[x]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + x)" +} + +void call_argument(int x) { + int * p = new int[10]; + + f((unsigned long) &p[5], &p[x]); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"(p.data() + 5)" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"(p.data() + x)" +} + +void ignore_unsafe_calls(int x) { + // Cannot fix `&p[x]` for now as it is an argument of an unsafe + // call. So no fix for variable `p`. + int * p = new int[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + unsafe_f((unsigned long) &p[5], + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + &p[x]); + + int * q = new int[10]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span q" + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{" + // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}" + unsafe_f((unsigned long) &q[5], + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"(q.data() + 5)" + (void*)0); +} + +void odd_subscript_form() { + int * p = new int[10]; + unsigned long n = (unsigned long) &5[p]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)" +} + +// To test multiple function declarations, each of which carries +// different incomplete informations: +[[clang::unsafe_buffer_usage]] +void unsafe_g(void*); + +void unsafe_g(void*); + +void multiple_unsafe_fundecls() { + int * p = new int[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + unsafe_g(&p[5]); + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +} + +void unsafe_h(void*); + +[[clang::unsafe_buffer_usage]] +void unsafe_h(void*); + +void unsafe_h(void* p) { ((char*)p)[10]; } + +void multiple_unsafe_fundecls2() { + int * p = new int[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + unsafe_h(&p[5]); + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +}