diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -43,7 +43,7 @@ /// Returns the text indicating that the user needs to provide input there: virtual std::string - getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { + getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const { std::string s = std::string("<# "); s += HintTextToUser; s += " #>"; diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -30,9 +30,10 @@ WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) -FIXABLE_GADGET(ULCArraySubscript) +FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) +FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context #undef FIXABLE_GADGET #undef WARNING_GADGET diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include #include +#include using namespace llvm; using namespace clang; @@ -112,6 +113,15 @@ bool Matches; }; +// Because we're dealing with raw pointers, let's define what we mean by that. +static auto hasPointerType() { + return hasType(hasCanonicalType(pointerType())); +} + +static auto hasArrayType() { + return hasType(hasCanonicalType(arrayType())); +} + AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) { const DynTypedMatcher &DTM = static_cast(innerMatcher); @@ -145,6 +155,30 @@ )); // clang-format off } + + +// Returns a matcher that matches any expression `e` such that `InnerMatcher` +// matches `e` and `e` is in an Unspecified Pointer Context (UPC). +static internal::Matcher +isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) { + // A UPC can be + // 1. an argument of a function call (except the callee has [[unsafe_...]] + // attribute), or + // 2. the operand of a cast operation; or + // ... + auto CallArgMatcher = + callExpr(hasAnyArgument(allOf( + hasPointerType() /* array also decays to pointer type*/, + InnerMatcher)), + unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); + auto CastOperandMatcher = + explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral), + castSubExpr(allOf(hasPointerType(), InnerMatcher))); + + return stmt(anyOf(CallArgMatcher, CastOperandMatcher)); + // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we + // don't have to check that.) +} } // namespace clang::ast_matchers namespace { @@ -159,15 +193,6 @@ class Strategy; } // namespace -// Because we're dealing with raw pointers, let's define what we mean by that. -static auto hasPointerType() { - return hasType(hasCanonicalType(pointerType())); -} - -static auto hasArrayType() { - return hasType(hasCanonicalType(arrayType())); -} - namespace { /// Gadget is an individual operation in the code that may be of interest to /// this analysis. Each (non-abstract) subclass corresponds to a specific @@ -502,6 +527,46 @@ virtual std::optional getFixits(const Strategy &S) const override; }; +// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer +// Context (see `isInUnspecifiedPointerContext`). +// Note here `[]` is the built-in subscript operator. +class UPCAddressofArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const UPCAddressofArraySubscriptTag = + "AddressofArraySubscriptUnderUPC"; + const UnaryOperator *Node; // the `&DRE[any]` node + +public: + UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs( + UPCAddressofArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UPCAddressofArraySubscript; + } + + static Matcher matcher() { + return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( + unaryOperator(hasOperatorName("&"), + hasUnaryOperand(arraySubscriptExpr( + hasBase(ignoringParenImpCasts(declRefExpr()))))) + .bind(UPCAddressofArraySubscriptTag))))); + } + + virtual std::optional getFixits(const Strategy &) const override; + + virtual const Stmt *getBaseStmt() const override { return Node; } + + virtual DeclUseList getClaimedVarUseSites() const override { + const auto *ArraySubst = cast(Node->getSubExpr()); + const auto *DRE = + cast(ArraySubst->getBase()->IgnoreImpCasts()); + return {DRE}; + } +}; } // namespace namespace { @@ -710,7 +775,7 @@ // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers // match for the same node, so that we can group them // in one `anyOf` group (for better performance via short-circuiting). - stmt(anyOf( + stmt(eachOf( #define FIXABLE_GADGET(x) \ x ## Gadget::matcher().bind(#x), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" @@ -869,6 +934,26 @@ return std::nullopt; } +static std::optional // forward declaration +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); + +std::optional +UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { + auto DREs = getClaimedVarUseSites(); + const auto *VD = cast(DREs.front()->getDecl()); + + switch (S.lookup(VD)) { + case Strategy::Kind::Span: + return fixUPCAddressofArraySubscriptWithSpan(Node); + case Strategy::Kind::Wontfix: + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + return std::nullopt; // something went wrong, no fix-it +} + // Return the text representation of the given `APInt Val`: static std::string getAPIntText(APInt Val) { SmallVector Txt; @@ -985,6 +1070,35 @@ return std::nullopt; } +// Generates fix-its replacing an expression of the form `&DRE[e]` with +// `&DRE.data()[e]`: +static std::optional +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { + const auto *ArraySub = cast(Node->getSubExpr()); + const auto *DRE = cast(ArraySub->getBase()->IgnoreImpCasts()); + // FIXME: this `getASTContext` call is costly, we should pass the + // ASTContext in: + const ASTContext &Ctx = DRE->getDecl()->getASTContext(); + const Expr *Idx = ArraySub->getIdx(); + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + std::stringstream SS; + bool IdxIsLitZero = false; + + if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) + if ((*ICE).isZero()) + IdxIsLitZero = true; + if (IdxIsLitZero) { + // If the index is literal zero, we produce the most concise fix-it: + SS << getExprText(DRE, SM, LangOpts).str() << ".data()"; + } else { + SS << "&" << getExprText(DRE, SM, LangOpts).str() << ".data()" + << "[" << getExprText(Idx, SM, LangOpts).str() << "]"; + } + return FixItList{ + FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; +} + // For a non-null initializer `Init` of `T *` type, this function returns // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it // to output stream. diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +int f(unsigned long, void *); + +[[clang::unsafe_buffer_usage]] +int unsafe_f(unsigned long, void *); + +void address_to_integer(int x) { + int * p = new int[10]; + unsigned long n = (unsigned long) &p[5]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]" + unsigned long m = (unsigned long) &p[x]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[x]" +} + +void call_argument(int x) { + int * p = new int[10]; + + f((unsigned long) &p[5], &p[x]); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"&p.data()[5]" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"&p.data()[x]" +} + +void ignore_unsafe_calls(int x) { + // Cannot fix `&p[x]` for now as it is an argument of an unsafe + // call. So no fix for variable `p`. + int * p = new int[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + unsafe_f((unsigned long) &p[5], + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] + &p[x]); + + int * q = new int[10]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span q" + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{" + // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}" + unsafe_f((unsigned long) &q[5], + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"&q.data()[5]" + (void*)0); +} + +void odd_subscript_form() { + int * p = new int[10]; + unsigned long n = (unsigned long) &5[p]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]" +} + +void index_is_zero() { + int * p = new int[10]; + int n = p[5]; + + f((unsigned long)&p[0], + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:20-[[@LINE-1]]:25}:"p.data()" + &p[0]); + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:5-[[@LINE-1]]:10}:"p.data()" +} + +// CHECK-NOT: fix-it +// To test multiple function declarations, each of which carries +// different incomplete informations: +[[clang::unsafe_buffer_usage]] +void unsafe_g(void*); + +void unsafe_g(void*); + +void multiple_unsafe_fundecls() { + int * p = new int[10]; + + unsafe_g(&p[5]); +} + +void unsafe_h(void*); + +[[clang::unsafe_buffer_usage]] +void unsafe_h(void*); + +void unsafe_h(void* p) { ((char*)p)[10]; } + +void multiple_unsafe_fundecls2() { + int * p = new int[10]; + + unsafe_h(&p[5]); +}