Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -37,6 +37,15 @@ /// Invoked when a fix is suggested against a variable. virtual void handleFixableVariable(const VarDecl *Variable, FixItList &&List) = 0; + + /// Returns the text indicating that the user needs to provide input there: + static std::string + getUserFillPlaceHolder(const StringRef &HintTextToUser = "placeholder") { + std::string s = std::string("<# "); + s += HintTextToUser; + s += " #>"; + return s; + } }; // This function invokes the analysis and allows the caller to react to it Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -29,6 +29,7 @@ WARNING_GADGET(Decrement) WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) +FIXABLE_GADGET(ULCArraySubscript) #undef FIXABLE_GADGET #undef WARNING_GADGET Index: clang/lib/Analysis/UnsafeBufferUsage.cpp =================================================================== --- clang/lib/Analysis/UnsafeBufferUsage.cpp +++ clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -115,6 +115,21 @@ MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All); return Visitor.findMatch(DynTypedNode::create(Node)); } + +AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { + return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); +} + +// Returns a matcher that matches any expression 'e' such that `innerMatcher` +// matches 'e' and 'e' is in an Unspecified Lvalue Context. +static auto isInUnspecifiedLvalueContext(internal::Matcher innerMatcher) { + auto isLvalueToRvalueCast = [](internal::Matcher M) { + return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue), + castSubExpr(M)); + }; + //FIXME: add assignmentTo context... + return isLvalueToRvalueCast(innerMatcher); +} } // namespace clang::ast_matchers namespace { @@ -282,7 +297,7 @@ /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as /// it doesn't have any bounds checks for the array. class ArraySubscriptGadget : public WarningGadget { - static constexpr const char *const ArraySubscrTag = "arraySubscr"; + static constexpr const char *const ArraySubscrTag = "ArraySubscript"; const ArraySubscriptExpr *ASE; public: @@ -366,6 +381,51 @@ // FIXME: pointer adding zero should be fine //FIXME: this gadge will need a fix-it }; + +class Strategy; + +// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue +// Context (see `isInUnspecifiedLvalueContext`). +// Note here `[]` is the built-in subscript operator. +class ULCArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const ULCArraySubscriptTag = "ULCArraySubscript"; + const ArraySubscriptExpr *Node; + +public: + ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs(ULCArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::ULCArraySubscript; + } + + static Matcher matcher() { + auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); + auto BaseIsArrayOrPtrDRE = + hasBase(ignoringParenImpCasts(allOf(declRefExpr(), ArrayOrPtr))); + auto Target = + arraySubscriptExpr(BaseIsArrayOrPtrDRE, + unless(hasIndex(integerLiteral(equals(0))))) + .bind(ULCArraySubscriptTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + virtual Optional getFixits(const Strategy &S) const override; + + virtual const Stmt *getBaseStmt() const override { return Node; } + + virtual DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = dyn_cast(Node->getBase()->IgnoreImpCasts())) { + return {DRE}; + } + return {}; + } +}; } // namespace namespace { @@ -519,26 +579,30 @@ // clang-format off M.addMatcher( stmt(forEveryDescendant( + eachOf( + // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable + // each other (they could if they were put in the same `anyOf` group). + // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers + // match for the same node, so that we can group them + // in one `anyOf` group (for better performance via short-circuiting). stmt(anyOf( - // Add Gadget::matcher() for every gadget in the registry. -#define GADGET(x) \ - x ## Gadget::matcher().bind(#x), +#define FIXABLE_GADGET(x) \ + x ## Gadget::matcher(), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" - // In parallel, match all DeclRefExprs so that to find out - // whether there are any uncovered by gadgets. - declRefExpr(anyOf(hasPointerType(), hasArrayType()), - to(varDecl())).bind("any_dre"), // Also match DeclStmts because we'll need them when fixing // their underlying VarDecls that otherwise don't have // any backreferences to DeclStmts. declStmt().bind("any_ds") - )) - // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) - // here, to make sure that the statement actually belongs to the - // function and not to a nested function. However, forCallable uses - // ParentMap which can't be used before the AST is fully constructed. - // The original problem doesn't sound like it needs ParentMap though, - // maybe there's a more direct solution? + )), + stmt(anyOf( + // Add Gadget::matcher() for every gadget in the registry. +#define WARNING_GADGET(x) \ + x ## Gadget::matcher().bind(#x), +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + // In parallel, match all DeclRefExprs so that to find out + // whether there are any uncovered by gadgets. + declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre") + ))) )), &CB ); @@ -607,11 +671,189 @@ return FixablesForUnsafeVars; } +static Strategy +getNaiveStrategy(const llvm::SmallVectorImpl &UnsafeVars) { + Strategy S; + for (const VarDecl *VD : UnsafeVars) { + S.set(VD, Strategy::Kind::Span); + } + return S; +} + +Optional +ULCArraySubscriptGadget::getFixits(const Strategy &S) const { + if (const auto *DRE = dyn_cast(Node->getBase()->IgnoreImpCasts())) + if (const auto *VD = dyn_cast(DRE->getDecl())) { + switch (S.lookup(VD)) { + case Strategy::Kind::Span: + return FixItList{}; + case Strategy::Kind::Wontfix: + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + } + return std::nullopt; +} + +// For a non-null initializer `Init` of `T *` type, this function writes +// `{Init, S}` as a part of a fix-it to output stream . The list-initializer +// `{Init, S}` belongs to a span constructor, where `Init` specifies the +// address of the data and `S` is the extent. In many cases, this function +// cannot figure out the actual extent `S`. It then will use a place holder to +// replace `S` to ask users to fill `S` in. The span object being constructed +// will have type `span`. +// +// FIXME: This function so far only works for spanify "single-level" pointers. +// When it comes to multi-level pointers, the data address is not necessarily +// identical to `Init` and the element type of the constructing span object is +// not necessarily `T`. +// +// Parameters: +// `Init` a pointer to the initializer expression +// `Ctx` a reference to the ASTContext +// `OS` the output stream where fix-it texts being written to +static void populateInitializerFixItWithSpan(const Expr *Init, + const ASTContext &Ctx, raw_ostream &OS) { + const PrintingPolicy &PP = Ctx.getPrintingPolicy(); + + // Prints the begin address of the span constructor: + OS << "{"; + Init->printPretty(OS, nullptr, PP); + OS << ", "; + + bool ExtKnown = false; // true iff the extent can be obtained + // Printers that print extent into OS and sets ExtKnown to true: + std::function PrintExpr = [&ExtKnown, &OS, &PP](const Expr *Size) { + Size->printPretty(OS, nullptr, PP); + ExtKnown = true; + }; + std::function PrintAPInt = [&ExtKnown, &OS](const APInt &Size) { + Size.print(OS, false); + ExtKnown = true; + }; + std::function PrintOne = [&ExtKnown, &OS](void) { + OS << "1"; + ExtKnown = true; + }; + + // Try to get the data extent. Break into different cases: + if (auto CxxNew = dyn_cast(Init->IgnoreImpCasts())) { + // In cases `Init` is `new T[n]` and there is no explicit cast over + // `Init`, we know that `Init` must evaluates to a pointer to `n` objects + // of `T`. So the extent is `n` unless `n` has side effects. Similar but + // simpler for `Init` is `new T`. + if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { + if (!Ext->HasSideEffects(Ctx)) + PrintExpr(Ext); + } else if (!CxxNew->isArray()) + PrintOne(); + } else if (auto CArrTy = dyn_cast( + Init->IgnoreImpCasts()->getType().getTypePtr())) { + // In cases `Init` is of an array type after stripping off implicit casts, + // `PointeeTy` must besimilar to the element type of `CArrTy`, so the + // extent is the array size of `CArrTy`. Note that if the array size is + // not a constant, we cannot use it as the extent. + PrintAPInt(CArrTy->getSize()); + // FIXME: local static array may be converted to std:array. Is there any + // dependency among these fix-its? + } else { + // In cases `Init` is of the form `&Var` after stripping of implicit + // casts, where `&` is the built-in operator, `PointeeTy` must be similar + // to the type of `Var`, so the extent is 1. + if (auto AddrOfExpr = dyn_cast(Init->IgnoreImpCasts())) + if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && + isa_and_present(AddrOfExpr->getSubExpr())) + PrintOne(); + // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, std::addressof,... + // etc. + } + if (!ExtKnown) + OS << UnsafeBufferUsageHandler::getUserFillPlaceHolder(); + OS << "}"; +} + +// For a `VarDecl` of the form `T * var (= Init)?`, this +// function generates a fix-it for the declaration, which re-declares `var` to +// be of `span` type and transforms the initializer, if present, to a span +// constructor---`span var {Init, Extent}`, where `Extent` may need the user +// to fill in. +// +// FIXME: +// For now, we only consider single level pointer types, i.e., given `T` is +// `int **`, the converted type will be `span`. +// We need to handle multi-level pointers correctly later. +// +// Parameters: +// `D` a pointer the variable declaration node +// `Ctx` a reference to the ASTContext +// Returns: +// the generated fix-it +static FixItHint fixVarDeclWithSpan(const VarDecl *D, + const ASTContext &Ctx) { + const QualType &T = D->getType().getDesugaredType(Ctx); + const QualType &SpanEltT = T->getPointeeType(); + assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!"); + + SmallString<32> Replacement; + raw_svector_ostream OS(Replacement); + + // Simply make `D` to be of span type: + OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName(); + if (const Expr *Init = D->getInit()) + populateInitializerFixItWithSpan(Init, Ctx, OS); + + return FixItHint::CreateReplacement( + SourceRange{D->getInnerLocStart(), D->getEndLoc()}, OS.str()); +} + +static FixItList fixVariableWithSpan(const VarDecl *VD, + const DeclUseTracker &Tracker, + const ASTContext &Ctx) { + const DeclStmt *DS = Tracker.lookupDecl(VD); + assert(DS && "Fixing non-local variables not implemented yet!"); + assert(DS->getSingleDecl() == VD && + "Fixing non-single declarations not implemented yet!"); + // Currently DS is an unused variable but we'll need it when + // non-single decls are implemented, where the pointee type name + // and the '*' are spread around the place. + (void)DS; + + // FIXME: handle cases where DS has multiple declarations + FixItHint Fix = fixVarDeclWithSpan(VD, Ctx); + return {Fix}; +} + +static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K, + const DeclUseTracker &Tracker, + const ASTContext &Ctx) { + switch (K) { + case Strategy::Kind::Span: { + if (VD->getType()->isPointerType()) + return fixVariableWithSpan(VD, Tracker, Ctx); + return {}; + } + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("Strategy not implemented yet!"); + case Strategy::Kind::Wontfix: + llvm_unreachable("Invalid strategy!"); + } +} + static std::map -getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) { +getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const DeclUseTracker &Tracker, const ASTContext &Ctx) { std::map FixItsForVariable; for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) { - // TODO fixVariable - fixit for the variable itself + FixItsForVariable[VD] = fixVariable(VD, S.lookup(VD), Tracker, Ctx); + // If we fail to produce Fix-It for the declaration we have to skip the variable entirely. + if (FixItsForVariable[VD].empty()) { + FixItsForVariable.erase(VD); + continue; + } + bool ImpossibleToFix = false; llvm::SmallVector FixItsForVD; for (const auto &F : Fixables) { @@ -634,15 +876,6 @@ return FixItsForVariable; } -static Strategy -getNaiveStrategy(const llvm::SmallVectorImpl &UnsafeVars) { - Strategy S; - for (const VarDecl *VD : UnsafeVars) { - S.set(VD, Strategy::Kind::Span); - } - return S; -} - void clang::checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler) { assert(D && D->getBody()); @@ -675,7 +908,7 @@ Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars); std::map FixItsForVariable = - getFixIts(FixablesForUnsafeVars, NaiveStrategy); + getFixIts(FixablesForUnsafeVars, NaiveStrategy, Tracker, D->getASTContext()); // FIXME Detect overlapping FixIts. Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp @@ -0,0 +1,86 @@ +// RUN: cp %s %t.cpp +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fixit %t.cpp +// RUN: grep -v CHECK %t.cpp | FileCheck %s +typedef int * Int_ptr_t; +typedef int Int_t; + +void local_array_subscript_simple() { + int tmp; +// CHECK: std::span p{new int [10], 10}; +// CHECK: std::span q{new int [10], 10}; +// CHECK: tmp = p[5]; +// CHECK: tmp = q[5]; + int *p = new int[10]; + const int *q = new int[10]; + tmp = p[5]; + tmp = q[5]; + +// CHECK: std::span x{new int [10], 10}; +// CHECK: std::span y{new int, 1}; +// CHECK: std::span z{new int [10], 10}; +// CHECK: std::span w{new Int_t [10], 10}; + + Int_ptr_t x = new int[10]; + Int_ptr_t y = new int; + Int_t * z = new int[10]; + Int_t * w = new Int_t[10]; + + // CHECK: tmp = x[5]; + tmp = x[5]; + // CHECK: tmp = y[5]; + tmp = y[5]; // y[5] will crash after being span + // CHECK: tmp = z[5]; + tmp = z[5]; + // CHECK: tmp = w[5]; + tmp = w[5]; +} + +void local_array_subscript_auto() { + int tmp; +// CHECK: std::span p{new int [10], 10}; +// CHECK: tmp = p[5]; + auto p = new int[10]; + tmp = p[5]; +} + +void local_array_subscript_variable_extent() { + int n = 10; + int tmp; + //FIXME: need to think it twice about side effects in the expressions + //used to initialize span objects + // CHECK: std::span p{new int [n], n}; + // CHECK: std::span q{new int [n++], <# placeholder #>}; + // CHECK: tmp = p[5]; + // CHECK: tmp = q[5]; + int *p = new int[n]; + // If the extent expression does not have a constant value, we cannot fill the extent for users... + int *q = new int[n++]; + tmp = p[5]; + tmp = q[5]; +} + + +void local_ptr_to_array() { + int tmp; + int n = 10; + int a[10]; + int b[n]; // If the extent expression does not have a constant value, we cannot fill the extent for users... + // CHECK: std::span p{a, 10}; + // CHECK: std::span q{b, <# placeholder #>}; + // CHECK: tmp = p[5]; + // CHECK: tmp = q[5]; + int *p = a; + int *q = b; + tmp = p[5]; + tmp = q[5]; +} + +void local_ptr_addrof_init() { + int var; +// CHECK: std::span q{&var, 1}; +// CHECK: var = q[5]; + int * q = &var; + // This expression involves unsafe buffer accesses, which will crash + // at runtime after applying the fix-it, + var = q[5]; +}