Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -37,6 +37,15 @@ /// Invoked when a fix is suggested against a variable. virtual void handleFixableVariable(const VarDecl *Variable, FixItList &&List) = 0; + + /// Returns the text indicating that the user needs to provide input there: + static std::string + getUserFillPlaceHolder(const StringRef &HintTextToUser = "placeholder") { + std::string s = std::string("<# "); + s += HintTextToUser; + s += " #>"; + return s; + } }; // This function invokes the analysis and allows the caller to react to it Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def =================================================================== --- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -29,6 +29,7 @@ WARNING_GADGET(Decrement) WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) +FIXABLE_GADGET(ULCArraySubscript) #undef FIXABLE_GADGET #undef WARNING_GADGET Index: clang/lib/Analysis/UnsafeBufferUsage.cpp =================================================================== --- clang/lib/Analysis/UnsafeBufferUsage.cpp +++ clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,6 +9,7 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallVector.h" #include #include @@ -115,6 +116,21 @@ MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All); return Visitor.findMatch(DynTypedNode::create(Node)); } + +AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { + return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); +} + +// Returns a matcher that matches any expression 'e' such that `innerMatcher` +// matches 'e' and 'e' is in an Unspecified Lvalue Context. +static auto isInUnspecifiedLvalueContext(internal::Matcher innerMatcher) { + auto isLvalueToRvalueCast = [](internal::Matcher M) { + return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue), + castSubExpr(M)); + }; + //FIXME: add assignmentTo context... + return isLvalueToRvalueCast(innerMatcher); +} } // namespace clang::ast_matchers namespace { @@ -282,7 +298,7 @@ /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as /// it doesn't have any bounds checks for the array. class ArraySubscriptGadget : public WarningGadget { - static constexpr const char *const ArraySubscrTag = "arraySubscr"; + static constexpr const char *const ArraySubscrTag = "ArraySubscript"; const ArraySubscriptExpr *ASE; public: @@ -366,6 +382,51 @@ // FIXME: pointer adding zero should be fine //FIXME: this gadge will need a fix-it }; + +class Strategy; + +// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue +// Context (see `isInUnspecifiedLvalueContext`). +// Note here `[]` is the built-in subscript operator. +class ULCArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const ULCArraySubscriptTag = "ArraySubscriptUnderULC"; + const ArraySubscriptExpr *Node; + +public: + ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs(ULCArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::ULCArraySubscript; + } + + static Matcher matcher() { + auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); + auto BaseIsArrayOrPtrDRE = + hasBase(ignoringParenImpCasts(allOf(declRefExpr(), ArrayOrPtr))); + auto Target = + arraySubscriptExpr(BaseIsArrayOrPtrDRE, + unless(hasIndex(integerLiteral(equals(0))))) + .bind(ULCArraySubscriptTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + virtual Optional getFixits(const Strategy &S) const override; + + virtual const Stmt *getBaseStmt() const override { return Node; } + + virtual DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = dyn_cast(Node->getBase()->IgnoreImpCasts())) { + return {DRE}; + } + return {}; + } +}; } // namespace namespace { @@ -519,26 +580,30 @@ // clang-format off M.addMatcher( stmt(forEveryDescendant( + eachOf( + // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable + // each other (they could if they were put in the same `anyOf` group). + // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers + // match for the same node, so that we can group them + // in one `anyOf` group (for better performance via short-circuiting). stmt(anyOf( - // Add Gadget::matcher() for every gadget in the registry. -#define GADGET(x) \ +#define FIXABLE_GADGET(x) \ x ## Gadget::matcher().bind(#x), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" - // In parallel, match all DeclRefExprs so that to find out - // whether there are any uncovered by gadgets. - declRefExpr(anyOf(hasPointerType(), hasArrayType()), - to(varDecl())).bind("any_dre"), // Also match DeclStmts because we'll need them when fixing // their underlying VarDecls that otherwise don't have // any backreferences to DeclStmts. declStmt().bind("any_ds") - )) - // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) - // here, to make sure that the statement actually belongs to the - // function and not to a nested function. However, forCallable uses - // ParentMap which can't be used before the AST is fully constructed. - // The original problem doesn't sound like it needs ParentMap though, - // maybe there's a more direct solution? + )), + stmt(anyOf( + // Add Gadget::matcher() for every gadget in the registry. +#define WARNING_GADGET(x) \ + x ## Gadget::matcher().bind(#x), +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + // In parallel, match all DeclRefExprs so that to find out + // whether there are any uncovered by gadgets. + declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre") + ))) )), &CB ); @@ -607,11 +672,232 @@ return FixablesForUnsafeVars; } +static Strategy +getNaiveStrategy(const llvm::SmallVectorImpl &UnsafeVars) { + Strategy S; + for (const VarDecl *VD : UnsafeVars) { + S.set(VD, Strategy::Kind::Span); + } + return S; +} + +Optional +ULCArraySubscriptGadget::getFixits(const Strategy &S) const { + if (const auto *DRE = dyn_cast(Node->getBase()->IgnoreImpCasts())) + if (const auto *VD = dyn_cast(DRE->getDecl())) { + switch (S.lookup(VD)) { + case Strategy::Kind::Span: + return FixItList{}; + case Strategy::Kind::Wontfix: + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + } + return std::nullopt; +} + +// Return the text representation of the given `APInt Val`: +static std::string getAPIntText(APInt Val) { + SmallVector Txt; + Val.toString(Txt, 10, true); + return Txt.data(); +} + +// Return the SourceLocation right before the given `Loc` +static SourceLocation prevLoc(SourceLocation Loc, const SourceManager &SM) { + auto [FileID, Offset] = SM.getDecomposedLoc(Loc); + return SM.getComposedLoc(FileID, Offset - 1); +} + +// Return text representation of an `Expr`. In case unable to get the text +// representatiion of `E`, return `Default`. +static StringRef getExprTextOr(const Expr *E, const SourceManager &SM, + const LangOptions &LangOpts, StringRef Default) { + std::optional EndToken = + Lexer::findNextToken(prevLoc(E->getEndLoc(), SM), SM, LangOpts); + + if (!EndToken) + return Default; + return Lexer::getSourceText( + CharSourceRange::getCharRange(E->getBeginLoc(), EndToken->getEndLoc()), + SM, LangOpts); +} + +// Return the source location right passed the last character of `Node`. +template +static std::optional getPassedLoc(const NodeTy *Node, + const SourceManager &SM, + const LangOptions &LangOpts) { + std::optional LastToken = + Lexer::findNextToken(prevLoc(Node->getEndLoc(), SM), SM, LangOpts); + + if (!LastToken) + return std::nullopt; + return LastToken->getEndLoc(); +} + +// For a non-null initializer `Init` of `T *` type, this function returns +// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it +// to output stream. +// In many cases, this function cannot figure out the actual extent `S`. It +// then will use a place holder to replace `S` to ask users to fill `S` in. The +// initializer shall be used to initialize a variable of type `std::span`. +// +// FIXME: Support multi-level pointers +// +// Parameters: +// `Init` a pointer to the initializer expression +// `Ctx` a reference to the ASTContext +static FixItList populateInitializerFixItWithSpan(const Expr *Init, + const ASTContext &Ctx) { + static const std::string DefaultExtentTxt = + UnsafeBufferUsageHandler::getUserFillPlaceHolder(); + FixItList FixIts{}; + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + std::string ExtentText = DefaultExtentTxt; + StringRef One = "1"; + + // Insert `{` before `Init`: + FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); + // Try to get the data extent. Break into different cases: + if (auto CxxNew = dyn_cast(Init->IgnoreImpCasts())) { + // In cases `Init` is `new T[n]` and there is no explicit cast over + // `Init`, we know that `Init` must evaluates to a pointer to `n` objects + // of `T`. So the extent is `n` unless `n` has side effects. Similar but + // simpler for the case where `Init` is `new T`. + if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { + if (!Ext->HasSideEffects(Ctx)) + ExtentText = + getExprTextOr(Ext, SM, LangOpts, StringRef(DefaultExtentTxt)); + } else if (!CxxNew->isArray()) + ExtentText = One; + } else if (auto CArrTy = dyn_cast( + Init->IgnoreImpCasts()->getType().getTypePtr())) { + // In cases `Init` is of an array type after stripping off implicit casts, + // the extent is the array size. Note that if the array size is not a + // constant, we cannot use it as the extent. + ExtentText = getAPIntText(CArrTy->getSize()); + } else { + // In cases `Init` is of the form `&Var` after stripping of implicit + // casts, where `&` is the built-in operator, the extent is 1. + if (auto AddrOfExpr = dyn_cast(Init->IgnoreImpCasts())) + if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && + isa_and_present(AddrOfExpr->getSubExpr())) + ExtentText = One; + // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, std::addressof,... + // etc. + } + + SmallString<32> StrBuffer{}; + std::optional LocPassInit = + getPassedLoc(Init, SM, LangOpts); + + if (!LocPassInit) + return {}; + StrBuffer.append(", "); + StrBuffer.append(ExtentText); + StrBuffer.append("}"); + FixIts.push_back( + FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); + return FixIts; +} + +// For a `VarDecl` of the form `T * var (= Init)?`, this +// function generates a fix-it for the declaration, which re-declares `var` to +// be of `span` type and transforms the initializer, if present, to a span +// constructor---`span var {Init, Extent}`, where `Extent` may need the user +// to fill in. +// +// FIXME: support Multi-level pointers +// +// Parameters: +// `D` a pointer the variable declaration node +// `Ctx` a reference to the ASTContext +// Returns: +// the generated fix-it +static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx) { + const QualType &T = D->getType().getDesugaredType(Ctx); + const QualType &SpanEltT = T->getPointeeType(); + assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!"); + + FixItList FixIts{}; + SourceLocation + ReplacementLastLoc; // the inclusive end location of the replacement + const SourceManager &SM = Ctx.getSourceManager(); + + if (const Expr *Init = D->getInit()) { + FixItList InitFixIts = populateInitializerFixItWithSpan(Init, Ctx); + + if (InitFixIts.empty()) + return {}; // Something wrong with fixing initializer, give up + // The loc right before the initializer: + ReplacementLastLoc = prevLoc(Init->getBeginLoc(), SM); + FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()), + std::make_move_iterator(InitFixIts.end())); + } else if (auto LocPassDecl = getPassedLoc(D, SM, Ctx.getLangOpts())) + // The loc right before the loc right passed `D`: + ReplacementLastLoc = prevLoc(*LocPassDecl, SM); + else + return {}; // Something wrong, give up + + // Producing fix-it for variable declaration---make `D` to be of span type: + SmallString<32> Replacement; + raw_svector_ostream OS(Replacement); + + OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName(); + FixIts.push_back(FixItHint::CreateReplacement( + SourceRange{D->getBeginLoc(), ReplacementLastLoc}, OS.str())); + return FixIts; +} + +static FixItList fixVariableWithSpan(const VarDecl *VD, + const DeclUseTracker &Tracker, + const ASTContext &Ctx) { + const DeclStmt *DS = Tracker.lookupDecl(VD); + assert(DS && "Fixing non-local variables not implemented yet!"); + assert(DS->getSingleDecl() == VD && + "Fixing non-single declarations not implemented yet!"); + // Currently DS is an unused variable but we'll need it when + // non-single decls are implemented, where the pointee type name + // and the '*' are spread around the place. + (void)DS; + + // FIXME: handle cases where DS has multiple declarations + return fixVarDeclWithSpan(VD, Ctx); +} + +static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K, + const DeclUseTracker &Tracker, + const ASTContext &Ctx) { + switch (K) { + case Strategy::Kind::Span: { + if (VD->getType()->isPointerType()) + return fixVariableWithSpan(VD, Tracker, Ctx); + return {}; + } + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("Strategy not implemented yet!"); + case Strategy::Kind::Wontfix: + llvm_unreachable("Invalid strategy!"); + } +} + static std::map -getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) { +getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const DeclUseTracker &Tracker, const ASTContext &Ctx) { std::map FixItsForVariable; for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) { - // TODO fixVariable - fixit for the variable itself + FixItsForVariable[VD] = fixVariable(VD, S.lookup(VD), Tracker, Ctx); + // If we fail to produce Fix-It for the declaration we have to skip the variable entirely. + if (FixItsForVariable[VD].empty()) { + FixItsForVariable.erase(VD); + continue; + } + bool ImpossibleToFix = false; llvm::SmallVector FixItsForVD; for (const auto &F : Fixables) { @@ -634,15 +920,6 @@ return FixItsForVariable; } -static Strategy -getNaiveStrategy(const llvm::SmallVectorImpl &UnsafeVars) { - Strategy S; - for (const VarDecl *VD : UnsafeVars) { - S.set(VD, Strategy::Kind::Span); - } - return S; -} - void clang::checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler) { assert(D && D->getBody()); @@ -675,7 +952,7 @@ Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars); std::map FixItsForVariable = - getFixIts(FixablesForUnsafeVars, NaiveStrategy); + getFixIts(FixablesForUnsafeVars, NaiveStrategy, Tracker, D->getASTContext()); // FIXME Detect overlapping FixIts. Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp @@ -0,0 +1,96 @@ +// RUN: cp %s %t.cpp +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fixit %t.cpp +// RUN: grep -v CHECK %t.cpp | FileCheck %s +typedef int * Int_ptr_t; +typedef int Int_t; + +void local_array_subscript_simple() { + int tmp; +// CHECK: std::span p {new int[10], 10}; +// CHECK: std::span q {new int[10], 10}; +// CHECK: tmp = p[5]; +// CHECK: tmp = q[5]; + int *p = new int[10]; + const int *q = new int[10]; + tmp = p[5]; + tmp = q[5]; + +// CHECK: std::span x {new int[10], 10}; +// CHECK: std::span y {new int, 1}; +// CHECK: std::span z {new int[10], 10}; +// CHECK: std::span w {new Int_t[10], 10}; + + Int_ptr_t x = new int[10]; + Int_ptr_t y = new int; + Int_t * z = new int[10]; + Int_t * w = new Int_t[10]; + + // CHECK: tmp = x[5]; + tmp = x[5]; + // CHECK: tmp = y[5]; + tmp = y[5]; // y[5] will crash after being span + // CHECK: tmp = z[5]; + tmp = z[5]; + // CHECK: tmp = w[5]; + tmp = w[5]; +} + +void local_array_subscript_auto() { + int tmp; +// CHECK: std::span p {new int[10], 10}; +// CHECK: tmp = p[5]; + auto p = new int[10]; + tmp = p[5]; +} + +void local_array_subscript_variable_extent() { + int n = 10; + int tmp; + + // CHECK: std::span p {new int[n], n}; + // CHECK: std::span q {new int[n++], <# placeholder #>}; + // CHECK: tmp = p[5]; + // CHECK: tmp = q[5]; + int *p = new int[n]; + // If the extent expression does not have a constant value, we cannot fill the extent for users... + int *q = new int[n++]; + tmp = p[5]; + tmp = q[5]; +} + + +void local_ptr_to_array() { + int tmp; + int n = 10; + int a[10]; + int b[n]; // If the extent expression does not have a constant value, we cannot fill the extent for users... + // CHECK: std::span p {a, 10}; + // CHECK: std::span q {b, <# placeholder #>}; + // CHECK: tmp = p[5]; + // CHECK: tmp = q[5]; + int *p = a; + int *q = b; + tmp = p[5]; + tmp = q[5]; +} + +void local_ptr_addrof_init() { + int var; +// CHECK: std::span q {&var, 1}; +// CHECK: var = q[5]; + int * q = &var; + // This expression involves unsafe buffer accesses, which will crash + // at runtime after applying the fix-it, + var = q[5]; +} + +void decl_without_init() { + int tmp; + // CHECK: std::span p; + int * p; + // CHECK: std::span q; + Int_ptr_t q; + + tmp = p[5]; + tmp = q[5]; +}