diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -36,6 +36,7 @@ #include "NotNullTerminatedResultCheck.h" #include "ParentVirtualCallCheck.h" #include "PosixReturnCheck.h" +#include "SignedCharMisuseCheck.h" #include "SizeofContainerCheck.h" #include "SizeofExpressionCheck.h" #include "StringConstructorCheck.h" @@ -119,6 +120,8 @@ "bugprone-parent-virtual-call"); CheckFactories.registerCheck( "bugprone-posix-return"); + CheckFactories.registerCheck( + "bugprone-signed-char-misuse"); CheckFactories.registerCheck( "bugprone-sizeof-container"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -28,6 +28,7 @@ NotNullTerminatedResultCheck.cpp ParentVirtualCallCheck.cpp PosixReturnCheck.cpp + SignedCharMisuseCheck.cpp SizeofContainerCheck.cpp SizeofExpressionCheck.cpp StringConstructorCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h @@ -0,0 +1,44 @@ +//===--- SignedCharMisuseCheck.h - clang-tidy -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace bugprone { + +/// Finds ``signed char`` -> integer conversions which might indicate a programming +/// error. The basic problem with the ``signed char``, that it might store the +/// non-ASCII characters as negative values. The human programmer probably +/// expects that after an integer conversion the converted value matches with the +/// character code (a value from [0..255]), however, the actual value is in +/// [-128..127] interval. This also applies to the plain ``char`` type on +/// those implementations which represent ``char`` similar to ``signed char``. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html +class SignedCharMisuseCheck : public ClangTidyCheck { +public: + SignedCharMisuseCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + const std::string CharTypdefsToIgnoreList; +}; + +} // namespace bugprone +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -0,0 +1,108 @@ +//===--- SignedCharMisuseCheck.cpp - clang-tidy ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SignedCharMisuseCheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; +using namespace clang::ast_matchers::internal; + +namespace clang { +namespace tidy { +namespace bugprone { + +namespace { +Matcher hasAnyListedName(const std::string &Names) { + const std::vector NameList = + utils::options::parseStringList(Names); + return hasAnyName(std::vector(NameList.begin(), NameList.end())); +} +} // namespace + +SignedCharMisuseCheck::SignedCharMisuseCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + CharTypdefsToIgnoreList(Options.get("CharTypdefsToIgnore", "")) {} + +void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); +} + +void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { + // We can ignore typedefs which are some kind of integer types + // (e.g. typedef char sal_Int8). In this case, we don't need to + // worry about the misinterpretation of char values. + const auto IntTypedef = qualType( + hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList)))); + + const auto SignedCharType = expr(hasType(qualType( + allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))))); + + const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()), + unless(booleanType()))) + .bind("integerType"); + ; + + // We are interested in signed char -> integer conversion. + const auto ImplicitCastExpr = + implicitCastExpr(hasSourceExpression(SignedCharType), + hasImplicitDestinationType(IntegerType)) + .bind("castExpression"); + ; + + const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); + const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); + const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr)); + + // We catch any type of casts to an integer. We need to have these cast + // expressions explicitly to catch only those casts which are direct children + // of an assignment/declaration. + const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr, + StaticCastExpr, FunctionalCastExpr)); + + // Catch assignments with the suspicious type conversion. + const auto AssignmentOperatorExpr = expr(binaryOperator( + hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr))); + + Finder->addMatcher(AssignmentOperatorExpr, this); + + // Catch declarations with the suspicious type conversion. + const auto Declaration = + varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr)); + + Finder->addMatcher(Declaration, this); +} + +void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) { + const auto *CastExpression = + Result.Nodes.getNodeAs("castExpression"); + const auto *IntegerType = Result.Nodes.getNodeAs("integerType"); + assert(CastExpression); + assert(IntegerType); + + // Ignore the match if we know that the value is not negative. + // The potential misinterpretation happens for negative values only. + Expr::EvalResult EVResult; + if (!CastExpression->isValueDependent() && + CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) { + llvm::APSInt Value1 = EVResult.Val.getInt(); + if (Value1.isNonNegative()) + return; + } + + diag(CastExpression->getBeginLoc(), + "singed char -> integer (%0) conversion; " + "consider to cast to unsigned char first.") + << *IntegerType; +} + +} // namespace bugprone +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -94,6 +94,17 @@ Without the null terminator it can result in undefined behaviour when the string is read. +- New :doc:`bugprone-signed-char-misuse + ` check. + + Finds ``signed char`` -> integer conversions which might indicate a programming + error. The basic problem with the ``signed char``, that it might store the + non-ASCII characters as negative values. The human programmer probably + expects that after an integer conversion the converted value matches with the + character code (a value from [0..255]), however, the actual value is in + [-128..127] interval. This also applies to the plain ``char`` type on + those implementations which represent ``char`` similar to ``signed char``. + - New :doc:`cert-mem57-cpp ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst new file mode 100644 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst @@ -0,0 +1,72 @@ +.. title:: clang-tidy - bugprone-signed-char-misuse + +bugprone-signed-char-misuse +=========================== + +Finds ``signed char`` -> integer conversions which might indicate a programming +error. The basic problem with the ``signed char``, that it might store the +non-ASCII characters as negative values. The human programmer probably +expects that after an integer conversion the converted value matches with the +character code (a value from [0..255]), however, the actual value is in +[-128..127] interval. This also applies to the plain ``char`` type on +those implementations which represent ``char`` similar to ``signed char``. + +To avoid this kind of misinterpretation, the desired way of converting from a +``signed char`` to an integer value is converting to ``unsigned char`` first, +which stores all the characters in the positive [0..255] interval which matches +with the known character codes. + +By now, this check is limited to assignments and variable declarations, +where a ``signed char`` is assigned to an integer variable. There are other +use cases where the same misinterpretation might lead to similar bugous +behavior. + +See also: +`STR34-C. Cast characters to unsigned char before converting to larger integer sizes +`_ + +A good example from the CERT description when a ``char`` variable is used to +read from a file that might contain non-ASCII characters. The problem comes +up when the code uses the ``-1`` integer value as EOF, while the 255 character +code is also stored as ``-1`` in two's complement form of char type. +See a simple example of this bellow. This code stops not only when it reaches +the end of the file, but also when it gets a character with the 255 code. + +.. code-block:: c++ + + #define EOF (-1) + + int read(void) { + char CChar; + int IChar = EOF; + + if (readChar(CChar)) { + IChar = CChar; + } + return IChar; + } + +A proper way to fix the code above is converting the ``char`` variable to +an ``unsigned char`` value first. + +.. code-block:: c++ + + #define EOF (-1) + + int read(void) { + char CChar; + int IChar = EOF; + + if (readChar(CChar)) { + IChar = static_cast(CChar); + } + return IChar; + } + +.. option:: CharTypdefsToIgnore + + A semicolon-separated list of typedef names. In this list, we can list + typedefs for ``char`` or ``signed char``, which will be ignored by the + check. This is useful when a typedef introduces an integer alias like + ``sal_Int8`` or ``int8_t``. In this case, human misinterpretation is not + an issue. \ No newline at end of file diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -63,6 +63,7 @@ bugprone-not-null-terminated-result bugprone-parent-virtual-call bugprone-posix-return + bugprone-signed-char-misuse bugprone-sizeof-container bugprone-sizeof-expression bugprone-string-constructor diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp @@ -0,0 +1,74 @@ +// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t \ +// RUN: -config='{CheckOptions: \ +// RUN: [{key: bugprone-signed-char-misuse.CharTypdefsToIgnore, value: "sal_Int8;int8_t"}]}' \ +// RUN: -- + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly caught by the check. + +// Check that a simple test case is still caught. +int SimpleAssignment() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:16: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +typedef signed char sal_Char; + +int TypedefNotInIgnorableList() { + sal_Char CCharacter = 'a'; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly ignored by the check. + +typedef signed char sal_Int8; + +int OneIgnorableTypedef() { + sal_Int8 CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +typedef signed char int8_t; + +int OtherIgnorableTypedef() { + int8_t CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases which should be caught by the check. + +namespace boost { + +template +class optional { + T *member; + +public: + optional(T value) { + member = new T(value); + } + + T operator*() { return *member; } +}; + +} // namespace boost + +int DereferenceWithTypdef(boost::optional param) { + int NCharacter = *param; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp @@ -0,0 +1,123 @@ +// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly caught by the check. + +int SimpleVarDeclaration() { + signed char CCharacter = -5; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int SimpleAssignment() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:16: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int CStyleCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = (int)CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:21: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int StaticCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = static_cast(CCharacter); + // CHECK-MESSAGES: [[@LINE-1]]:33: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int FunctionalCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = int(CCharacter); + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int NegativeConstValue() { + const signed char CCharacter = -5; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int CharPointer(signed char *CCharacter) { + int NCharacter = *CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly ignored by the check. + +int UnsignedCharCast() { + unsigned char CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +int PositiveConstValue() { + const signed char CCharacter = 5; + int NCharacter = CCharacter; + + return NCharacter; +} + +// singed char -> integer cast is not the direct child of declaration expression. +int DescendantCast() { + signed char CCharacter = 'a'; + int NCharacter = 10 + CCharacter; + + return NCharacter; +} + +// singed char -> integer cast is not the direct child of assignment expression. +int DescendantCastAssignment() { + signed char CCharacter = 'a'; + int NCharacter; + NCharacter = 10 + CCharacter; + + return NCharacter; +} + +// bool is an integer type in clang; make sure to ignore it. +bool BoolVarDeclaration() { + signed char CCharacter = 'a'; + bool BCharacter = CCharacter == 'b'; + + return BCharacter; +} + +// bool is an integer type in clang; make sure to ignore it. +bool BoolAssignment() { + signed char CCharacter = 'a'; + bool BCharacter; + BCharacter = CCharacter == 'b'; + + return BCharacter; +} + +// char is an integer type in clang; make sure to ignore it. +unsigned char CharToCharCast() { + signed char SCCharacter = 'a'; + unsigned char USCharacter; + USCharacter = SCCharacter; + + return USCharacter; +}