Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -446,6 +446,8 @@ QualType ElemT); SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, QualType ElemT); + bool canAccessStoredValue(QualType OrigT, QualType ThroughT, + uint64_t Index) const; public: // Part of public interface to class. @@ -1651,6 +1653,20 @@ return Extents; } +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return the root element type of the n-dimensional array. +/// +/// E.g. for `const int[1][2][3];` returns `int`. +QualType getConstantArrayRootElement(const ConstantArrayType *CAT) { + assert(CAT && "ConstantArrayType should not be null"); + while (const auto *DummyCAT = + dyn_cast(CAT->getElementType())) { + CAT = DummyCAT; + } + return CAT->getElementType(); +} + /// This is a helper function for `getConstantValFromConstArrayInitializer`. /// /// Return an array of offsets from nested ElementRegions. The array is never @@ -1736,6 +1752,60 @@ return None; } +/// Returns true if the stored value can be accessed through a pointer to +/// another type. +/// +/// C++20 7.2.1(11) [basic.lval] (excerpt): +/// A program can access the stored value of an object through: +/// - the same type of the object; +/// - a signed or unsigned type corresponding to the type of the +/// object; +/// - a char, unsigned char, std::byte (NOTE: not signed char). +/// Otherwise, the behavior is undefined. 
+/// +/// Example: +/// const int arr[42] = {}; +/// auto* pchar = (char*)arr; +/// auto* punsigned = (unsigned int*)arr; +/// auto* pshort = (short*)arr; +/// auto x1 = pchar[0]; // valid +/// auto x2 = pchar[1]; // UB (index != 0) +/// auto x3 = punsigned[0]; // valid +/// auto x4 = pshort[0]; // UB (unrelated type) +bool RegionStoreManager::canAccessStoredValue(QualType OrigT, QualType ThroughT, + uint64_t Index) const { + // Remove cv-qualifiers. + OrigT = OrigT->getCanonicalTypeUnqualified(); + ThroughT = ThroughT->getCanonicalTypeUnqualified(); + + // - is the same type + if (OrigT == ThroughT) + return true; + + // NOTE: C++20 6.8.2(3.4) [basic.compound]: + // An object of type T that is not an array element is considered to + // belong to an array with ONE element of type T. + // Hence, only the first element can be retrieved, at least until the + // paper P1839R0 is considered by the committee. + if ((Index != 0)) + return false; + + // - is char, uchar, std::byte + if ((ThroughT == Ctx.CharTy) || (ThroughT == Ctx.UnsignedCharTy) || + ThroughT->isStdByteType()) + return true; + + QualType TypeWithOppositeSign = OrigT->isUnsignedIntegerOrEnumerationType() + ? Ctx.getCorrespondingSignedType(OrigT) + : Ctx.getCorrespondingUnsignedType(OrigT); + + // - is opposite sign. NOTE(review): verify OrigT is an integer type above. + if (ThroughT == TypeWithOppositeSign) + return true; + + return false; +} + Optional RegionStoreManager::getConstantValFromConstArrayInitializer( RegionBindingsConstRef B, const ElementRegion *R) { assert(R && "ElementRegion should not be null"); @@ -1799,18 +1869,24 @@ ConcreteOffsets)) return *V; + // Check whether a program can access the stored value of another + // type. + QualType ElemT = R->getElementType(); + QualType ArrT = getConstantArrayRootElement(CAT); + if (!canAccessStoredValue(ArrT, ElemT, ConcreteOffsets.back())) + return UndefinedVal(); + // Handle InitListExpr. 
// Example: // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; if (const auto *ILE = dyn_cast(Init)) - return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); + return getSValFromInitListExpr(ILE, ConcreteOffsets, ElemT); // Handle StringLiteral. // Example: // const char arr[] = "abc"; if (const auto *SL = dyn_cast(Init)) - return getSValFromStringLiteral(SL, ConcreteOffsets.front(), - R->getElementType()); + return getSValFromStringLiteral(SL, ConcreteOffsets.front(), ElemT); // FIXME: Handle CompoundLiteralExpr. Index: clang/test/Analysis/initialization.cpp =================================================================== --- clang/test/Analysis/initialization.cpp +++ clang/test/Analysis/initialization.cpp @@ -4,6 +4,10 @@ void clang_analyzer_dump(T x); void clang_analyzer_eval(int); +namespace std { +enum class byte : unsigned char {}; +}; + struct S { int a = 3; }; @@ -256,3 +260,55 @@ clang_analyzer_eval(glob_arr9[1][2] == 7); // expected-warning{{TRUE}} clang_analyzer_eval(glob_arr9[1][3] == 0); // expected-warning{{TRUE}} } + +void glob_cast_same1() { + auto *ptr = (int *)glob_arr2; + auto x1 = ptr[0]; // no-warning + auto x2 = ptr[1]; // no-warning +} + +void glob_cast_char1() { + const auto *ptr = (char *)glob_arr2; // 1-dim array to char* + auto x1 = ptr[0]; // no-warning + auto x2 = ptr[1]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_char2() { + const auto *ptr = (char *)glob_arr5; // 2-dim array to char* + auto x1 = ptr[0]; // no-warning + // FIXME: Should warn {{garbage or undefined}}. 
+ auto x2 = ptr[1]; // no-warning +} + +void glob_cast_uchar1() { + auto *ptr = (unsigned char *)glob_arr2; + auto x1 = ptr[0]; // no-warning + auto x2 = ptr[1]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_byte1() { + auto *ptr = (const std::byte *)glob_arr2; + auto x1 = ptr[0]; // no-warning + auto x2 = ptr[1]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_opposite_sign1() { + auto *ptr = (unsigned int *)glob_arr2; + auto x1 = ptr[0]; // no-warning + auto x2 = ptr[1]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_invalid1() { + auto *ptr = (signed char *)glob_arr2; + auto x = ptr[0]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_invalid2() { + using T = short *; + auto x = ((T)glob_arr2)[0]; // expected-warning{{garbage or undefined}} +} + +void glob_cast_invalid3() { + auto *ptr = (char32_t *)glob_arr2; + auto x = ptr[0]; // expected-warning{{garbage or undefined}} +}