diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -441,6 +441,8 @@ RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R); Optional getSValFromInitListExpr(const InitListExpr *ILE, uint64_t Offset, QualType ElemT); + SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, + QualType ElemT); public: // Part of public interface to class. @@ -1701,10 +1703,16 @@ // From here `Offset` is in the bounds. // Handle InitListExpr. + // Example: + // const char arr[] = { 1, 2, 3 }; if (const auto *ILE = dyn_cast(Init)) return getSValFromInitListExpr(ILE, Offset, R->getElementType()); - // FIXME: Handle StringLiteral. + // Handle StringLiteral. + // Example: + // const char arr[] = "abc"; + if (const auto *SL = dyn_cast(Init)) + return getSValFromStringLiteral(SL, Offset, R->getElementType()); // FIXME: Handle CompoundLiteralExpr. @@ -1716,6 +1724,15 @@ uint64_t Offset, QualType ElemT) { assert(ILE && "InitListExpr should not be null"); + // C++20 [dcl.init.string] 9.4.2.1: + // An array of ordinary character type [...] can be initialized by [...] + // an appropriately-typed string-literal enclosed in braces. + // Example: + // const char arr[] = { "abc" }; + if (ILE->isStringLiteralInit()) + if (const auto *SL = dyn_cast(ILE->getInit(0))) + return getSValFromStringLiteral(SL, Offset, ElemT); + // C++20 [expr.add] 9.4.17.5 (excerpt): // i-th array element is value-initialized for each k < i ≤ n, // where k is an expression-list size and n is an array extent. @@ -1728,6 +1745,42 @@ return svalBuilder.getConstantVal(E); } +/// Returns an SVal for the specified position in a string +/// literal. +/// +/// \param SL The given string literal. +/// \param Offset The unsigned offset. E.g. for the expression +/// `char x = str[42];` an offset should be 42. +/// E.g. 
for the string "abc" offset: +/// - 1 returns SVal{b}, because it's the second position in the string. +/// - 42 returns SVal{0}, because there's no explicit value at this +/// position in the string. +/// \param ElemT The type of the result SVal expression. +/// +/// NOTE: We return `0` for every offset >= the literal length for array +/// declarations, like: +/// const char str[42] = "123"; // Literal length is 4. +/// char c = str[41]; // Offset is 41. +/// FIXME: Nevertheless, we can't do the same for pointer declarations, like: +/// const char * const str = "123"; // Literal length is 4. +/// char c = str[41]; // Offset is 41. Returns `0`, but Undef +/// // expected. +/// It should be properly handled before reaching this point. +/// The main problem is that we can't distinguish between these declarations, +/// because in case of array we can get the Decl from VarRegion, but in case +/// of pointer the region is a StringRegion, which doesn't contain a Decl. +/// Possible solution could be passing an array extent along with the offset. +SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL, + uint64_t Offset, + QualType ElemT) { + assert(SL && "StringLiteral should not be null"); + // C++20 [dcl.init.string] 9.4.2.3: + // If there are fewer initializers than there are array elements, each + // element not explicitly initialized shall be zero-initialized [dcl.init]. + uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset); + return svalBuilder.makeIntVal(Code, ElemT); +} + SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, const ElementRegion* R) { // Check if the region has a binding. @@ -1739,26 +1792,17 @@ // Check if the region is an element region of a string literal. if (const StringRegion *StrR = dyn_cast(superR)) { // FIXME: Handle loads from strings where the literal is treated as - // an integer, e.g., *((unsigned int*)"hello") + // an integer, e.g., *((unsigned int*)"hello"). 
Such loads are UB according + // to C++20 7.2.1.11 [basic.lval]. QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType(); if (!Ctx.hasSameUnqualifiedType(T, R->getElementType())) return UnknownVal(); - - const StringLiteral *Str = StrR->getStringLiteral(); - SVal Idx = R->getIndex(); - if (Optional CI = Idx.getAs()) { - int64_t i = CI->getValue().getSExtValue(); - // Abort on string underrun. This can be possible by arbitrary - // clients of getBindingForElement(). - if (i < 0) + if (const auto CI = R->getIndex().getAs()) { + const llvm::APSInt &Idx = CI->getValue(); + if (Idx < 0) return UndefinedVal(); - int64_t length = Str->getLength(); - // Technically, only i == length is guaranteed to be null. - // However, such overflows should be caught before reaching this point; - // the only time such an access would be made is if a string literal was - // used to initialize a larger array. - char c = (i >= length) ? '\0' : Str->getCodeUnit(i); - return svalBuilder.makeIntVal(c, T); + const StringLiteral *SL = StrR->getStringLiteral(); + return getSValFromStringLiteral(SL, Idx.getZExtValue(), T); } } else if (const VarRegion *VR = dyn_cast(superR)) { if (Optional V = getConstantValFromConstArrayInitializer(B, VR, R)) diff --git a/clang/test/Analysis/initialization.cpp b/clang/test/Analysis/initialization.cpp --- a/clang/test/Analysis/initialization.cpp +++ b/clang/test/Analysis/initialization.cpp @@ -146,3 +146,110 @@ void struct_arr_index1() { clang_analyzer_eval(S2::arr_no_init[2]); // expected-warning{{UNKNOWN}} } + +char const glob_arr6[5] = "123"; +void glob_array_index5() { + clang_analyzer_eval(glob_arr6[0] == '1'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr6[1] == '2'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr6[2] == '3'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr6[3] == '\0'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr6[4] == '\0'); // expected-warning{{TRUE}} +} + +void 
glob_ptr_index3() { + char const *ptr = glob_arr6; + clang_analyzer_eval(ptr[-42] == '\0'); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[0] == '1'); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[1] == '2'); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[2] == '3'); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[3] == '\0'); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[4] == '\0'); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[5] == '\0'); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[6] == '\0'); // expected-warning{{UNDEFINED}} +} + +void glob_invalid_index7() { + int idx = -42; + auto x = glob_arr6[idx]; // expected-warning{{garbage or undefined}} +} + +void glob_invalid_index8() { + const char *ptr = glob_arr6; + int idx = 42; + auto x = ptr[idx]; // expected-warning{{garbage or undefined}} +} + +char const glob_arr7[5] = {"123"}; +void glob_array_index6() { + clang_analyzer_eval(glob_arr7[0] == '1'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr7[1] == '2'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr7[2] == '3'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr7[3] == '\0'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr7[4] == '\0'); // expected-warning{{TRUE}} +} + +void glob_invalid_index9() { + int idx = -42; + auto x = glob_arr7[idx]; // expected-warning{{garbage or undefined}} +} + +void glob_invalid_index10() { + const char *ptr = glob_arr7; + int idx = 42; + auto x = ptr[idx]; // expected-warning{{garbage or undefined}} +} + +char const *const glob_ptr8 = "123"; +void glob_ptr_index4() { + clang_analyzer_eval(glob_ptr8[0] == '1'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr8[1] == '2'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr8[2] == '3'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr8[3] == '\0'); // expected-warning{{TRUE}} + // FIXME: Should be UNDEFINED. 
+ // We should take into account a declaration in which the literal is used. + clang_analyzer_eval(glob_ptr8[4] == '\0'); // expected-warning{{TRUE}} +} + +void glob_invalid_index11() { + int idx = -42; + auto x = glob_ptr8[idx]; // expected-warning{{garbage or undefined}} +} + +void glob_invalid_index12() { + int idx = 42; + // FIXME: Should warn {{garbage or undefined}} + // We should take into account a declaration in which the literal is used. + auto x = glob_ptr8[idx]; // no-warning +} + +const char16_t *const glob_ptr9 = u"абв"; +void glob_ptr_index5() { + clang_analyzer_eval(glob_ptr9[0] == u'а'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr9[1] == u'б'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr9[2] == u'в'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr9[3] == '\0'); // expected-warning{{TRUE}} +} + +const char32_t *const glob_ptr10 = U"\U0001F607\U0001F608\U0001F609"; +void glob_ptr_index6() { + clang_analyzer_eval(glob_ptr10[0] == U'\U0001F607'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr10[1] == U'\U0001F608'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr10[2] == U'\U0001F609'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr10[3] == '\0'); // expected-warning{{TRUE}} +} + +const wchar_t *const glob_ptr11 = L"\123\u0041\xFF"; +void glob_ptr_index7() { + clang_analyzer_eval(glob_ptr11[0] == L'\123'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr11[1] == L'\u0041'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr11[2] == L'\xFF'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr11[3] == L'\0'); // expected-warning{{TRUE}} +} + +const char *const glob_ptr12 = u8"abc"; +void glob_ptr_index8() { + clang_analyzer_eval(glob_ptr12[0] == 'a'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr12[1] == 'b'); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_ptr12[2] == 'c'); // expected-warning{{TRUE}} + 
clang_analyzer_eval(glob_ptr12[3] == '\0'); // expected-warning{{TRUE}} +}