Index: include/clang-c/CXString.h =================================================================== --- include/clang-c/CXString.h +++ include/clang-c/CXString.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_C_CXSTRING_H #include "clang-c/Platform.h" +#include #ifdef __cplusplus extern "C" { @@ -36,8 +37,11 @@ * with the string data, call \c clang_disposeString() to free the string. */ typedef struct { - const void *data; - unsigned private_flags; + const void *Contents; + unsigned Size; + bool IsNullTerminated; + bool IsOwned; + bool IsPooled; } CXString; typedef struct { @@ -47,10 +51,33 @@ /** * \brief Retrieve the character data associated with the given string. + * + * This returns a pointer to a C string even if this \b CXString was built + * from e.g. a StringRef reference (which is not null-terminated). In those + * cases a string copy is performed on-demand. + * + * See also \b clang_getStringData and \b clang_getStringSize which could + * provide a range or "window" of data, if needed, and a C string is not + * strictly needed; this could save a C string allocation, deallocation, and + * string copy. */ CINDEX_LINKAGE const char *clang_getCString(CXString string); /** + * \brief Retrieve a pointer to the start of the character data for this string. + * + * Note that the string is not necessarily null-terminated, so it might not be + * suitable for use as a C string. See \b clang_getCString which guarantees + * a C string result. + */ +CINDEX_LINKAGE const char *clang_getStringData(CXString string); + +/** + * \brief Retrieve the string length (not including null-terminator, if any). + */ +CINDEX_LINKAGE unsigned clang_getStringSize(CXString string); + +/** * \brief Free the given string. */ CINDEX_LINKAGE void clang_disposeString(CXString string); @@ -68,4 +95,3 @@ } #endif #endif - Index: tools/c-index-test/c-index-test.c =================================================================== --- tools/c-index-test/c-index-test.c +++ tools/c-index-test/c-index-test.c @@ -1040,6 +1040,16 @@ } } +static CXString createCXString(const char *CS) { + CXString Str; + Str.Contents = (const void *) CS; + Str.Size = strlen(CS); + Str.IsNullTerminated = true; + Str.IsOwned = false; + Str.IsPooled = false; + return Str; +} + /******************************************************************************/ /* Callbacks. */ /******************************************************************************/ @@ -2994,7 +3004,7 @@ int first_check_printed; int fail_for_error; int abort; - const char *main_filename; + CXString main_filename; ImportedASTFilesData *importedASTs; IndexDataStringList *strings; CXTranslationUnit TU; @@ -3033,6 +3043,7 @@ const char *cname; CXIdxClientFile file; unsigned line, column; + const char *main_filename; int isMainFile; index_data = (IndexData *)client_data; @@ -3047,7 +3058,8 @@ } filename = clang_getFileName((CXFile)file); cname = clang_getCString(filename); - if (strcmp(cname, index_data->main_filename) == 0) + main_filename = clang_getCString(index_data->main_filename); + if (strcmp(cname, main_filename) == 0) isMainFile = 1; else isMainFile = 0; @@ -3249,14 +3261,11 @@ static CXIdxClientFile index_enteredMainFile(CXClientData client_data, CXFile file, void *reserved) { IndexData *index_data; - CXString filename; index_data = (IndexData *)client_data; printCheck(index_data); - filename = clang_getFileName(file); - index_data->main_filename = clang_getCString(filename); - clang_disposeString(filename); + index_data->main_filename = clang_getFileName(file); printf("[enteredMainFile]: "); printCXIndexFile((CXIdxClientFile)file); @@ -3495,7 +3504,7 @@ index_data.first_check_printed = 0; index_data.fail_for_error = 0; index_data.abort = 0; - index_data.main_filename = ""; + index_data.main_filename = createCXString(""); index_data.importedASTs = importedASTs; index_data.strings = NULL; index_data.TU = NULL; @@ -3511,6 +3520,7 @@ if (index_data.fail_for_error) result = -1; + clang_disposeString(index_data.main_filename); free_client_data(&index_data); return result; } @@ -3532,7 +3542,7 @@ index_data.first_check_printed = 0; index_data.fail_for_error = 0; index_data.abort = 0; - index_data.main_filename = ""; + index_data.main_filename = createCXString(""); index_data.importedASTs = importedASTs; index_data.strings = NULL; index_data.TU = TU; @@ -3545,6 +3555,7 @@ result = -1; clang_disposeTranslationUnit(TU); + clang_disposeString(index_data.main_filename); free_client_data(&index_data); return result; } @@ -4037,9 +4048,7 @@ if (!isUSR(I[2])) return not_usr("", I[2]); else { - CXString x; - x.data = (void*) I[2]; - x.private_flags = 0; + CXString x = createCXString(I[2]); print_usr(clang_constructUSR_ObjCIvar(I[1], x)); } @@ -4064,9 +4073,7 @@ if (!isUSR(I[3])) return not_usr("", I[3]); else { - CXString x; - x.data = (void*) I[3]; - x.private_flags = 0; + CXString x = createCXString(I[3]); print_usr(clang_constructUSR_ObjCMethod(I[1], atoi(I[2]), x)); } I += 4; @@ -4094,9 +4101,7 @@ if (!isUSR(I[2])) return not_usr("", I[2]); else { - CXString x; - x.data = (void*) I[2]; - x.private_flags = 0; + CXString x = createCXString(I[2]); print_usr(clang_constructUSR_ObjCProperty(I[1], x)); } I += 3; Index: tools/libclang/CXString.h =================================================================== --- tools/libclang/CXString.h +++ tools/libclang/CXString.h @@ -106,4 +106,3 @@ } #endif - Index: tools/libclang/CXString.cpp =================================================================== --- tools/libclang/CXString.cpp +++ tools/libclang/CXString.cpp @@ -15,99 +15,141 @@ #include "CXString.h" #include "CXTranslationUnit.h" +#include "clang-c/CXString.h" #include "clang-c/Index.h" #include "clang/Frontend/ASTUnit.h" #include "llvm/Support/ErrorHandling.h" +#include -using namespace clang; - -/// Describes the kind of underlying data in CXString. -enum CXStringFlag { - /// CXString contains a 'const char *' that it doesn't own. - CXS_Unmanaged, +static_assert(sizeof(CXString) <= 16, ""); - /// CXString contains a 'const char *' that it allocated with malloc(). - CXS_Malloc, - - /// CXString contains a CXStringBuf that needs to be returned to the - /// CXStringPool. - CXS_StringBuf -}; +using namespace clang; namespace clang { namespace cxstring { +/** + * This is for \b CXString 's which are created with \b CreateRef(StringRef). + * We'll store the info from the input \b StringRef: char ptr and size. + * + * We don't know for sure whether this is null-terminated so, when and if + * \b clang_getCString is called for this \b CXString, we'll allocate C string + * storage and copy data into the storage. We'll memo-ize that in the + * \b CString member. + * + * This is refcounted; the \b Count member is initially 1. When a \b CXString + * instance using this object is disposed via \b clang_disposeString, \b Count + * is decremented. When this string is duplicated the \b Count increases. + * + * When \b Count finally drops to zero, the ptr at \b CString, and this object, + * should be freed with \b free . + */ +struct RefCountedCharRange { + const char *Data; + const char *CString; + unsigned Size; + unsigned Count; +}; + //===----------------------------------------------------------------------===// // Basic generation of CXStrings. //===----------------------------------------------------------------------===// CXString createEmpty() { CXString Str; - Str.data = ""; - Str.private_flags = CXS_Unmanaged; + Str.Contents = (const void *) ""; + Str.Size = 0; + Str.IsNullTerminated = true; + Str.IsOwned = false; + Str.IsPooled = false; return Str; } CXString createNull() { CXString Str; - Str.data = nullptr; - Str.private_flags = CXS_Unmanaged; + Str.Contents = nullptr; + Str.Size = 0; + Str.IsNullTerminated = false; + Str.IsOwned = false; + Str.IsPooled = false; return Str; } CXString createRef(const char *String) { - if (String && String[0] == '\0') + if (!String) { + return createNull(); + } + if (String[0] == '\0') { return createEmpty(); + } CXString Str; - Str.data = String; - Str.private_flags = CXS_Unmanaged; + Str.Contents = (const void *) String; + Str.Size = strlen(String); + Str.IsNullTerminated = true; + Str.IsOwned = false; + Str.IsPooled = false; return Str; } +inline static const char *copyCharRange(const char *CS, unsigned Size) { + char *Spelling = static_cast(malloc(Size + 1)); + memcpy(Spelling, CS, Size); + Spelling[Size] = 0; + return Spelling; +} + CXString createDup(const char *String) { - if (!String) + if (!String) { return createNull(); - - if (String[0] == '\0') + } + if (String[0] == '\0') { return createEmpty(); + } CXString Str; - Str.data = strdup(String); - Str.private_flags = CXS_Malloc; + Str.Size = strlen(String); + Str.Contents = (const void *) copyCharRange(String, Str.Size); + Str.IsNullTerminated = true; + Str.IsOwned = true; + Str.IsPooled = false; return Str; } CXString createRef(StringRef String) { - // If the string is not nul-terminated, we have to make a copy. - - // FIXME: This is doing a one past end read, and should be removed! For memory - // we don't manage, the API string can become unterminated at any time outside - // our control. - - if (!String.empty() && String.data()[String.size()] != 0) - return createDup(String); - - CXString Result; - Result.data = String.data(); - Result.private_flags = (unsigned) CXS_Unmanaged; - return Result; + assert (String.size() <= std::numeric_limits::max()); + CXString Str; + Str.Size = unsigned(String.size()); + Str.IsNullTerminated = false; + Str.IsOwned = false; + Str.IsPooled = false; + auto *RC = new RefCountedCharRange { + /* Data */ String.data(), + /* CString */ nullptr, + /* Size */ Str.Size, + /* Count */ 1, + }; + Str.Contents = (const void *) RC; + return Str; } CXString createDup(StringRef String) { - CXString Result; - char *Spelling = static_cast(malloc(String.size() + 1)); - memmove(Spelling, String.data(), String.size()); - Spelling[String.size()] = 0; - Result.data = Spelling; - Result.private_flags = (unsigned) CXS_Malloc; - return Result; + CXString Str; + Str.Size = String.size(); + Str.Contents = (const void *) copyCharRange(String.data(), Str.Size); + Str.IsNullTerminated = true; + Str.IsOwned = true; + Str.IsPooled = false; + return Str; } CXString createCXString(CXStringBuf *buf) { CXString Str; - Str.data = buf; - Str.private_flags = (unsigned) CXS_StringBuf; + Str.Contents = buf->Data.data(); + Str.Size = buf->Data.size(); + Str.IsNullTerminated = true; + Str.IsOwned = false; + Str.IsPooled = true; return Str; } @@ -151,7 +193,7 @@ } bool isManagedByPool(CXString str) { - return ((CXStringFlag) str.private_flags) == CXS_StringBuf; + return str.IsPooled; } } // end namespace cxstring @@ -162,24 +204,65 @@ //===----------------------------------------------------------------------===// const char *clang_getCString(CXString string) { - if (string.private_flags == (unsigned) CXS_StringBuf) { - return static_cast(string.data)->Data.data(); + const char *CString = nullptr; + + // If not null-terminated, then this is a reference to a range of characters + // owned elsewhere. We'll need to copy this to a new C string to ensure it + // is null-terminated, if we haven't done so already. + if (string.IsNullTerminated) { + CString = (const char *) string.Contents; + } else { + auto *V = const_cast(string.Contents); + if (V) { + auto *R = (clang::cxstring::RefCountedCharRange *) V; + if (R) { + if (!R->CString) { + // Need to make this a new C string. + char *NewCString = (char *) malloc(R->Size + 1); + if (R->Data) { + memcpy(NewCString, R->Data, R->Size); + } + NewCString[R->Size] = 0; + R->CString = (const char *) NewCString; + } + CString = R->CString; + } + } } - return static_cast(string.data); + + return CString; +} + +const char *clang_getStringData(CXString string) { + if (!(string.IsOwned || string.IsNullTerminated)) { + auto *R = (const clang::cxstring::RefCountedCharRange *) string.Contents; + return R ? R->Data : nullptr; + } + return (const char *) string.Contents; +} + +unsigned clang_getStringSize(CXString string) { + return string.Size; } void clang_disposeString(CXString string) { - switch ((CXStringFlag) string.private_flags) { - case CXS_Unmanaged: - break; - case CXS_Malloc: - if (string.data) - free(const_cast(string.data)); - break; - case CXS_StringBuf: - static_cast( - const_cast(string.data))->dispose(); - break; + if (string.IsOwned) { + // Is a C string that we own; need to free it. + free(const_cast(string.Contents)); + } else { + if (!string.IsNullTerminated) { + // Neither null-terminated nor owned. Is a char range for which we might + // have created a C string. + auto *CR = (const clang::cxstring::RefCountedCharRange *) string.Contents; + auto *R = const_cast(CR); + if (R) { + -- R->Count; + if (!R->Count) { + free((void *) R->CString); + free((void *) R); + } + } + } } } @@ -189,4 +272,3 @@ delete[] set->Strings; delete set; } -