Index: clang/tools/libclang/CIndex.cpp
===================================================================
--- clang/tools/libclang/CIndex.cpp
+++ clang/tools/libclang/CIndex.cpp
@@ -164,6 +164,55 @@
   return Result;
 }
 
+static void clang_tokenize_impl(ASTUnit *CXXUnit, SourceRange R,
+                                CXToken **Tokens, unsigned *NumTokens);
+
+SourceRange cxloc::translateCharRangeToTokenRange(CXTranslationUnit TU,
+                                                  CXSourceRange R) {
+  if (isNotUsableTU(TU)) {
+    LOG_BAD_TU(TU);
+    return {};
+  }
+
+  ASTUnit *CXXUnit = cxtu::getASTUnit(TU);
+  if (!CXXUnit)
+    return {};
+
+  // clang_tokenize_impl leaves its out-parameters untouched when it finds no
+  // tokens, so they must start out in the "no tokens" state.
+  CXToken *Tokens = nullptr;
+  unsigned int NumTokens = 0;
+  clang_tokenize_impl(
+      CXXUnit,
+      // FIXME: naturally tokenization doesn't expect a token range - we should
+      // use a distinct type.
+      SourceRange(SourceLocation::getFromRawEncoding(R.begin_int_data),
+                  SourceLocation::getFromRawEncoding(R.end_int_data)),
+      &Tokens, &NumTokens);
+
+  if (NumTokens == 0)
+    return {};
+
+  SourceRange Result;
+
+  // Check that the begin of the range actually points to begin of some token.
+  const CXSourceRange FirstTokenRng = clang_getTokenExtent(TU, Tokens[0]);
+  if (FirstTokenRng.begin_int_data == R.begin_int_data) {
+    const CXSourceRange LastTokenRng =
+        clang_getTokenExtent(TU, Tokens[NumTokens - 1]);
+    const SourceLocation LastTokenBegin =
+        SourceLocation::getFromRawEncoding(LastTokenRng.begin_int_data);
+
+    if (LastTokenBegin.isValid())
+      Result = SourceRange(SourceLocation::getFromRawEncoding(R.begin_int_data),
+                           LastTokenBegin);
+  }
+
+  // The token buffer was malloc'ed by clang_tokenize_impl and is owned here.
+  clang_disposeTokens(TU, Tokens, NumTokens);
+  return Result;
+}
+
 //===----------------------------------------------------------------------===//
 // Cursor visitor.
 //===----------------------------------------------------------------------===//
@@ -6908,6 +6957,26 @@
   return Token;
 }
 
+/// Tokenizes \p R in \p CXXUnit into a freshly malloc'ed array stored in
+/// \p *Tokens, with its length in \p *NumTokens. Both out-parameters are left
+/// untouched if any argument is null or no tokens are found.
+static void clang_tokenize_impl(ASTUnit *CXXUnit, SourceRange R,
+                                CXToken **Tokens, unsigned *NumTokens) {
+  if (!CXXUnit || !Tokens || !NumTokens)
+    return;
+
+  SmallVector<CXToken, 32> CXTokens;
+  getTokens(CXXUnit, R, CXTokens);
+
+  if (CXTokens.empty())
+    return;
+
+  *Tokens = static_cast<CXToken *>(
+      llvm::safe_malloc(sizeof(CXToken) * CXTokens.size()));
+  memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size());
+  *NumTokens = CXTokens.size();
+}
+
 void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, CXToken **Tokens,
                     unsigned *NumTokens) {
   LOG_FUNC_SECTION { *Log << TU << ' ' << Range; }
@@ -6932,16 +7001,7 @@
   if (R.isInvalid())
     return;
 
-  SmallVector<CXToken, 32> CXTokens;
-  getTokens(CXXUnit, R, CXTokens);
-
-  if (CXTokens.empty())
-    return;
-
-  *Tokens = static_cast<CXToken *>(
-      llvm::safe_malloc(sizeof(CXToken) * CXTokens.size()));
-  memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size());
-  *NumTokens = CXTokens.size();
+  clang_tokenize_impl(CXXUnit, R, Tokens, NumTokens);
 }
 
 void clang_disposeTokens(CXTranslationUnit TU, CXToken *Tokens,
Index: clang/tools/libclang/CXSourceLocation.h
===================================================================
--- clang/tools/libclang/CXSourceLocation.h
+++ clang/tools/libclang/CXSourceLocation.h
@@ -71,7 +71,14 @@
                      SourceLocation::getFromRawEncoding(R.end_int_data));
 }
 
-
+/// Translates CXSourceRange to SourceRange.
+/// The semantics of \p R are:
+/// R.begin_int_data is begin of the first token of the range.
+/// R.end_int_data is begin of the last token of the range.
+/// Returns a default-constructed SourceRange if \p TU is unusable, \p R
+/// contains no tokens, or \p R does not begin at the start of a token.
+SourceRange translateCharRangeToTokenRange(CXTranslationUnit TU,
+                                           CXSourceRange R);
 }} // end namespace: clang::cxloc
 
 #endif