diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -993,14 +993,25 @@ llvm::MutableArrayRef All(begin_tokens, end_tokens); llvm::MutableArrayRef Partition; + // The maximum distance between two consecutive tokens in a partition. + // This is an important trick to avoid using too much SourceLocation address + // space! + const static int MaxDistance = 50; // Partition the tokens by their FileID. // This is a hot function, and calling getFileID can be expensive, the // implementation is optimized by reducing the number of getFileID. if (BeginLoc.isFileID()) { // Consecutive tokens not written in macros must be from the same file. // (Neither #include nor eof can occur inside a macro argument.) + SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation().isFileID(); + if (T.getLocation().isFileID()) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } else { // Call getFileID once to calculate the bounds, and use the cheaper @@ -1008,8 +1019,15 @@ FileID BeginFID = SM.getFileID(BeginLoc); SourceLocation Limit = SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID)); + SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation() >= BeginLoc && T.getLocation() < Limit; + if (T.getLocation() >= BeginLoc && T.getLocation() < Limit) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } assert(!Partition.empty());