Index: include/clang/AST/Expr.h =================================================================== --- include/clang/AST/Expr.h +++ include/clang/AST/Expr.h @@ -1611,11 +1611,14 @@ /// SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, - const TargetInfo &Target) const; + const TargetInfo &Target, + unsigned * StartToken=nullptr, + unsigned * StartTokenByteOffset=nullptr + ) const; typedef const SourceLocation *tokloc_iterator; tokloc_iterator tokloc_begin() const { return TokLocs; } - tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; } + tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; } SourceLocation getLocStart() const LLVM_READONLY { return TokLocs[0]; } SourceLocation getLocEnd() const LLVM_READONLY { Index: lib/AST/Expr.cpp =================================================================== --- lib/AST/Expr.cpp +++ lib/AST/Expr.cpp @@ -1007,15 +1007,30 @@ /// can have escape sequences in them in addition to the usual trigraph and /// escaped newline business. This routine handles this complexity. /// +/// If non-null, *StartToken is the index of the first token to search and +/// *StartTokenByteOffset is that token's byte offset within the string; +/// ByteNo must not be less than *StartTokenByteOffset. Before returning, +/// *StartToken is set to the index of the token where the search ended and +/// *StartTokenByteOffset to that token's byte offset. Reusing both across +/// calls with increasing ByteNo cuts a whole-string scan from O(n^2) to O(n). 
+/// SourceLocation StringLiteral:: getLocationOfByte(unsigned ByteNo, const SourceManager &SM, - const LangOptions &Features, const TargetInfo &Target) const { + const LangOptions &Features, const TargetInfo &Target, + unsigned * StartToken, unsigned * StartTokenByteOffset) const { assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that // contains the byte we're looking for. unsigned TokNo = 0; + unsigned StringOffset = 0; + if (StartToken) + TokNo = *StartToken; + if (StartTokenByteOffset){ + StringOffset = *StartTokenByteOffset; + ByteNo -= StringOffset; + } while (1) { assert(TokNo < getNumConcatenated() && "Invalid byte number!"); SourceLocation StrTokLoc = getStrTokenLoc(TokNo); @@ -1024,13 +1039,19 @@ // the string literal, not the identifier for the macro it is potentially // expanded through. SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc); - + // Re-lex the token to get its length and original spelling. - std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc); + std::pair<FileID, unsigned> LocInfo = + SM.getDecomposedLoc(StrTokSpellingLoc); bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); - if (Invalid) + if (Invalid) { + if (StartTokenByteOffset != nullptr) + *StartTokenByteOffset = StringOffset; + if (StartToken != nullptr) + *StartToken = TokNo; return StrTokSpellingLoc; + } const char *StrData = Buffer.data()+LocInfo.second; @@ -1047,14 +1068,19 @@ // If the byte is in this token, return the location of the byte. if (ByteNo < TokNumBytes || (ByteNo == TokNumBytes && TokNo == getNumConcatenated() - 1)) { - unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); - + unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); + // Now that we know the offset of the token in the spelling, use the // preprocessor to get the offset in the original source. 
+ if (StartTokenByteOffset != nullptr) + *StartTokenByteOffset = StringOffset; + if (StartToken != nullptr) + *StartToken = TokNo; return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features); } - + // Move to the next string token. + StringOffset += TokNumBytes; ++TokNo; ByteNo -= TokNumBytes; } Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -1707,13 +1707,18 @@ if (!StrVal.empty()) { const SourceManager &SM = CGF.CGM.getContext().getSourceManager(); const LangOptions &LangOpts = CGF.CGM.getLangOpts(); + unsigned StartToken = 0; + unsigned ByteOffset = 0; // Add the location of the start of each subsequent line of the asm to the // MDNode. - for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) { - if (StrVal[i] != '\n') continue; + for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) { + if (StrVal[i] != '\n') + continue; SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts, - CGF.getTarget()); + CGF.getTarget(), + &StartToken, + &ByteOffset); Locs.push_back(llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding()))); }