Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -1157,7 +1157,8 @@ encoding::Encoding Encoding) : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), - IdentTable(getFormattingLangOpts()), Encoding(Encoding) { + IdentTable(getFormattingLangOpts()), Encoding(Encoding), + FirstInLineIndex(0) { Lex.SetKeepWhitespaceMode(true); for (const std::string& ForEachMacro : Style.ForEachMacros) @@ -1167,9 +1168,12 @@ ArrayRef lex() { assert(Tokens.empty()); + assert(FirstInLineIndex == 0); do { Tokens.push_back(getNextToken()); tryMergePreviousTokens(); + if (Tokens.back()->NewlinesBefore > 0) + FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; } @@ -1180,6 +1184,8 @@ void tryMergePreviousTokens() { if (tryMerge_TMacro()) return; + if (tryMergeConflictMarkers()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; @@ -1254,6 +1260,68 @@ return true; } + bool tryMergeConflictMarkers() { + if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) + return false; + + // Conflict lines look like: + // MARKER [free-form text up to the end of the line] + // For example: + // >>>>>>> /file/in/file/system at revision 1234 + // + // We merge all tokens in a line that starts with a conflict marker + // into a single token with a special token type that the unwrapped line + // parser will use to correctly rebuild the underlying code. + + FileID ID; + // Get the file and offset of the first token in the line. + unsigned FirstInLineOffset; + std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( + Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); + StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); + // Calculate the offset of the start of the current line. 
+ auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); + if (LineOffset == StringRef::npos) { + LineOffset = 0; + } else { + ++LineOffset; + } + + auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); + StringRef LineStart; + if (FirstSpace == StringRef::npos) { + LineStart = Buffer.substr(LineOffset); + } else { + LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); + } + + TokenType Type = TT_Unknown; + if (LineStart == "<<<<<<<" || LineStart == ">>>>") { + Type = TT_ConflictStart; + } else if (LineStart == "|||||||" || LineStart == "=======" || + LineStart == "====") { + Type = TT_ConflictAlternative; + } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { + Type = TT_ConflictEnd; + } + + if (Type != TT_Unknown) { + FormatToken *Next = Tokens.back(); + + Tokens.resize(FirstInLineIndex + 1); + // We do not need to build a complete token here, as we will skip it + // during parsing anyway (as we must not touch whitespace around conflict + // markers). + Tokens.back()->Type = Type; + Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); + + Tokens.push_back(Next); + return true; + } + + return false; + } + FormatToken *getNextToken() { if (GreaterStashed) { // Create a synthesized second '>' token. @@ -1401,6 +1469,8 @@ IdentifierTable IdentTable; encoding::Encoding Encoding; llvm::SpecificBumpPtrAllocator Allocator; + // Index (in 'Tokens') of the last token that starts a new line. 
+ unsigned FirstInLineIndex; SmallVector Tokens; SmallVector ForEachMacros; Index: lib/Format/FormatToken.h =================================================================== --- lib/Format/FormatToken.h +++ lib/Format/FormatToken.h @@ -33,19 +33,22 @@ TT_BlockComment, TT_CastRParen, TT_ConditionalExpr, + TT_ConflictAlternative, + TT_ConflictEnd, + TT_ConflictStart, TT_CtorInitializerColon, TT_CtorInitializerComma, TT_DesignatedInitializerPeriod, TT_DictLiteral, - TT_ImplicitStringLiteral, - TT_InlineASMColon, - TT_InheritanceColon, TT_FunctionLBrace, TT_FunctionTypeLParen, + TT_ImplicitStringLiteral, + TT_InheritanceColon, + TT_InlineASMColon, TT_LambdaLSquare, TT_LineComment, - TT_ObjCBlockLParen, TT_ObjCBlockLBrace, + TT_ObjCBlockLParen, TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, Index: lib/Format/UnwrappedLineParser.h =================================================================== --- lib/Format/UnwrappedLineParser.h +++ lib/Format/UnwrappedLineParser.h @@ -107,7 +107,16 @@ void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(); - void pushPPConditional(); + + // Marks a conditional compilation edge (for example, an '#if', '#ifdef', + // '#else' or merge conflict marker). If 'Unreachable' is true, assumes + // this branch either cannot be taken (for example '#if false'), or should + // not be taken in this round. 
+ void conditionalCompilationCondition(bool Unreachable); + void conditionalCompilationStart(bool Unreachable); + void conditionalCompilationAlternative(); + void conditionalCompilationEnd(); + bool isOnNewLine(const FormatToken& FormatTok); // FIXME: We are constantly running into bugs where Line.Level is incorrectly Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -465,14 +465,14 @@ } } -void UnwrappedLineParser::pushPPConditional() { - if (!PPStack.empty() && PPStack.back() == PP_Unreachable) +void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { + if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) PPStack.push_back(PP_Unreachable); else PPStack.push_back(PP_Conditional); } -void UnwrappedLineParser::parsePPIf(bool IfDef) { +void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { @@ -480,37 +480,22 @@ PPLevelBranchCount.push_back(0); } PPChainBranchIndex.push(0); - nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; + conditionalCompilationCondition(Unreachable || Skip); } -void UnwrappedLineParser::parsePPElse() { +void UnwrappedLineParser::conditionalCompilationAlternative() { if (!PPStack.empty()) PPStack.pop_back(); assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (!PPChainBranchIndex.empty()) ++PPChainBranchIndex.top(); - if (PPBranchLevel 
>= 0 && !PPChainBranchIndex.empty() && - PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + conditionalCompilationCondition( + PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && + PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - -void UnwrappedLineParser::parsePPEndIf() { +void UnwrappedLineParser::conditionalCompilationEnd() { assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { @@ -524,6 +509,27 @@ PPChainBranchIndex.pop(); if (!PPStack.empty()) PPStack.pop_back(); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { + nextToken(); + bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && + StringRef(FormatTok->Tok.getLiteralData(), + FormatTok->Tok.getLength()) == "0") || + FormatTok->Tok.is(tok::kw_false); + conditionalCompilationStart(!IfDef && IsLiteralFalse); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElse() { + conditionalCompilationAlternative(); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { + conditionalCompilationEnd(); parsePPUnknown(); } @@ -1406,6 +1412,19 @@ flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); } + while (FormatTok->Type == TT_ConflictStart || + FormatTok->Type == TT_ConflictEnd || + FormatTok->Type == TT_ConflictAlternative) { + if (FormatTok->Type == TT_ConflictStart) { + conditionalCompilationStart(/*Unreachable=*/false); + } else if (FormatTok->Type == TT_ConflictAlternative) { + conditionalCompilationAlternative(); + } else if(FormatTok->Type == TT_ConflictEnd) { + conditionalCompilationEnd(); + } + FormatTok = Tokens->getNextToken(); + FormatTok->MustBreakBefore = true; + } if (!PPStack.empty() && 
(PPStack.back() == PP_Unreachable) && !Line->InPPDirective) {