Index: include/llvm/MC/MCAsmInfo.h =================================================================== --- include/llvm/MC/MCAsmInfo.h +++ include/llvm/MC/MCAsmInfo.h @@ -362,6 +362,9 @@ /// construction (see LLVMTargetMachine::initAsmInfo()). bool UseIntegratedAssembler; + /// Preserve comments in assembly. + bool PreserveAsmComments; + /// Compress DWARF debug sections. Defaults to no compression. DebugCompressionType CompressDebugSections; @@ -575,6 +578,14 @@ UseIntegratedAssembler = Value; } + /// Return true if comments in assembly (inline or otherwise) should be preserved. + bool preserveAsmComments() const { return PreserveAsmComments; } + + /// Set whether comments in assembly (inline or otherwise) should be preserved. + virtual void setPreserveAsmComments(bool Value) { + PreserveAsmComments = Value; + } + DebugCompressionType compressDebugSections() const { return CompressDebugSections; } Index: include/llvm/MC/MCParser/AsmLexer.h =================================================================== --- include/llvm/MC/MCParser/AsmLexer.h +++ include/llvm/MC/MCParser/AsmLexer.h @@ -30,6 +30,7 @@ const char *CurPtr; StringRef CurBuf; bool isAtStartOfLine; + bool isAtStartOfStatement; void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; @@ -45,17 +46,15 @@ void setBuffer(StringRef Buf, const char *ptr = nullptr); StringRef LexUntilEndOfStatement() override; - StringRef LexUntilEndOfLine(); size_t peekTokens(MutableArrayRef Buf, bool ShouldSkipSpace = true) override; - bool isAtStartOfComment(const char *Ptr); - bool isAtStatementSeparator(const char *Ptr); - const MCAsmInfo &getMAI() const { return MAI; } private: + bool isAtStartOfComment(const char *Ptr); + bool isAtStatementSeparator(const char *Ptr); int getNextChar(); AsmToken ReturnError(const char *Loc, const std::string &Msg); @@ -67,6 +66,8 @@ AsmToken LexQuote(); AsmToken LexFloatLiteral(); AsmToken LexHexFloatLiteral(bool NoIntDigits); + + StringRef LexUntilEndOfLine(); }; } // end namespace llvm Index: 
include/llvm/MC/MCParser/MCAsmLexer.h =================================================================== --- include/llvm/MC/MCParser/MCAsmLexer.h +++ include/llvm/MC/MCParser/MCAsmLexer.h @@ -26,7 +26,8 @@ public: enum TokenKind { // Markers - Eof, Error, + Eof, + Error, // String values. Identifier, @@ -39,20 +40,48 @@ // Real values. Real, + // Comments + Comment, + HashDirective, // No-value. EndOfStatement, Colon, Space, - Plus, Minus, Tilde, - Slash, // '/' + Plus, + Minus, + Tilde, + Slash, // '/' BackSlash, // '\' - LParen, RParen, LBrac, RBrac, LCurly, RCurly, - Star, Dot, Comma, Dollar, Equal, EqualEqual, - - Pipe, PipePipe, Caret, - Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, - Less, LessEqual, LessLess, LessGreater, - Greater, GreaterEqual, GreaterGreater, At + LParen, + RParen, + LBrac, + RBrac, + LCurly, + RCurly, + Star, + Dot, + Comma, + Dollar, + Equal, + EqualEqual, + + Pipe, + PipePipe, + Caret, + Amp, + AmpAmp, + Exclaim, + ExclaimEqual, + Percent, + Hash, + Less, + LessEqual, + LessLess, + LessGreater, + Greater, + GreaterEqual, + GreaterGreater, + At }; private: @@ -153,8 +182,9 @@ const AsmToken &Lex() { assert(!CurTok.empty()); CurTok.erase(CurTok.begin()); + // Always place in front as LexToken may generate multiple tokens via UnLex. if (CurTok.empty()) - CurTok.emplace_back(LexToken()); + CurTok.insert(CurTok.begin(), LexToken()); return CurTok.front(); } Index: include/llvm/MC/MCStreamer.h =================================================================== --- include/llvm/MC/MCStreamer.h +++ include/llvm/MC/MCStreamer.h @@ -252,7 +252,7 @@ /// correctly? virtual bool isIntegratedAssemblerRequired() const { return false; } - /// \brief Add a textual command. + /// \brief Add a textual comment. 
/// /// Typically for comments that can be emitted to the generated .s /// file if applicable as a QoI issue to make the output of the compiler @@ -274,6 +274,12 @@ /// only prints comments, the object streamer ignores it instead of asserting. virtual void emitRawComment(const Twine &T, bool TabPrefix = true); + /// \brief Add explicit comment T. T is required to be a valid + /// comment in the output and does not need to be escaped. + virtual void addExplicitComment(const Twine &T); + /// \brief Emit any pending explicit comments. + virtual void outputExplicitComments(); + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. virtual void AddBlankLine() {} Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -187,6 +187,9 @@ /// Disable the integrated assembler. unsigned DisableIntegratedAS : 1; + /// Preserve comments in assembly. + unsigned PreserveAsmComments : 1; + /// Compress DWARF debug sections. unsigned CompressDebugSections : 1; Index: lib/CodeGen/LLVMTargetMachine.cpp =================================================================== --- lib/CodeGen/LLVMTargetMachine.cpp +++ lib/CodeGen/LLVMTargetMachine.cpp @@ -70,6 +70,8 @@ if (Options.DisableIntegratedAS) TmpAsmInfo->setUseIntegratedAssembler(false); + TmpAsmInfo->setPreserveAsmComments(Options.PreserveAsmComments); + if (Options.CompressDebugSections) TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu); Index: lib/MC/MCAsmInfo.cpp =================================================================== --- lib/MC/MCAsmInfo.cpp +++ lib/MC/MCAsmInfo.cpp @@ -107,6 +107,7 @@ // architecture basis. 
// - The target subclasses for AArch64, ARM, and X86 handle these cases UseIntegratedAssembler = false; + PreserveAsmComments = true; CompressDebugSections = DebugCompressionType::DCT_None; } Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -46,6 +46,7 @@ std::unique_ptr Emitter; std::unique_ptr AsmBackend; + SmallString<128> ExplicitCommentToEmit; SmallString<128> CommentToEmit; raw_svector_ostream CommentStream; @@ -73,6 +74,8 @@ } inline void EmitEOL() { + // Dump Explicit Comments here. + outputExplicitComments(); // If we don't have any comments, just emit a \n. if (!IsVerboseAsm) { OS << '\n'; @@ -112,6 +115,9 @@ void emitRawComment(const Twine &T, bool TabPrefix = true) override; + void addExplicitComment(const Twine &T) override; + void outputExplicitComments() override; + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. void AddBlankLine() override { EmitEOL(); @@ -325,6 +331,44 @@ EmitEOL(); } +void MCAsmStreamer::addExplicitComment(const Twine &T) { + StringRef c = T.getSingleStringRef(); + if (c.equals(StringRef(MAI->getSeparatorString()))) + return; + if (c.startswith(StringRef("//"))) { + ExplicitCommentToEmit.append(MAI->getCommentString()); + // drop // + ExplicitCommentToEmit.append(c.slice(2, c.size()).str()); + } else if (c.startswith(StringRef("/*"))) { + size_t p = 2, len = c.size() - 2; + // emit each line in comment as separate newline. + do { + size_t newp = std::min(len, c.find_first_of("\r\n", p)); + ExplicitCommentToEmit.append(MAI->getCommentString()); + ExplicitCommentToEmit.append(c.slice(p, newp).str()); + p = newp + 1; + } while (p < len); + } else if (c.startswith(StringRef(MAI->getCommentString()))) { + ExplicitCommentToEmit.append(c.str()); + } else if (c.front() == '#') { + // # are comments for ## commentString. Output extra #. 
+ ExplicitCommentToEmit.append("#"); + ExplicitCommentToEmit.append(c.str()); + } else + assert(false && "Unexpected Assembly Comment"); + + // full line comments immediately output + if (c.back() == '\n') + outputExplicitComments(); +} + +void MCAsmStreamer::outputExplicitComments() { + StringRef Comments = ExplicitCommentToEmit; + if (!Comments.empty()) + OS << '\t' << Comments; + ExplicitCommentToEmit.clear(); +} + void MCAsmStreamer::ChangeSection(MCSection *Section, const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); @@ -511,8 +555,10 @@ } void MCAsmStreamer::EmitSyntaxDirective() { - if (MAI->getAssemblerDialect() == 1) - OS << "\t.intel_syntax noprefix\n"; + if (MAI->getAssemblerDialect() == 1) { + OS << "\t.intel_syntax noprefix"; + EmitEOL(); + } // FIXME: Currently emit unprefix'ed registers. // The intel_syntax directive has one optional argument // with may have a value of prefix or noprefix. Index: lib/MC/MCParser/AsmLexer.cpp =================================================================== --- lib/MC/MCParser/AsmLexer.cpp +++ lib/MC/MCParser/AsmLexer.cpp @@ -24,6 +24,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; isAtStartOfLine = true; + isAtStartOfStatement = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -46,24 +47,13 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { SetError(SMLoc::getFromPointer(Loc), Msg); - return AsmToken(AsmToken::Error, StringRef(Loc, 0)); + return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); } int AsmLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: - return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr - 1 != CurBuf.end()) - return 0; // Just whitespace. - - // Otherwise, return end of file. 
- --CurPtr; // Another call to lex will return EOF again. + if (CurPtr == CurBuf.end()) return EOF; - } + return (unsigned char)*CurPtr++; } /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? @@ -169,43 +159,53 @@ AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': + isAtStartOfStatement = false; break; // C style comment. case '/': ++CurPtr; return LexLineComment(); default: - return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1)); + isAtStartOfStatement = false; + return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); } // C Style comment. ++CurPtr; // skip the star. - while (1) { - int CurChar = getNextChar(); - switch (CurChar) { - case EOF: - return ReturnError(TokStart, "unterminated comment"); + while (CurPtr != CurBuf.end()) { + switch (*CurPtr++) { case '*': // End of the comment? - if (CurPtr[0] != '/') break; - + if (*CurPtr != '/') + break; ++CurPtr; // End the */. - return LexToken(); + return AsmToken(AsmToken::Comment, + StringRef(TokStart, CurPtr - TokStart)); } } + return ReturnError(TokStart, "unterminated comment"); } /// LexLineComment: Comment: #[^\n]* /// : //[^\n]* AsmToken AsmLexer::LexLineComment() { - // FIXME: This is broken if we happen to a comment at the end of a file, which - // was .included, and which doesn't end with a newline. + // Mark this as an end of statement with a body of the + // comment. While it would be nicer to leave this as two tokens, + // backwards compatibility with TargetParsers makes keeping this form + // better. int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); - if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); + // If this is a whole line comment. 
leave newline + bool keepnewline = isAtStartOfStatement; + isAtStartOfLine = true; + isAtStartOfStatement = true; + + if (keepnewline) + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - TokStart)); + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - 1 - TokStart)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { @@ -423,8 +423,7 @@ while (!isAtStartOfComment(CurPtr) && // Start of line comment. !isAtStatementSeparator(CurPtr) && // End of statement marker. - *CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -433,8 +432,7 @@ StringRef AsmLexer::LexUntilEndOfLine() { TokStart = CurPtr; - while (*CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -445,6 +443,7 @@ const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; bool SavedAtStartOfLine = isAtStartOfLine; + bool SavedAtStartOfStatement = isAtStartOfStatement; bool SavedSkipSpace = SkipSpace; std::string SavedErr = getErr(); @@ -466,6 +465,7 @@ SkipSpace = SavedSkipSpace; isAtStartOfLine = SavedAtStartOfLine; + isAtStartOfStatement = SavedAtStartOfStatement; CurPtr = SavedCurPtr; TokStart = SavedTokStart; @@ -495,29 +495,45 @@ // This always consumes at least one character. int CurChar = getNextChar(); - if (isAtStartOfComment(TokStart)) { - // If this comment starts with a '#', then return the Hash token and let - // the assembler parser see if it can be parsed as a cpp line filename - // comment. We do this only if we are at the start of a line. 
- if (CurChar == '#' && isAtStartOfLine) - return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); - isAtStartOfLine = true; + if (CurChar == '#' && isAtStartOfStatement) { + // If this starts with a '#', this may be a cpp + // hash directive; otherwise it is a line comment. + AsmToken TokenBuf[2]; + MutableArrayRef Buf(TokenBuf, 2); + size_t num = peekTokens(Buf, true); + // There cannot be a space preceding this + if (isAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && + TokenBuf[1].is(AsmToken::String)) { + CurPtr = TokStart; // reset curPtr; + StringRef s = LexUntilEndOfLine(); + UnLex(TokenBuf[1]); + UnLex(TokenBuf[0]); + return AsmToken(AsmToken::HashDirective, s); + } return LexLineComment(); } + + if (isAtStartOfComment(TokStart)) + return LexLineComment(); + if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; + isAtStartOfLine = true; + isAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } // If we're missing a newline at EOF, make sure we still get an // EndOfStatement token before the Eof token. - if (CurChar == EOF && !isAtStartOfLine) { + if (CurChar == EOF && !isAtStartOfStatement) { isAtStartOfLine = true; + isAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); } - isAtStartOfLine = false; + bool OldIsAtStartOfStatement = isAtStartOfStatement; + isAtStartOfStatement = false; switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -526,24 +542,24 @@ // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); - case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case EOF: + isAtStartOfLine = true; + isAtStartOfStatement = true; + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); case 0: case ' ': case '\t': - if (SkipSpace) { - // Ignore whitespace. 
- return LexToken(); - } else { - int len = 1; - while (*CurPtr==' ' || *CurPtr=='\t') { - CurPtr++; - len++; - } - return AsmToken(AsmToken::Space, StringRef(TokStart, len)); - } - case '\n': // FALL THROUGH. + isAtStartOfStatement = OldIsAtStartOfStatement; + while (*CurPtr == ' ' || *CurPtr == '\t') + CurPtr++; + if (SkipSpace) + return LexToken(); // Ignore whitespace. + else + return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); + case '\n': case '\r': isAtStartOfLine = true; + isAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); @@ -586,7 +602,9 @@ } return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); - case '/': return LexSlash(); + case '/': + isAtStartOfStatement = OldIsAtStartOfStatement; + return LexSlash(); case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); case '\'': return LexSingleQuote(); case '"': return LexQuote(); Index: lib/MC/MCParser/AsmParser.cpp =================================================================== --- lib/MC/MCParser/AsmParser.cpp +++ lib/MC/MCParser/AsmParser.cpp @@ -257,7 +257,6 @@ bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); bool parseCurlyBlockScope(SmallVectorImpl& AsmStrRewrites); - void eatToEndOfLine(); bool parseCppHashLineFilenameComment(SMLoc L); void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, @@ -624,7 +623,24 @@ } const AsmToken &AsmParser::Lex() { + if (Lexer.getTok().is(AsmToken::Error)) + Error(Lexer.getErrLoc(), Lexer.getErr()); + + // if it's a end of statement with a comment in it + if (getTok().is(AsmToken::EndOfStatement)) { + // if this is a line comment output it. 
+ if (getTok().getString().front() != '\n' && + getTok().getString().front() != '\r' && MAI.preserveAsmComments()) + Out.addExplicitComment(Twine(getTok().getString())); + } + const AsmToken *tok = &Lexer.Lex(); + // Parse comments here to be deferred until end of next statement. + while (tok->is(AsmToken::Comment)) { + if (MAI.preserveAsmComments()) + Out.addExplicitComment(Twine(tok->getString())); + tok = &Lexer.Lex(); + } if (tok->is(AsmToken::Eof)) { // If this is the end of an included file, pop the parent file off the @@ -632,12 +648,10 @@ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { jumpToLoc(ParentIncludeLoc); - tok = &Lexer.Lex(); + return Lex(); } } - if (tok->is(AsmToken::Error)) - Error(Lexer.getErrLoc(), Lexer.getErr()); return *tok; } @@ -675,6 +689,12 @@ if (!parseStatement(Info, nullptr)) continue; + // If we've failed while on an Error token that was not consumed in + // favor of a better message, emit it now. + if (Lexer.getTok().is(AsmToken::Error)) { + Lex(); + } + // We had an error, validate that one was emitted and recover by skipping to // the next line. assert(HadError && "Parse statement returned an error, but none emitted!"); @@ -713,8 +733,8 @@ // first referenced for a source location. We need to add something // to track that. Currently, we just point to the end of the file. HadError |= - Error(getLexer().getLoc(), "assembler local symbol '" + - Sym->getName() + "' not defined"); + Error(getTok().getLoc(), "assembler local symbol '" + + Sym->getName() + "' not defined"); } } @@ -748,18 +768,18 @@ /// \brief Throw away the rest of the line for testing purposes. void AsmParser::eatToEndOfStatement() { while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) - Lex(); + Lexer.Lex(); // Eat EOL. 
if (Lexer.is(AsmToken::EndOfStatement)) - Lex(); + Lexer.Lex(); } StringRef AsmParser::parseStringToEndOfStatement() { const char *Start = getTok().getLoc().getPointer(); while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) - Lex(); + Lexer.Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); @@ -770,7 +790,7 @@ while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof)) - Lex(); + Lexer.Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); @@ -852,7 +872,7 @@ if (!MAI.useParensForSymbolVariant()) { if (FirstTokenKind == AsmToken::String) { if (Lexer.is(AsmToken::At)) { - Lexer.Lex(); // eat @ + Lex(); // eat @ SMLoc AtLoc = getLexer().getLoc(); StringRef VName; if (parseIdentifier(VName)) @@ -864,14 +884,14 @@ Split = Identifier.split('@'); } } else if (Lexer.is(AsmToken::LParen)) { - Lexer.Lex(); // eat ( + Lex(); // eat '('. StringRef VName; parseIdentifier(VName); if (Lexer.isNot(AsmToken::RParen)) { return Error(Lexer.getTok().getLoc(), "unexpected token in variant, expected ')'"); } - Lexer.Lex(); // eat ) + Lex(); // eat ')'. Split = std::make_pair(Identifier, VName); } @@ -1336,21 +1356,24 @@ /// ::= Label* Identifier OperandList* EndOfStatement bool AsmParser::parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI) { + // Eat initial spaces and comments + while (Lexer.is(AsmToken::Space)) + Lex(); if (Lexer.is(AsmToken::EndOfStatement)) { - Out.AddBlankLine(); + // if this is a line comment we can drop it safely + if (getTok().getString().front() == '\r' || + getTok().getString().front() == '\n') + Out.AddBlankLine(); Lex(); return false; } - - // Statements always start with an identifier or are a full line comment. + // Statements always start with an identifier. 
AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); StringRef IDVal; int64_t LocalLabelVal = -1; - // A full line comment is a '#' as the first token. - if (Lexer.is(AsmToken::Hash)) + if (Lexer.is(AsmToken::HashDirective)) return parseCppHashLineFilenameComment(IDLoc); - // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { LocalLabelVal = getTok().getIntVal(); @@ -1641,7 +1664,8 @@ return parseDirectiveIncbin(); case DK_CODE16: case DK_CODE16GCC: - return TokError(Twine(IDVal) + " not supported yet"); + return TokError(Twine(IDVal) + + " not currently supported for this target"); case DK_REPT: return parseDirectiveRept(IDLoc, IDVal); case DK_IRP: @@ -1861,37 +1885,20 @@ return true; } -/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line -/// since they may not be able to be tokenized to get to the end of line token. -void AsmParser::eatToEndOfLine() { - if (!Lexer.is(AsmToken::EndOfStatement)) - Lexer.LexUntilEndOfLine(); - // Eat EOL. - Lex(); -} - /// parseCppHashLineFilenameComment as this: /// ::= # number "filename" -/// or just as a full line comment if it doesn't have a number and a string. bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) { Lex(); // Eat the hash token. - - if (getLexer().isNot(AsmToken::Integer)) { - // Consume the line since in cases it is not a well-formed line directive, - // as if were simply a full line comment. - eatToEndOfLine(); - return false; - } - + // The lexer only emits HashDirective once it has verified that the + // directive is fully formed, so a mismatch here is an internal error. 
+ assert(getTok().is(AsmToken::Integer) && + "Lexing Cpp line comment: Expected Integer"); int64_t LineNumber = getTok().getIntVal(); Lex(); - - if (getLexer().isNot(AsmToken::String)) { - eatToEndOfLine(); - return false; - } - + assert(getTok().is(AsmToken::String) && + "Lexing Cpp line comment: Expected String"); StringRef Filename = getTok().getString(); + Lex(); // Get rid of the enclosing quotes. Filename = Filename.substr(1, Filename.size() - 2); @@ -1900,9 +1907,6 @@ CppHashInfo.Filename = Filename; CppHashInfo.LineNumber = LineNumber; CppHashInfo.Buf = CurBuffer; - - // Ignore any trailing characters, they're just comment. - eatToEndOfLine(); return false; } @@ -2164,7 +2168,7 @@ if (Lexer.is(AsmToken::Space)) { SpaceEaten = true; - Lex(); // Eat spaces + Lexer.Lex(); // Eat spaces } // Spaces can delimit parameters, but could also be part an expression. @@ -2173,11 +2177,11 @@ if (!IsDarwin) { if (isOperator(Lexer.getKind())) { MA.push_back(getTok()); - Lex(); + Lexer.Lex(); // Whitespace after an operator can be ignored. if (Lexer.is(AsmToken::Space)) - Lex(); + Lexer.Lex(); continue; } @@ -2199,7 +2203,7 @@ // Append the token to the current argument list. MA.push_back(getTok()); - Lex(); + Lexer.Lex(); } if (ParenLevel != 0) @@ -2261,7 +2265,7 @@ break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); @@ -2408,7 +2412,7 @@ SMLoc PrefixLoc = getLexer().getLoc(); // Consume the prefix character, and check for a following identifier. - Lex(); + Lexer.Lex(); // Lexer's Lex guarantees consecutive token. if (Lexer.isNot(AsmToken::Identifier)) return true; @@ -2419,7 +2423,7 @@ // Construct the joined identifier and consume the token. 
Res = StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); - Lex(); + Lexer.Lex(); // Lexer's Lex guarantees consecutive token return false; } @@ -2561,16 +2565,16 @@ if (Lexer.isNot(AsmToken::Comma)) return TokError("expected comma"); - Lexer.Lex(); + Lex(); if (Lexer.isNot(AsmToken::Identifier)) return TokError("expected relocation name"); SMLoc NameLoc = Lexer.getTok().getLoc(); StringRef Name = Lexer.getTok().getIdentifier(); - Lexer.Lex(); + Lex(); if (Lexer.is(AsmToken::Comma)) { - Lexer.Lex(); + Lex(); SMLoc ExprLoc = Lexer.getLoc(); if (parseExpression(Expr)) return true; @@ -2686,14 +2690,15 @@ // have to manually parse unary prefixes. bool IsNeg = false; if (getLexer().is(AsmToken::Minus)) { - Lex(); + Lexer.Lex(); IsNeg = true; } else if (getLexer().is(AsmToken::Plus)) - Lex(); + Lexer.Lex(); - if (getLexer().isNot(AsmToken::Integer) && - getLexer().isNot(AsmToken::Real) && - getLexer().isNot(AsmToken::Identifier)) + if (Lexer.is(AsmToken::Error)) + return TokError(Lexer.getErr()); + if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) && + Lexer.isNot(AsmToken::Identifier)) return TokError("unexpected token in directive"); // Convert to an APFloat. @@ -2720,10 +2725,10 @@ getStreamer().EmitIntValue(AsInt.getLimitedValue(), AsInt.getBitWidth() / 8); - if (getLexer().is(AsmToken::EndOfStatement)) + if (Lexer.is(AsmToken::EndOfStatement)) break; - if (getLexer().isNot(AsmToken::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } @@ -3762,14 +3767,19 @@ Lex(); } - // Eat the end of statement. - Lex(); + // Eat just the end of statement. + Lexer.Lex(); + // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors AsmToken EndToken, StartToken = getTok(); unsigned MacroDepth = 0; - // Lex the macro definition. for (;;) { + // Ignore Lexing errors in macros. + while (Lexer.is(AsmToken::Error)) { + Lexer.Lex(); + } + // Check whether we have reached the end of the file. 
if (getLexer().is(AsmToken::Eof)) return Error(DirectiveLoc, "no matching '.endmacro' in definition"); @@ -3780,7 +3790,7 @@ getTok().getIdentifier() == ".endmacro") { if (MacroDepth == 0) { // Outermost macro. EndToken = getTok(); - Lex(); + Lexer.Lex(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '" + EndToken.getIdentifier() + "' directive"); @@ -5237,10 +5247,9 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, MCAsmParser &Parser, MCSymbol *&Sym, const MCExpr *&Value) { - MCAsmLexer &Lexer = Parser.getLexer(); // FIXME: Use better location, we should use proper tokens. - SMLoc EqualLoc = Lexer.getLoc(); + SMLoc EqualLoc = Parser.getTok().getLoc(); if (Parser.parseExpression(Value)) { Parser.TokError("missing expression"); @@ -5252,7 +5261,7 @@ // a = b // b = c - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) return Parser.TokError("unexpected token in assignment"); // Eat the end of statement marker. 
Index: lib/MC/MCParser/ELFAsmParser.cpp =================================================================== --- lib/MC/MCParser/ELFAsmParser.cpp +++ lib/MC/MCParser/ELFAsmParser.cpp @@ -188,6 +188,7 @@ if (getParser().parseExpression(Subsection)) return true; } + Lex(); getStreamer().SwitchSection(getContext().getELFSection(Section, Type, Flags), Subsection); Index: lib/MC/MCParser/MCAsmLexer.cpp =================================================================== --- lib/MC/MCParser/MCAsmLexer.cpp +++ lib/MC/MCParser/MCAsmLexer.cpp @@ -13,7 +13,7 @@ using namespace llvm; MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true) { - CurTok.emplace_back(AsmToken::Error, StringRef()); + CurTok.emplace_back(AsmToken::Space, StringRef()); } MCAsmLexer::~MCAsmLexer() { Index: lib/MC/MCStreamer.cpp =================================================================== --- lib/MC/MCStreamer.cpp +++ lib/MC/MCStreamer.cpp @@ -70,6 +70,9 @@ void MCStreamer::emitRawComment(const Twine &T, bool TabPrefix) {} +void MCStreamer::addExplicitComment(const Twine &T) {} +void MCStreamer::outputExplicitComments() {} + void MCStreamer::generateCompactUnwindEncodings(MCAsmBackend *MAB) { for (auto &FI : DwarfFrameInfos) FI.CompactUnwindEncoding = Index: lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -10054,7 +10054,7 @@ StringRef Arch = Parser.getTok().getString(); SMLoc ArchLoc = Parser.getTok().getLoc(); - getLexer().Lex(); + Lex(); unsigned ID = ARM::parseArch(Arch); @@ -10176,7 +10176,7 @@ StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); - getLexer().Lex(); + Lex(); bool EnableFeature = true; if (Name.startswith_lower("no")) { Index: lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp =================================================================== --- 
lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -729,11 +729,10 @@ bool HexagonAsmParser::matchBundleOptions() { MCAsmParser &Parser = getParser(); - MCAsmLexer &Lexer = getLexer(); while (true) { if (!Parser.getTok().is(AsmToken::Colon)) return false; - Lexer.Lex(); + Lex(); StringRef Option = Parser.getTok().getString(); if (Option.compare_lower("endloop0") == 0) HexagonMCInstrInfo::setInnerLoop(MCB); @@ -745,7 +744,7 @@ HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB); else return true; - Lexer.Lex(); + Lex(); } } @@ -1105,7 +1104,7 @@ AsmToken const &Token = getParser().getTok(); StringRef String = Token.getString(); SMLoc Loc = Token.getLoc(); - getLexer().Lex(); + Lex(); do { std::pair HeadTail = String.split('.'); if (!HeadTail.first.empty()) @@ -1297,7 +1296,7 @@ static char const * Comma = ","; do { Tokens.emplace_back (Lexer.getTok()); - Lexer.Lex(); + Lex(); switch (Tokens.back().getKind()) { case AsmToken::TokenKind::Hash: @@ -1346,7 +1345,7 @@ AsmToken const &Token = Parser.getTok(); switch (Token.getKind()) { case AsmToken::EndOfStatement: { - Lexer.Lex(); + Lex(); return false; } case AsmToken::LCurly: { @@ -1354,19 +1353,19 @@ return true; Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - Lexer.Lex(); + Lex(); return false; } case AsmToken::RCurly: { if (Operands.empty()) { Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - Lexer.Lex(); + Lex(); } return false; } case AsmToken::Comma: { - Lexer.Lex(); + Lex(); continue; } case AsmToken::EqualEqual: @@ -1379,7 +1378,7 @@ Token.getString().substr(0, 1), Token.getLoc())); Operands.push_back(HexagonOperand::CreateToken( Token.getString().substr(1, 1), Token.getLoc())); - Lexer.Lex(); + Lex(); continue; } case AsmToken::Hash: { @@ -1389,12 +1388,12 @@ if (!ImplicitExpression) Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - 
Lexer.Lex(); + Lex(); bool MustExtend = false; bool HiOnly = false; bool LoOnly = false; if (Lexer.is(AsmToken::Hash)) { - Lexer.Lex(); + Lex(); MustExtend = true; } else if (ImplicitExpression) MustNotExtend = true; @@ -1412,7 +1411,7 @@ HiOnly = false; LoOnly = false; } else { - Lexer.Lex(); + Lex(); } } } Index: lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp =================================================================== --- lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -1122,7 +1122,7 @@ // Parse until end of statement, consuming commas between operands while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.is(AsmToken::Comma)) { // Consume comma token - Lexer.Lex(); + Lex(); // Parse next operand if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success) Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1704,7 +1704,7 @@ while (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().is(AsmToken::Comma)) { // Consume the comma token - getLexer().Lex(); + Lex(); // Parse the next operand if (ParseOperand(Operands)) Index: test/MC/AsmParser/floating-literals.s =================================================================== --- test/MC/AsmParser/floating-literals.s +++ test/MC/AsmParser/floating-literals.s @@ -58,25 +58,19 @@ .float -0x1.0p0 # CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit -# CHECK-ERROR: unexpected token in directive .float 0xa.apa # CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit -# CHECK-ERROR: unexpected token in directive .double -0x1.2p+ # CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit -# CHECK-ERROR: unexpected token in directive .double -0x1.2p # CHECK-ERROR: 
invalid hexadecimal floating-point constant: expected at least one significand digit -# CHECK-ERROR: unexpected token in directive .float 0xp2 # CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one significand digit -# CHECK-ERROR: unexpected token in directive .float 0x.p5 # CHECK-ERROR: error: invalid hexadecimal floating-point constant: expected exponent part 'p' -# CHECK-ERROR: unexpected token in directive .float 0x1.2 Index: test/MC/AsmParser/hash-directive.s =================================================================== --- /dev/null +++ test/MC/AsmParser/hash-directive.s @@ -0,0 +1,23 @@ +# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s +error +# CHECK: hash-directive.s:[[@LINE-1]]:1: error +# 3 "FILE1" 1 #<- This is a CPP Hash w/ comment +error +# CHECK: FILE1:3:1: error +# 0 "" 2 #<- This is too +error +# CHECK: hash-directive.s:[[@LINE-1]]:1: error + # 1 "FILE2" 2 #<- This is a comment +error +# CHECK: hash-directive.s:[[@LINE-1]]:1: error +nop; # 6 "FILE3" 2 #<- This is still a comment +error +# CHECK: hash-directive.s:[[@LINE-1]]:1: error +nop;# 6 "FILE4" 2 + nop; +error +# CHECK: FILE4:7:1: error +# 0 "" 2 +/*comment*/# 6 "FILE5" 2 #<- This is a comment +error +# CHECK: hash-directive.s:[[@LINE-1]]:1: error Index: test/MC/AsmParser/inline-comments.ll =================================================================== --- /dev/null +++ test/MC/AsmParser/inline-comments.ll @@ -0,0 +1,87 @@ +; RUN: llc %s -o - | sed -n -e '/#APP/,/#NO_APP/p' > %t +; RUN: sed -n -e 's/^;CHECK://p' %s > %t2 +; RUN: diff %t %t2 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo() #0 { +entry: + call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: #NO_APP + call void asm sideeffect " ", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: +;CHECK: #NO_APP + call void asm 
sideeffect "\0A", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: +;CHECK: +;CHECK: #NO_APP + call void asm sideeffect "/*isolated c comment*/", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: #isolated c comment +;CHECK: #NO_APP + call void asm sideeffect "/**/", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: # +;CHECK: #NO_APP + call void asm sideeffect "/*comment with\0Anewline*/", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: #comment with#newline +;CHECK: #NO_APP + call void asm sideeffect "//isolated line comment", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: #isolated line comment +;CHECK: #NO_APP + call void asm sideeffect "#isolated line comment", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: #isolated line comment +;CHECK: #NO_APP + call void asm sideeffect "nop /* after nop */", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after nop +;CHECK: #NO_APP + call void asm sideeffect "nop // after nop", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after nop +;CHECK: #NO_APP + call void asm sideeffect "nop # after nop", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after nop +;CHECK: #NO_APP + call void asm sideeffect "nop /* after explicit ended nop */", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after explicit ended nop +;CHECK: #NO_APP + call void asm sideeffect "nop # after explicit ended nop", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after explicit ended nop +;CHECK: #NO_APP + call void asm sideeffect "nop # after explicit end nop", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # after explicit end nop +;CHECK: #NO_APP + call void asm sideeffect "/* before nop */ nop", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: nop # before nop +;CHECK: #NO_APP + call void asm sideeffect "//comment with escaped newline\0A", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP 
+;CHECK: #comment with escaped newline +;CHECK: +;CHECK: #NO_APP + call void asm sideeffect "/*0*/xor/*1*/%eax,/*2*/%ecx/*3*///eol", "~{dirflag},~{fpsr},~{flags}"() #0 +;CHECK: #APP +;CHECK: xorl %eax, %ecx #0#1#2#3#eol +;CHECK: #NO_APP + ret void +} + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (trunk 268625) (llvm/trunk 268631)"} Index: test/MC/AsmParser/macro_parsing.s =================================================================== --- /dev/null +++ test/MC/AsmParser/macro_parsing.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s + + .macro DEF num + int $0x\num + .endm + DEF 02 + DEF 08 + DEF 09 + DEF 0A + DEF 10 + +# CHECK: int $2 +# CHECK: int $8 +# CHECK: int $9 +# CHECK: int $10 +# CHECK: int $16 Index: test/MC/AsmParser/preserve-comments.s =================================================================== --- /dev/null +++ test/MC/AsmParser/preserve-comments.s @@ -0,0 +1,9 @@ + #RUN: llvm-mc -preserve-comments -n -triple i386-linux-gnu < %s > %t + #RUN: diff %s %t + .text + + nop + #if DIRECTIVE COMMENT + ## WHOLE LINE COMMENT + cmpl $196, %eax ## EOL COMMENT + #endif Index: tools/llc/llc.cpp =================================================================== --- tools/llc/llc.cpp +++ tools/llc/llc.cpp @@ -73,6 +73,10 @@ NoIntegratedAssembler("no-integrated-as", cl::Hidden, cl::desc("Disable integrated assembler")); +static cl::opt + NoPreserveComments("fno-preserve-as-comments", cl::Hidden, + cl::desc("Preserve Comments in outputted assembly")); + // Determine optimization level. 
static cl::opt OptLevel("O", @@ -315,6 +319,7 @@ TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); Options.DisableIntegratedAS = NoIntegratedAssembler; + Options.PreserveAsmComments = !NoPreserveComments; Options.MCOptions.ShowMCEncoding = ShowMCEncoding; Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; Options.MCOptions.AsmVerbose = AsmVerbose; Index: tools/llvm-mc/llvm-mc.cpp =================================================================== --- tools/llvm-mc/llvm-mc.cpp +++ tools/llvm-mc/llvm-mc.cpp @@ -87,6 +87,10 @@ static cl::list DefineSymbol("defsym", cl::desc("Defines a symbol to be an integer constant")); +static cl::opt + PreserveComments("preserve-comments", + cl::desc("Preserve Comments in outputted assembly")); + enum OutputFileType { OFT_Null, OFT_AssemblyFile, @@ -430,6 +434,7 @@ } MAI->setCompressDebugSections(CompressDebugSections); } + MAI->setPreserveAsmComments(PreserveComments); // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and // MCObjectFileInfo needs a MCContext reference in order to initialize itself.