Index: llvm/include/llvm/MC/MCParser/MCAsmParser.h =================================================================== --- llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -304,6 +304,9 @@ MCAsmParser *createMCAsmParser(SourceMgr &, MCContext &, MCStreamer &, const MCAsmInfo &, unsigned CB = 0); +MCAsmParser *createMCMasmParser(SourceMgr &, MCContext &, MCStreamer &, + const MCAsmInfo &, unsigned CB = 0); + } // end namespace llvm #endif // LLVM_MC_MCPARSER_MCASMPARSER_H Index: llvm/lib/MC/MCParser/AsmParser.cpp =================================================================== --- llvm/lib/MC/MCParser/AsmParser.cpp +++ llvm/lib/MC/MCParser/AsmParser.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This class implements the parser for assembly files. +// This class implements a parser for assembly files similar to gas syntax. // //===----------------------------------------------------------------------===// Index: llvm/lib/MC/MCParser/CMakeLists.txt =================================================================== --- llvm/lib/MC/MCParser/CMakeLists.txt +++ llvm/lib/MC/MCParser/CMakeLists.txt @@ -8,6 +8,7 @@ MCAsmParser.cpp MCAsmParserExtension.cpp MCTargetAsmParser.cpp + MasmParser.cpp WasmAsmParser.cpp ADDITIONAL_HEADER_DIRS Index: llvm/lib/MC/MCParser/MasmParser.cpp =================================================================== --- /dev/null +++ llvm/lib/MC/MCParser/MasmParser.cpp @@ -0,0 +1,865 @@ +//===- MasmParser.cpp - Parser for Assembly Files -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class implements a parser for masm-style assembly. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; + +// Note that this is a full MCAsmParser, not an MCAsmParserExtension! +// It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc. +// Work in progress: much of the state and logic below is still commented out +// or compiled out with "#if 0". +class MasmParser : public MCAsmParser { +private: + AsmLexer Lexer; // FIXME: Probably want custom MCLexer subclass + MCContext &Ctx; + MCStreamer &Out; + const MCAsmInfo &MAI; + SourceMgr &SrcMgr; + //SourceMgr::DiagHandlerTy SavedDiagHandler; + //void *SavedDiagContext; + //std::unique_ptr PlatformParser; + + /// This is the current buffer index we're lexing from as managed by the + /// SourceMgr object. + unsigned CurBuffer; + + //AsmCond TheCondState; + //std::vector TheCondStack; + + /// maps directive names to handler methods in parser + /// extensions. Extensions register themselves in this map by calling + /// addDirectiveHandler. + //StringMap ExtensionDirectiveMap; + + /// Stack of active macro instantiations. + //std::vector ActiveMacros; + + /// List of bodies of anonymous macros. + //std::deque MacroLikeBodies; + + /// Boolean tracking whether macro substitution is enabled. + //unsigned MacrosEnabledFlag : 1; + + /// Keeps track of how many .macro's have been instantiated. + //unsigned NumOfMacroInstantiations; + + /// The values from the last parsed cpp hash file line comment if any. + //struct CppHashInfoTy { + // StringRef Filename; + // int64_t LineNumber; + // SMLoc Loc; + // unsigned Buf; + // CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {} + //}; + //CppHashInfoTy CppHashInfo; + + /// List of forward directional labels for diagnosis at the end. + //SmallVector, 4> DirLabels; + + /// AssemblerDialect. ~0U means unset value and use value provided by MAI. 
+ //unsigned AssemblerDialect = ~0U; + + /// is Darwin compatibility enabled? 
+ //bool IsDarwin = false; + + /// Are we parsing ms-style inline assembly? + //bool ParsingInlineAsm = false; + + /// Did we already inform the user about inconsistent MD5 usage? + //bool ReportedInconsistentMD5 = false; + + // Is alt macro mode enabled. + //bool AltMacroMode = false; + +public: + MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + const MCAsmInfo &MAI, unsigned CB); + MasmParser(const MasmParser &) = delete; + MasmParser &operator=(const MasmParser &) = delete; + ~MasmParser() override; + + /// @name MCAsmParser Interface + /// { + + bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; + + void addDirectiveHandler(StringRef Directive, + ExtensionDirectiveHandler Handler) override { + assert(false && "not supported for MasmParser"); + } + + void addAliasForDirective(StringRef Directive, StringRef Alias) override { + assert(false && "not supported for MasmParser"); + } + + SourceMgr &getSourceManager() override { return SrcMgr; } + MCAsmLexer &getLexer() override { return Lexer; } + MCContext &getContext() override { return Ctx; } + MCStreamer &getStreamer() override { return Out; } + + //CodeViewContext &getCVContext() { return Ctx.getCVContext(); } + + unsigned getAssemblerDialect() override { + assert(false && "not supported for MasmParser"); + return 0; + } + void setAssemblerDialect(unsigned i) override { + assert(false && "not supported for MasmParser"); + } + + void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override; + bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override; + bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override; + + const AsmToken &Lex() override; + + void setParsingInlineAsm(bool V) override { + // NOTE(review): assert(!false) can never fire and V is ignored, so MASM + // integer lexing is always switched on here. Confirm the intent. 
+ assert(!false); + Lexer.setLexMasmIntegers(true); + } + bool isParsingInlineAsm() override { return false; } + + bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, + unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl<std::pair<void *, bool>> &OpDecls, + 
SmallVectorImpl<std::string> &Constraints, + SmallVectorImpl<std::string> &Clobbers, + const MCInstrInfo *MII, const MCInstPrinter *IP, + MCAsmParserSemaCallback &SI) override; + + //bool parseExpression(const MCExpr *&Res); + bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override; + bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; + bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override; + bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res, + SMLoc &EndLoc) override; + bool parseAbsoluteExpression(int64_t &Res) override; + + /// Parse a floating point expression using the float \p Semantics + /// and set \p Res to the value. + //bool parseRealValue(const fltSemantics &Semantics, APInt &Res); + + /// Parse an identifier or string (as a quoted identifier) + /// and set \p Res to the identifier contents. + bool parseIdentifier(StringRef &Res) override; + void eatToEndOfStatement() override; + + bool checkForValidSection() override; + + /// Parse up to the end of statement and return the contents from the + /// current token until the end of the statement; the current token on exit + /// will be either the EndOfStatement or EOF. + StringRef parseStringToEndOfStatement() override; + + /// } + +private: + //bool parseStatement(ParseStatementInfo &Info, + //MCAsmParserSemaCallback *SI); + //bool parseCurlyBlockScope(SmallVectorImpl& AsmStrRewrites); + //bool parseCppHashLineFilenameComment(SMLoc L); + + //void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, + //ArrayRef Parameters); + // bool expandMacro(raw_svector_ostream &OS, StringRef Body, + // ArrayRef Parameters, + // ArrayRef A, bool EnableAtPseudoVariable, + // SMLoc L); + + /// Are macros enabled in the parser? + //bool areMacrosEnabled() {return MacrosEnabledFlag;} + + /// Control a flag in the parser that enables or disables macros. + //void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;} + + /// Are we inside a macro instantiation? 
+ //bool isInsideMacroInstantiation() {return !ActiveMacros.empty();} + + /// Handle entry to macro instantiation. + /// + /// \param M The macro. + /// \param NameLoc Instantiation location. + //bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); + + /// Handle exit from macro instantiation. + //void handleMacroExit(); + + /// Extract AsmTokens for a macro argument. + //bool parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg); + + /// Parse all macro arguments for a given macro. + //bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); + + void printMacroInstantiations(); + void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, + SMRange Range = None) const { + ArrayRef<SMRange> Ranges(Range); + SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); + } + //static void DiagHandler(const SMDiagnostic &Diag, void *Context); + + /// Should we emit DWARF describing this assembler source? (Returns false if + /// the source has .file directives, which means we don't want to generate + /// info describing the assembler source itself.) + //bool enabledGenDwarfForAssembly(); + + /// Enter the specified file. This returns true on failure. + //bool enterIncludeFile(const std::string &Filename); + + /// Process the specified file for the .incbin directive. + /// This returns true on failure. + //bool processIncbinFile(const std::string &Filename, int64_t Skip = 0, + //const MCExpr *Count = nullptr, SMLoc Loc = SMLoc()); + + /// Reset the current lexer position to that given by \p Loc. The + /// current token is not set; clients should ensure Lex() is called + /// subsequently. + /// + /// \param InBuffer If not 0, should be the known buffer id that contains the + /// location. + //void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0); + + /// Parse until the end of a statement or a comma is encountered, + /// return the contents from the current token up to the end or comma. 
+ //StringRef parseStringToComma(); + + //bool parseAssignment(StringRef Name, bool allow_redef, + //bool NoDeadStrip = false); + + //unsigned getBinOpPrecedence(AsmToken::TokenKind K, + //MCBinaryExpr::Opcode &Kind); + + //bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); + //bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); + //bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); + + //bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); + + //bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName); + //bool parseCVFileId(int64_t &FileId, StringRef DirectiveName); + + // Generic (target and platform independent) directive parsing. + //enum DirectiveKind { + // DK_NO_DIRECTIVE, // Placeholder + // DK_SET, + // DK_EQU, + // DK_EQUIV, + // DK_ASCII, + // DK_ASCIZ, + // DK_STRING, + // DK_BYTE, + // DK_SHORT, + // DK_RELOC, + // DK_VALUE, + // DK_2BYTE, + // DK_LONG, + // DK_INT, + // DK_4BYTE, + // DK_QUAD, + // DK_8BYTE, + // DK_OCTA, + // DK_DC, + // DK_DC_A, + // DK_DC_B, + // DK_DC_D, + // DK_DC_L, + // DK_DC_S, + // DK_DC_W, + // DK_DC_X, + // DK_DCB, + // DK_DCB_B, + // DK_DCB_D, + // DK_DCB_L, + // DK_DCB_S, + // DK_DCB_W, + // DK_DCB_X, + // DK_DS, + // DK_DS_B, + // DK_DS_D, + // DK_DS_L, + // DK_DS_P, + // DK_DS_S, + // DK_DS_W, + // DK_DS_X, + // DK_SINGLE, + // DK_FLOAT, + // DK_DOUBLE, + // DK_ALIGN, + // DK_ALIGN32, + // DK_BALIGN, + // DK_BALIGNW, + // DK_BALIGNL, + // DK_P2ALIGN, + // DK_P2ALIGNW, + // DK_P2ALIGNL, + // DK_ORG, + // DK_FILL, + // DK_ENDR, + // DK_BUNDLE_ALIGN_MODE, + // DK_BUNDLE_LOCK, + // DK_BUNDLE_UNLOCK, + // DK_ZERO, + // DK_EXTERN, + // DK_GLOBL, + // DK_GLOBAL, + // DK_LAZY_REFERENCE, + // DK_NO_DEAD_STRIP, + // DK_SYMBOL_RESOLVER, + // DK_PRIVATE_EXTERN, + // DK_REFERENCE, + // DK_WEAK_DEFINITION, + // DK_WEAK_REFERENCE, + // DK_WEAK_DEF_CAN_BE_HIDDEN, + // DK_COLD, + // DK_COMM, + // DK_COMMON, + // DK_LCOMM, + // DK_ABORT, + // DK_INCLUDE, + // 
DK_INCBIN, + // DK_CODE16, + // DK_CODE16GCC, + // DK_REPT, + // DK_IRP, + // DK_IRPC, + // DK_IF, + // DK_IFEQ, + // DK_IFGE, + // DK_IFGT, + // DK_IFLE, + // DK_IFLT, + // DK_IFNE, + // DK_IFB, + // DK_IFNB, + // DK_IFC, + // DK_IFEQS, + // DK_IFNC, + // DK_IFNES, + // DK_IFDEF, + // DK_IFNDEF, + // DK_IFNOTDEF, + // DK_ELSEIF, + // DK_ELSE, + // DK_ENDIF, + // DK_SPACE, + // DK_SKIP, + // DK_FILE, + // DK_LINE, + // DK_LOC, + // DK_STABS, + // DK_CV_FILE, + // DK_CV_FUNC_ID, + // DK_CV_INLINE_SITE_ID, + // DK_CV_LOC, + // DK_CV_LINETABLE, + // DK_CV_INLINE_LINETABLE, + // DK_CV_DEF_RANGE, + // DK_CV_STRINGTABLE, + // DK_CV_STRING, + // DK_CV_FILECHECKSUMS, + // DK_CV_FILECHECKSUM_OFFSET, + // DK_CV_FPO_DATA, + // DK_CFI_SECTIONS, + // DK_CFI_STARTPROC, + // DK_CFI_ENDPROC, + // DK_CFI_DEF_CFA, + // DK_CFI_DEF_CFA_OFFSET, + // DK_CFI_ADJUST_CFA_OFFSET, + // DK_CFI_DEF_CFA_REGISTER, + // DK_CFI_OFFSET, + // DK_CFI_REL_OFFSET, + // DK_CFI_PERSONALITY, + // DK_CFI_LSDA, + // DK_CFI_REMEMBER_STATE, + // DK_CFI_RESTORE_STATE, + // DK_CFI_SAME_VALUE, + // DK_CFI_RESTORE, + // DK_CFI_ESCAPE, + // DK_CFI_RETURN_COLUMN, + // DK_CFI_SIGNAL_FRAME, + // DK_CFI_UNDEFINED, + // DK_CFI_REGISTER, + // DK_CFI_WINDOW_SAVE, + // DK_CFI_B_KEY_FRAME, + // DK_MACROS_ON, + // DK_MACROS_OFF, + // DK_ALTMACRO, + // DK_NOALTMACRO, + // DK_MACRO, + // DK_EXITM, + // DK_ENDM, + // DK_ENDMACRO, + // DK_PURGEM, + // DK_SLEB128, + // DK_ULEB128, + // DK_ERR, + // DK_ERROR, + // DK_WARNING, + // DK_PRINT, + // DK_ADDRSIG, + // DK_ADDRSIG_SYM, + // DK_END + //}; + + /// Maps directive name --> DirectiveKind enum, for + /// directives parsed by this class. + //StringMap DirectiveKindMap; + + // ".ascii", ".asciz", ".string" + //bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + //bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" + //bool parseDirectiveValue(StringRef IDVal, + // unsigned Size); // ".byte", ".long", ... 
+ //bool parseDirectiveOctaValue(StringRef IDVal); // ".octa", ... + //bool parseDirectiveRealValue(StringRef IDVal, + //const fltSemantics &); // ".single", ... + //bool parseDirectiveFill(); // ".fill" + //bool parseDirectiveZero(); // ".zero" + // ".set", ".equ", ".equiv" + //bool parseDirectiveSet(StringRef IDVal, bool allow_redef); + //bool parseDirectiveOrg(); // ".org" + // ".align{,32}", ".p2align{,w,l}" + //bool parseDirectiveAlign(bool IsPow2, unsigned ValueSize); + + // ".file", ".line", ".loc", ".stabs" + //bool parseDirectiveFile(SMLoc DirectiveLoc); + //bool parseDirectiveLine(); + //bool parseDirectiveLoc(); + //bool parseDirectiveStabs(); + + // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable", + // ".cv_inline_linetable", ".cv_def_range", ".cv_string" + //bool parseDirectiveCVFile(); + //bool parseDirectiveCVFuncId(); + //bool parseDirectiveCVInlineSiteId(); + //bool parseDirectiveCVLoc(); + //bool parseDirectiveCVLinetable(); + //bool parseDirectiveCVInlineLinetable(); + //bool parseDirectiveCVDefRange(); + //bool parseDirectiveCVString(); + //bool parseDirectiveCVStringTable(); + //bool parseDirectiveCVFileChecksums(); + //bool parseDirectiveCVFileChecksumOffset(); + //bool parseDirectiveCVFPOData(); + + // .cfi directives + //bool parseDirectiveCFIRegister(SMLoc DirectiveLoc); + //bool parseDirectiveCFIWindowSave(); + //bool parseDirectiveCFISections(); + //bool parseDirectiveCFIStartProc(); + //bool parseDirectiveCFIEndProc(); + //bool parseDirectiveCFIDefCfaOffset(); + //bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc); + //bool parseDirectiveCFIAdjustCfaOffset(); + //bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc); + //bool parseDirectiveCFIOffset(SMLoc DirectiveLoc); + //bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc); + //bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality); + //bool parseDirectiveCFIRememberState(); + //bool parseDirectiveCFIRestoreState(); + //bool 
parseDirectiveCFISameValue(SMLoc DirectiveLoc); + //bool parseDirectiveCFIRestore(SMLoc DirectiveLoc); + //bool parseDirectiveCFIEscape(); + //bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc); + //bool parseDirectiveCFISignalFrame(); + //bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc); + + // macro directives + //bool parseDirectivePurgeMacro(SMLoc DirectiveLoc); + //bool parseDirectiveExitMacro(StringRef Directive); + //bool parseDirectiveEndMacro(StringRef Directive); + //bool parseDirectiveMacro(SMLoc DirectiveLoc); + //bool parseDirectiveMacrosOnOff(StringRef Directive); + // alternate macro mode directives + //bool parseDirectiveAltmacro(StringRef Directive); + // ".bundle_align_mode" + //bool parseDirectiveBundleAlignMode(); + // ".bundle_lock" + //bool parseDirectiveBundleLock(); + // ".bundle_unlock" + //bool parseDirectiveBundleUnlock(); + + // ".space", ".skip" + //bool parseDirectiveSpace(StringRef IDVal); + + // ".dcb" + //bool parseDirectiveDCB(StringRef IDVal, unsigned Size); + //bool parseDirectiveRealDCB(StringRef IDVal, const fltSemantics &); + // ".ds" + //bool parseDirectiveDS(StringRef IDVal, unsigned Size); + + // .sleb128 (Signed=true) and .uleb128 (Signed=false) + //bool parseDirectiveLEB128(bool Signed); + + /// Parse a directive like ".globl" which + /// accepts a single symbol (which should be a label or an external). + //bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr); + + //bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" + + //bool parseDirectiveAbort(); // ".abort" + //bool parseDirectiveInclude(); // ".include" + //bool parseDirectiveIncbin(); // ".incbin" + + // ".if", ".ifeq", ".ifge", ".ifgt" , ".ifle", ".iflt" or ".ifne" + //bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind); + // ".ifb" or ".ifnb", depending on ExpectBlank. + //bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); + // ".ifc" or ".ifnc", depending on ExpectEqual. 
+ //bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual); + // ".ifeqs" or ".ifnes", depending on ExpectEqual. + //bool parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual); + // ".ifdef" or ".ifndef", depending on expect_defined + //bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined); + //bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" + //bool parseDirectiveElse(SMLoc DirectiveLoc); // ".else" + //bool parseDirectiveEndIf(SMLoc DirectiveLoc); // .endif + bool parseEscapedString(std::string &Data) override; + + //const MCExpr *applyModifierToExpr(const MCExpr *E, + //MCSymbolRefExpr::VariantKind Variant); + + // Macro-like directives + //MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc); + //void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, + //raw_svector_ostream &OS); + //bool parseDirectiveRept(SMLoc DirectiveLoc, StringRef Directive); + //bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" + //bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" + //bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" + + // "_emit" or "__emit" + //bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, + //size_t Len); + + // "align" + //bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info); + + // "end" + //bool parseDirectiveEnd(SMLoc DirectiveLoc); + + // ".err" or ".error" + //bool parseDirectiveError(SMLoc DirectiveLoc, bool WithMessage); + + // ".warning" + //bool parseDirectiveWarning(SMLoc DirectiveLoc); + + // .print + //bool parseDirectivePrint(SMLoc DirectiveLoc); + + // Directives to support address-significance tables. + //bool parseDirectiveAddrsig(); + //bool parseDirectiveAddrsigSym(); + + //void initializeDirectiveKindMap(); +}; + +/// Construct the parser. \p CB selects the SourceMgr buffer to parse; 0 means +/// the main file. The diagnostic-handler, lexer-buffer and platform-parser +/// setup in the body is compiled out with "#if 0". 
+/// NOTE(review): the default for \p CB is given only on this out-of-class +/// definition, not on the in-class declaration. +MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + const MCAsmInfo &MAI, unsigned CB = 0) + : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM), + CurBuffer(CB ? 
CB : SM.getMainFileID()) { + HadError = false; +#if 0 + // Save the old handler. + SavedDiagHandler = SrcMgr.getDiagHandler(); + SavedDiagContext = SrcMgr.getDiagContext(); + // Set our own handler which calls the saved handler. + SrcMgr.setDiagHandler(DiagHandler, this); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); + + // Initialize the platform / file format parser. + switch (Ctx.getObjectFileInfo()->getObjectFileType()) { + case MCObjectFileInfo::IsCOFF: + PlatformParser.reset(createCOFFAsmParser()); + break; + case MCObjectFileInfo::IsMachO: + PlatformParser.reset(createDarwinAsmParser()); + IsDarwin = true; + break; + case MCObjectFileInfo::IsELF: + PlatformParser.reset(createELFAsmParser()); + break; + case MCObjectFileInfo::IsWasm: + PlatformParser.reset(createWasmAsmParser()); + break; + } + + PlatformParser->Initialize(*this); + initializeDirectiveKindMap(); + + NumOfMacroInstantiations = 0; +#endif +} + +MasmParser::~MasmParser() { + //assert((HadError || ActiveMacros.empty()) && + //"Unexpected active macro instantiation!"); + + // Restore the saved diagnostics handler and context for use during + // finalization. + //SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext); +} + +/// Currently a no-op: the body is compiled out with "#if 0". +void MasmParser::printMacroInstantiations() { +#if 0 + // Print the active macro instantiation stack. + for (std::vector::const_reverse_iterator + it = ActiveMacros.rbegin(), + ie = ActiveMacros.rend(); + it != ie; ++it) + printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note, + "while in macro instantiation"); +#endif +} + +/// Mark the parse as failed, print \p Msg as an error at \p L, then print the +/// macro instantiation stack. Always returns true. 
+bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) { + HadError = true; + printMessage(L, SourceMgr::DK_Error, Msg, Range); + printMacroInstantiations(); + return true; +} + +/// Stub: the whole parse loop is compiled out with "#if 0", so this currently +/// does nothing and returns false. +bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) { + // Create the initial section, if requested. + //if (!NoInitialTextSection) + //Out.InitSections(false); + +#if 0 + // Prime the lexer. 
+ Lex(); + + HadError = false; + AsmCond StartingCondState = TheCondState; + SmallVector AsmStrRewrites; + + // If we are generating dwarf for assembly source files save the initial text + // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't + // emitting any actual debug info yet and haven't had a chance to parse any + // embedded .file directives.) + if (getContext().getGenDwarfForAssembly()) { + MCSection *Sec = getStreamer().getCurrentSectionOnly(); + if (!Sec->getBeginSymbol()) { + MCSymbol *SectionStartSym = getContext().createTempSymbol(); + getStreamer().EmitLabel(SectionStartSym); + Sec->setBeginSymbol(SectionStartSym); + } + bool InsertResult = getContext().addGenDwarfSection(Sec); + assert(InsertResult && ".text section should not have debug info yet"); + (void)InsertResult; + } + + // While we have input, parse each statement. + while (Lexer.isNot(AsmToken::Eof)) { + ParseStatementInfo Info(&AsmStrRewrites); + if (!parseStatement(Info, nullptr)) + continue; + + // If we have a Lexer Error we are on an Error Token. Load in Lexer Error + // for printing ErrMsg via Lex() only if no (presumably better) parser error + // exists. + if (!hasPendingError() && Lexer.getTok().is(AsmToken::Error)) { + Lex(); + } + + // parseStatement returned true so may need to emit an error. + printPendingErrors(); + + // Skipping to the next line if needed. + if (!getLexer().isAtStartOfStatement()) + eatToEndOfStatement(); + } + + // Make sure we get proper DWARF even for empty files. + (void)enabledGenDwarfForAssembly(); + + getTargetParser().onEndOfFile(); + printPendingErrors(); + + // All errors should have been emitted. 
+ assert(!hasPendingError() && "unexpected error from parseStatement"); + + getTargetParser().flushPendingInstructions(getStreamer()); + + if (TheCondState.TheCond != StartingCondState.TheCond || + TheCondState.Ignore != StartingCondState.Ignore) + printError(getTok().getLoc(), "unmatched .ifs or .elses"); + // Check to see there are no empty DwarfFile slots. + const auto &LineTables = getContext().getMCDwarfLineTables(); + if (!LineTables.empty()) { + unsigned Index = 0; + for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) { + if (File.Name.empty() && Index != 0) + printError(getTok().getLoc(), "unassigned file number: " + + Twine(Index) + + " for .file directives"); + ++Index; + } + } + + // Check to see that all assembler local symbols were actually defined. + // Targets that don't do subsections via symbols may not want this, though, + // so conservatively exclude them. Only do this if we're finalizing, though, + // as otherwise we won't necessarily have seen everything yet. + if (!NoFinalize) { + if (MAI.hasSubsectionsViaSymbols()) { + for (const auto &TableEntry : getContext().getSymbols()) { + MCSymbol *Sym = TableEntry.getValue(); + // Variable symbols may not be marked as defined, so check those + // explicitly. If we know it's a variable, we have a definition for + // the purposes of this check. + if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined()) + // FIXME: We would really like to refer back to where the symbol was + // first referenced for a source location. We need to add something + // to track that. Currently, we just point to the end of the file. + printError(getTok().getLoc(), "assembler local symbol '" + + Sym->getName() + "' not defined"); + } + } + + // Temporary symbols like the ones for directional jumps don't go in the + // symbol table. They also need to be diagnosed in all (final) cases. 
+ for (std::tuple &LocSym : DirLabels) { + if (std::get<2>(LocSym)->isUndefined()) { + // Reset the state of any "# line file" directives we've seen to the + // context as it was at the diagnostic site. + CppHashInfo = std::get<1>(LocSym); + printError(std::get<0>(LocSym), "directional label undefined"); + } + } + } + + // Finalize the output stream if there are no errors and if the client wants + // us to. + if (!HadError && !NoFinalize) + Out.Finish(); + + return HadError || getContext().hadError(); +#endif + return false; +} + +/// Not implemented yet: trips an assertion when assertions are enabled and +/// otherwise returns true. +bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + assert(false && "todo parseExpression"); + return true; + +#if 0 + // Parse the expression. + Res = nullptr; + if (getTargetParser().parsePrimaryExpr(Res, EndLoc) || + parseBinOpRHS(1, Res, EndLoc)) + return true; + + // As a special case, we support 'a op b @ modifier' by rewriting the + // expression to include the modifier. This is inefficient, but in general we + // expect users to use 'a@modifier op b'. + if (Lexer.getKind() == AsmToken::At) { + Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("unexpected symbol modifier following '@'"); + + MCSymbolRefExpr::VariantKind Variant = + MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier()); + if (Variant == MCSymbolRefExpr::VK_Invalid) + return TokError("invalid variant '" + getTok().getIdentifier() + "'"); + + const MCExpr *ModifiedRes = applyModifierToExpr(Res, Variant); + if (!ModifiedRes) { + return TokError("invalid modifier '" + getTok().getIdentifier() + + "' (no symbols present)"); + } + + Res = ModifiedRes; + Lex(); + } + + // Try to constant fold it up front, if possible. Do not exploit + // assembler here. 
+ int64_t Value; + if (Res->evaluateAsAbsolute(Value)) + Res = MCConstantExpr::create(Value, getContext()); + + return false; +#endif +} + +/// parseIdentifier: +/// ::= identifier +/// ::= string +/// Only a plain identifier token is accepted for now; the string form and the +/// '$'/'@'-prefixed form are disabled. On success \p Res gets the identifier +/// text and the token is consumed; otherwise this returns true. +bool MasmParser::parseIdentifier(StringRef &Res) { +#if 0 + // The assembler has relaxed rules for accepting identifiers, in particular we + // allow things like '.globl $foo' and '.def @feat.00', which would normally be + // separate tokens. At this level, we have already lexed so we cannot (currently) + // handle this as a context dependent token, instead we detect adjacent tokens + // and return the combined identifier. + if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) { + SMLoc PrefixLoc = getLexer().getLoc(); + + // Consume the prefix character, and check for a following identifier. + + AsmToken Buf[1]; + Lexer.peekTokens(Buf, false); + + if (Buf[0].isNot(AsmToken::Identifier)) + return true; + + // We have a '$' or '@' followed by an identifier, make sure they are adjacent. + if (PrefixLoc.getPointer() + 1 != Buf[0].getLoc().getPointer()) + return true; + + // eat $ or @ + Lexer.Lex(); // Lexer's Lex guarantees consecutive token. + // Construct the joined identifier and consume the token. + Res = + StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); + Lex(); // Parser Lex to maintain invariants. + return false; + } +#endif + + if (Lexer.isNot(AsmToken::Identifier) /*&& Lexer.isNot(AsmToken::String)*/) + return true; + + Res = getTok().getIdentifier(); + + Lex(); // Consume the identifier token. + + return false; +} + +/// Create an MCAsmParser instance. 
+MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C, + MCStreamer &Out, const MCAsmInfo &MAI, + unsigned CB) { + return new MasmParser(SM, C, Out, MAI, CB); +} Index: llvm/tools/llvm-ml/CMakeLists.txt =================================================================== --- /dev/null +++ llvm/tools/llvm-ml/CMakeLists.txt @@ -0,0 +1 @@ +# FIXME Index: llvm/tools/llvm-ml/Disassembler.h =================================================================== --- /dev/null +++ llvm/tools/llvm-ml/Disassembler.h @@ -0,0 +1,42 @@ +//===- Disassembler.h - Text File Disassembler ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_ML_DISASSEMBLER_H +#define LLVM_TOOLS_LLVM_ML_DISASSEMBLER_H + +#include <string> + +namespace llvm { + +class MemoryBuffer; +class Target; +class raw_ostream; +class SourceMgr; +class MCSubtargetInfo; +class MCStreamer; + +/// Holds the static entry point for disassembling a buffer of byte tokens. +class Disassembler { +public: + static int disassemble(const Target &T, + const std::string &Triple, + MCSubtargetInfo &STI, + MCStreamer &Streamer, + MemoryBuffer &Buffer, + SourceMgr &SM, + raw_ostream &Out); +}; + +} // namespace llvm + +#endif Index: llvm/tools/llvm-ml/Disassembler.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-ml/Disassembler.cpp @@ -0,0 +1,227 @@ +//===- Disassembler.cpp - Disassembler for hex strings --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. +// +//===----------------------------------------------------------------------===// + +#include "Disassembler.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// first: parsed byte values; second: pointer to each byte's source text. +typedef std::pair<std::vector<unsigned char>, std::vector<const char *>> + ByteArrayTy; + +/// Decode \p Bytes one instruction at a time and emit each decoded instruction +/// to \p Streamer. Invalid and soft-failing encodings are reported through +/// \p SM as warnings. Returns true only if decoding fails while +/// \p InAtomicBlock is set; otherwise returns false. +static bool PrintInsts(const MCDisassembler &DisAsm, + const ByteArrayTy &Bytes, + SourceMgr &SM, raw_ostream &Out, + MCStreamer &Streamer, bool InAtomicBlock, + const MCSubtargetInfo &STI) { + ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size()); + + // Disassemble it to strings. 
+ uint64_t Size; + uint64_t Index; + + for (Index = 0; Index < Bytes.first.size(); Index += Size) { + MCInst Inst; + + MCDisassembler::DecodeStatus S; + S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, + /*REMOVE*/ nulls(), nulls()); + switch (S) { + case MCDisassembler::Fail: + SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), + SourceMgr::DK_Warning, + "invalid instruction encoding"); + // Don't try to resynchronise the stream in a block + if (InAtomicBlock) + return true; + + if (Size == 0) + Size = 1; // skip illegible bytes + + break; + + case MCDisassembler::SoftFail: + SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), + SourceMgr::DK_Warning, + "potentially undefined instruction encoding"); + LLVM_FALLTHROUGH; + + case MCDisassembler::Success: + Streamer.EmitInstruction(Inst, STI); + break; + } + } + + return false; +} + +/// Advance \p Str past whitespace, commas and '#' comments. Returns false once +/// \p Str is empty, true when it starts at a token. +static bool SkipToToken(StringRef &Str) { + for (;;) { + if (Str.empty()) + return false; + + // Strip whitespace (including newlines) and commas. + if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) { + Str = Str.substr(Pos); + continue; + } + + // If this is the start of a comment, remove the rest of the line. + if (Str[0] == '#') { + Str = Str.substr(Str.find_first_of('\n')); + continue; + } + return true; + } +} + + +/// Append integer byte tokens from \p Str to \p ByteArray until the input ends +/// or a '[' or ']' is reached. An invalid token prints an error, discards the +/// bytes collected so far and skips the rest of that line. +/// NOTE(review): every path returns false, so an invalid token never sets the +/// caller's ErrorOccurred flag. Confirm that this is intended. 
+static bool ByteArrayFromString(ByteArrayTy &ByteArray, + StringRef &Str, + SourceMgr &SM) { + while (SkipToToken(Str)) { + // Handled by higher level + if (Str[0] == '[' || Str[0] == ']') + return false; + + // Get the current token. + size_t Next = Str.find_first_of(" \t\n\r,#[]"); + StringRef Value = Str.substr(0, Next); + + // Convert to a byte and add to the byte vector. + unsigned ByteVal; + if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { + // If we have an error, print it and skip to the end of line. 
+ SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error, + "invalid input token"); + Str = Str.substr(Str.find('\n')); + ByteArray.first.clear(); + ByteArray.second.clear(); + continue; + } + + ByteArray.first.push_back(ByteVal); + ByteArray.second.push_back(Value.data()); + Str = Str.substr(Next); + } + + return false; +} + +/// Disassemble the byte tokens in \p Buffer for target \p T and emit the +/// decoded instructions to \p Streamer. '[' and ']' delimit atomic blocks. +/// Returns -1 if target components cannot be created; otherwise nonzero if +/// an atomic-block error or an aborted atomic-block decode occurred. +int Disassembler::disassemble(const Target &T, + const std::string &Triple, + MCSubtargetInfo &STI, + MCStreamer &Streamer, + MemoryBuffer &Buffer, + SourceMgr &SM, + raw_ostream &Out) { + + std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple)); + if (!MRI) { + errs() << "error: no register info for target " << Triple << "\n"; + return -1; + } + + std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple)); + if (!MAI) { + errs() << "error: no assembly info for target " << Triple << "\n"; + return -1; + } + + // Set up the MCContext for creating symbols and MCExpr's. + MCContext Ctx(MAI.get(), MRI.get(), nullptr); + + std::unique_ptr<const MCDisassembler> DisAsm( + T.createMCDisassembler(STI, Ctx)); + if (!DisAsm) { + errs() << "error: no disassembler for target " << Triple << "\n"; + return -1; + } + + // Set up initial section manually here + Streamer.InitSections(false); + + bool ErrorOccurred = false; + + // Convert the input to a vector for disassembly. 
+ ByteArrayTy ByteArray; + StringRef Str = Buffer.getBuffer(); + bool InAtomicBlock = false; + + while (SkipToToken(Str)) { + ByteArray.first.clear(); + ByteArray.second.clear(); + + if (Str[0] == '[') { + if (InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "nested atomic blocks make no sense"); + ErrorOccurred = true; + } + InAtomicBlock = true; + Str = Str.drop_front(); + continue; + } else if (Str[0] == ']') { + if (!InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "attempt to close atomic block without opening"); + ErrorOccurred = true; + } + InAtomicBlock = false; + Str = Str.drop_front(); + continue; + } + + // It's a real token, get the bytes and emit them + ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM); + + if (!ByteArray.first.empty()) + ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer, + InAtomicBlock, STI); + } + + if (InAtomicBlock) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, + "unclosed atomic block"); + ErrorOccurred = true; + } + + return ErrorOccurred; +} Index: llvm/tools/llvm-ml/llvm-ml.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-ml/llvm-ml.cpp @@ -0,0 +1,431 @@ +//===-- llvm-ml.cpp - masm-compatible assembler -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A simple driver around MasmParser. 
+// +//===----------------------------------------------------------------------===// + +#include "Disassembler.h" +#include "llvm/MC/MCParser/MCAsmParser.h" + +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" + +using namespace llvm; + +static cl::opt +InputFilename(cl::Positional, cl::desc(""), cl::init("-")); + +static cl::opt OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename"), + cl::init("-")); + +static cl::opt +ShowEncoding("show-encoding", cl::desc("Show instruction encodings")); + +static cl::opt +ShowInst("show-inst", cl::desc("Show internal instruction representation")); + +static cl::opt +ShowInstOperands("show-inst-operands", + cl::desc("Show instructions operands as parsed")); + +static cl::opt +OutputAsmVariant("output-asm-variant", + cl::desc("Syntax variant to use for output printing")); + +static cl::opt +PrintImmHex("print-imm-hex", cl::init(false), + cl::desc("Prefer hex format for immediate values")); + +static cl::opt + PreserveComments("preserve-comments", + cl::desc("Preserve Comments 
in outputted assembly")); + +enum OutputFileType { + OFT_Null, + OFT_AssemblyFile, + OFT_ObjectFile +}; +static cl::opt +FileType("filetype", cl::init(OFT_AssemblyFile), + cl::desc("Choose an output file type:"), + cl::values( + clEnumValN(OFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + clEnumValN(OFT_Null, "null", + "Don't emit anything (for timing purposes)"), + clEnumValN(OFT_ObjectFile, "obj", + "Emit a native object ('.o') file"))); + +static cl::list +IncludeDirs("I", cl::desc("Directory of include files"), + cl::value_desc("directory"), cl::Prefix); + +enum BitnessType { + m32, m64, +}; +cl::opt Bitness(cl::desc("Choose bitness:"), cl::init(m64), + cl::values(clEnumVal(m32, "32-bit"), + clEnumVal(m64, "64-bit (default)"))); + +//static cl::opt +//ArchName("arch", cl::desc("Target arch to assemble for, " + //"see -version for available targets")); + +//static cl::opt +//TripleName("triple", cl::desc("Target triple to assemble for, " + //"see -version for available targets")); + +//static cl::opt +//MCPU("mcpu", + //cl::desc("Target a specific cpu type (-mcpu=help for details)"), + //cl::value_desc("cpu-name"), + //cl::init("")); + +//static cl::list +//MAttrs("mattr", + //cl::CommaSeparated, + //cl::desc("Target specific attributes (-mattr=help for details)"), + //cl::value_desc("a1,+a2,-a3,...")); + +//static cl::opt PIC("position-independent", + //cl::desc("Position independent"), cl::init(false)); + +//static cl::opt + //LargeCodeModel("large-code-model", + //cl::desc("Create cfi directives that assume the code might " + //"be more than 2gb away")); + +//static cl::opt +//NoInitialTextSection("n", cl::desc("Don't assume assembly file starts " + //"in the text section")); + +static cl::opt +DebugCompilationDir("fdebug-compilation-dir", + cl::desc("Specifies the debug info's compilation dir")); + +static cl::list +DebugPrefixMap("fdebug-prefix-map", + cl::desc("Map file source paths in debug info"), + cl::value_desc("= separated key-value 
pairs")); + +static cl::opt +MainFileName("main-file-name", + cl::desc("Specifies the name we should consider the input file")); + +static cl::opt SaveTempLabels("save-temp-labels", + cl::desc("Don't discard temporary labels")); + +//static cl::opt LexMasmIntegers( + //"masm-integers", + //cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)")); + +//static cl::opt NoExecStack("no-exec-stack", + //cl::desc("File doesn't need an exec stack")); + +enum ActionType { + AC_AsLex, + AC_Assemble, + AC_Disassemble, + AC_MDisassemble, +}; + +static cl::opt +Action(cl::desc("Action to perform:"), + cl::init(AC_Assemble), + cl::values(clEnumValN(AC_AsLex, "as-lex", + "Lex tokens from a .s file"), + clEnumValN(AC_Assemble, "assemble", + "Assemble a .s file (default)"), + clEnumValN(AC_Disassemble, "disassemble", + "Disassemble strings of hex bytes"), + clEnumValN(AC_MDisassemble, "mdis", + "Marked up disassembly of strings of hex bytes"))); + +std::string TripleName; + +static const Target *GetTarget(const char *ProgName) { + // Figure out the target triple. + if (Bitness == m32) + TripleName = "i386-pc-windows"; + else if (Bitness == m64) + TripleName = "x86_64-pc-windows"; + Triple TheTriple(Triple::normalize(TripleName)); + + // Get the target specific parser. + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget("", TheTriple, Error); + if (!TheTarget) { + WithColor::error(errs(), ProgName) << Error; + return nullptr; + } + + // Update the triple name and return the found target. 
+ TripleName = TheTriple.getTriple(); + return TheTarget; +} + +static std::unique_ptr GetOutputStream(StringRef Path) { + std::error_code EC; + auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + if (EC) { + WithColor::error() << EC.message() << '\n'; + return nullptr; + } + + return Out; +} + +static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, + raw_ostream &OS) { + + AsmLexer Lexer(MAI); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); + + bool Error = false; + while (Lexer.Lex().isNot(AsmToken::Eof)) { + Lexer.getTok().dump(OS); + OS << "\n"; + if (Lexer.getTok().getKind() == AsmToken::Error) + Error = true; + } + + return Error; +} + +static int AssembleInput(const char *ProgName, const Target *TheTarget, + SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII, MCTargetOptions &MCOptions) { + std::unique_ptr Parser( + createMCMasmParser(SrcMgr, Ctx, Str, MAI)); + + // XXX hm + std::unique_ptr TAP( + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + + if (!TAP) { + WithColor::error(errs(), ProgName) + << "this target does not support assembly parsing.\n"; + return 1; + } + + Parser->setShowParsedOperands(ShowInstOperands); + Parser->setTargetParser(*TAP); + //Parser->getLexer().setLexMasmIntegers(LexMasmIntegers); + + int Res = Parser->Run(/*NoInitialTextSection=*/true); + + return Res; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + // Initialize targets and assembly printers/parsers. + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + + // Register the target printer for --version. 
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); + + cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + + const char *ProgName = argv[0]; + const Target *TheTarget = GetTarget(ProgName); + if (!TheTarget) + return 1; + // Now that GetTarget() has (potentially) replaced TripleName, it's safe to + // construct the Triple object. + Triple TheTriple(TripleName); + + ErrorOr> BufferPtr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (std::error_code EC = BufferPtr.getError()) { + WithColor::error(errs(), ProgName) + << InputFilename << ": " << EC.message() << '\n'; + return 1; + } + MemoryBuffer *Buffer = BufferPtr->get(); + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + assert(MRI && "Unable to create target register info!"); + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + assert(MAI && "Unable to create target asm info!"); + + MAI->setPreserveAsmComments(PreserveComments); + + // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and + // MCObjectFileInfo needs a MCContext reference in order to initialize itself. + MCObjectFileInfo MOFI; + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + MOFI.InitMCObjectFileInfo(TheTriple, /*PIC=*/false, Ctx, + /*LargeCodeModel=*/true); + + if (SaveTempLabels) + Ctx.setAllowTemporaryLabels(false); + + if (!DebugCompilationDir.empty()) + Ctx.setCompilationDir(DebugCompilationDir); + else { + // If no compilation dir is set, try to use the current directory. 
+ SmallString<128> CWD; + if (!sys::fs::current_path(CWD)) + Ctx.setCompilationDir(CWD); + } + for (const auto &Arg : DebugPrefixMap) { + const auto &KV = StringRef(Arg).split('='); + Ctx.addDebugPrefixMapEntry(KV.first, KV.second); + } + if (!MainFileName.empty()) + Ctx.setMainFileName(MainFileName); + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + //if (MAttrs.size()) { + //SubtargetFeatures Features; + //for (unsigned i = 0; i != MAttrs.size(); ++i) + //Features.AddFeature(MAttrs[i]); + //FeaturesStr = Features.getString(); + //} + + std::unique_ptr Out = GetOutputStream(OutputFilename); + if (!Out) + return 1; + + std::unique_ptr BOS; + raw_pwrite_stream *OS = &Out->os(); + std::unique_ptr Str; + + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, /*MCPU=*/"", FeaturesStr)); + + MCInstPrinter *IP = nullptr; + if (FileType == OFT_AssemblyFile) { + IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, + *MAI, *MCII, *MRI); + + if (!IP) { + WithColor::error() + << "unable to create instruction printer for target triple '" + << TheTriple.normalize() << "' with assembly variant " + << OutputAsmVariant << ".\n"; + return 1; + } + + // Set the display preference for hex vs. decimal immediates. + IP->setPrintImmHex(PrintImmHex); + + // Set up the AsmStreamer. 
+ std::unique_ptr CE; + if (ShowEncoding) + CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + auto FOut = llvm::make_unique(*OS); + Str.reset( + TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true, + /*useDwarfDirectory*/ true, IP, + std::move(CE), std::move(MAB), ShowInst)); + + } else if (FileType == OFT_Null) { + Str.reset(TheTarget->createNullStreamer(Ctx)); + } else { + assert(FileType == OFT_ObjectFile && "Invalid file type!"); + + // Don't waste memory on names of temp labels. + Ctx.setUseNamesOnTempLabels(false); + + if (!Out->os().supportsSeeking()) { + BOS = make_unique(Out->os()); + OS = BOS.get(); + } + + MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); + MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); + Str.reset(TheTarget->createMCObjectStreamer( + TheTriple, Ctx, std::unique_ptr(MAB), + MAB->createObjectWriter(*OS), std::unique_ptr(CE), *STI, + MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + //if (NoExecStack) + //Str->InitSections(true); + } + + // Use Assembler information for parsing. + Str->setUseAssemblerInfoForParsing(true); + + int Res = 1; + bool disassemble = false; + switch (Action) { + case AC_AsLex: + Res = AsLexInput(SrcMgr, *MAI, Out->os()); + break; + case AC_Assemble: + Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, + *MCII, MCOptions); + break; + case AC_MDisassemble: + assert(IP && "Expected assembly output"); + IP->setUseMarkup(1); + disassemble = true; + break; + case AC_Disassemble: + disassemble = true; + break; + } + if (disassemble) + Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, + *Buffer, SrcMgr, Out->os()); + + // Keep output if no errors. 
+ if (Res == 0) + Out->keep(); + return Res; +} + Index: llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn @@ -14,6 +14,7 @@ "MCAsmLexer.cpp", "MCAsmParser.cpp", "MCAsmParserExtension.cpp", + "MasmParser.cpp", "MCTargetAsmParser.cpp", "WasmAsmParser.cpp", ] Index: llvm/utils/gn/secondary/llvm/test/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -224,6 +224,7 @@ "//llvm/tools/llvm-lto2", "//llvm/tools/llvm-mc", "//llvm/tools/llvm-mca", + "//llvm/tools/llvm-ml", "//llvm/tools/llvm-modextract", "//llvm/tools/llvm-mt", "//llvm/tools/llvm-nm", Index: llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn =================================================================== --- /dev/null +++ llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn @@ -0,0 +1,17 @@ +executable("llvm-ml") { + deps = [ + "//llvm/lib/MC", + "//llvm/lib/MC/MCParser", + "//llvm/lib/Support", + "//llvm/lib/Target:AllTargetsAsmParsers", + "//llvm/lib/Target:AllTargetsAsmPrinters", + "//llvm/lib/Target:AllTargetsDescs", + "//llvm/lib/Target:AllTargetsDisassemblers", + "//llvm/lib/Target:AllTargetsInfos", + ] + sources = [ + # FIXME: duplicating this is stupid + "Disassembler.cpp", + "llvm-ml.cpp", + ] +}