Index: include/llvm/MC/MCAsmBackend.h =================================================================== --- include/llvm/MC/MCAsmBackend.h +++ include/llvm/MC/MCAsmBackend.h @@ -116,11 +116,20 @@ /// virtual unsigned getMinimumNopSize() const { return 1; } - /// Write an (optimal) nop sequence of Count bytes to the given output. If the - /// target cannot generate such a sequence, it should return an error. + /// \brief Returns a list of tokens to be used with .arch directive to specify + /// preferences for expanding .p2align into nop instructions. + virtual SmallVector + getNopPrefs(const MCSubtargetInfo &STI) const { + return SmallVector(); + } + + /// Write an (optimal for \p STI subtarget) nop sequence of \p Count bytes to + /// the given output. If the target cannot generate such a sequence, it should + /// return an error. /// /// \return - True on success. - virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const = 0; + virtual bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const = 0; /// Give backend an opportunity to finish layout after relaxation virtual void finishLayout(MCAssembler const &Asm, Index: include/llvm/MC/MCFragment.h =================================================================== --- include/llvm/MC/MCFragment.h +++ include/llvm/MC/MCFragment.h @@ -220,11 +220,19 @@ /// Fragment for data and encoded instructions. /// class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> { + /// STI - The MCSubtargetInfo for padding emission. 
+ const MCSubtargetInfo *STI; + public: MCDataFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {} + : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec), STI(nullptr) {} + + void setHasInstructions(bool V, const MCSubtargetInfo *S) { + HasInstructions = V; + STI = S; + } - void setHasInstructions(bool V) { HasInstructions = V; } + const MCSubtargetInfo *getSubtargetInfo() const { return STI; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Data; @@ -237,10 +245,16 @@ /// consumption. /// class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> { + /// STI - The MCSubtargetInfo for padding emission. + const MCSubtargetInfo &STI; + public: - MCCompactEncodedInstFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) { - } + MCCompactEncodedInstFragment(const MCSubtargetInfo &STI, + MCSection *Sec = nullptr) + : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec), + STI(STI) {} + + const MCSubtargetInfo &getSubtargetInfo() const { return STI; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_CompactEncodedInst; @@ -267,7 +281,7 @@ const MCInst &getInst() const { return Inst; } void setInst(const MCInst &Value) { Inst = Value; } - const MCSubtargetInfo &getSubtargetInfo() { return STI; } + const MCSubtargetInfo &getSubtargetInfo() const { return STI; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Relaxable; @@ -294,12 +308,16 @@ /// cannot be satisfied in this width then this fragment is ignored. unsigned MaxBytesToEmit; + /// STI - Subtarget info to specify which nop instructions to emit. 
+ const MCSubtargetInfo &STI; + public: MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, - unsigned MaxBytesToEmit, MCSection *Sec = nullptr) + unsigned MaxBytesToEmit, const MCSubtargetInfo &STI, + MCSection *Sec = nullptr) : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment), - EmitNops(false), Value(Value), - ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} + EmitNops(false), Value(Value), ValueSize(ValueSize), + MaxBytesToEmit(MaxBytesToEmit), STI(STI) {} /// \name Accessors /// @{ @@ -312,6 +330,8 @@ unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } + const MCSubtargetInfo &getSubtargetInfo() const { return STI; } + bool hasEmitNops() const { return EmitNops; } void setEmitNops(bool Value) { EmitNops = Value; } Index: include/llvm/MC/MCObjectStreamer.h =================================================================== --- include/llvm/MC/MCObjectStreamer.h +++ include/llvm/MC/MCObjectStreamer.h @@ -146,6 +146,7 @@ SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()) override; + void EmitArch(StringRef Token) override; void FinishImpl() override; Index: include/llvm/MC/MCStreamer.h =================================================================== --- include/llvm/MC/MCStreamer.h +++ include/llvm/MC/MCStreamer.h @@ -190,6 +190,9 @@ unsigned NextWinCFIID = 0; protected: + const MCSubtargetInfo *STI = nullptr; + +protected: MCStreamer(MCContext &Ctx); virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame); @@ -213,6 +216,13 @@ TargetStreamer.reset(TS); } + const MCSubtargetInfo &getSubtarget() const { + assert(STI && "STI object should be always available."); + return *STI; + } + + void setSubtarget(const MCSubtargetInfo &S) { STI = &S; } + /// State management /// virtual void reset(); @@ -758,6 +768,9 @@ virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except); virtual void EmitWinEHHandlerData(); + virtual void 
EmitArch(StringRef Token); + void EmitArchForAlignment(const MCAsmBackend &MAB); + /// Get the .pdata section used for the given section. Typically the given /// section is either the main .text section or some other COMDAT .text /// section, but it may be any section containing code. Index: include/llvm/MC/MCSubtargetInfo.h =================================================================== --- include/llvm/MC/MCSubtargetInfo.h +++ include/llvm/MC/MCSubtargetInfo.h @@ -63,9 +63,10 @@ const Triple &getTargetTriple() const { return TargetTriple; } /// getCPU - Return the CPU string. - StringRef getCPU() const { - return CPU; - } + StringRef getCPU() const { return CPU; } + + /// setCPU - Set the CPU string. + void setCPU(const StringRef NewCPU) { CPU = NewCPU.str(); } /// getFeatureBits - Return the feature bits. /// @@ -164,6 +165,12 @@ auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + + /// Check whether the Feature string is valid. 
+ bool isFeatureStringValid(StringRef Feature) const { + auto Found = std::lower_bound(ProcFeatures.begin(), ProcFeatures.end(), Feature); + return Found != ProcFeatures.end() && StringRef(Found->Key) == Feature; + } }; } // End llvm namespace Index: include/llvm/Support/TargetRegistry.h =================================================================== --- include/llvm/Support/TargetRegistry.h +++ include/llvm/Support/TargetRegistry.h @@ -22,6 +22,7 @@ #include "llvm-c/Disassembler.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/FormattedStream.h" #include @@ -40,7 +41,6 @@ class MCInstPrinter; class MCInstrInfo; class MCRegisterInfo; -class MCStreamer; class MCSubtargetInfo; class MCSymbolizer; class MCRelocationInfo; @@ -461,6 +461,7 @@ } if (ObjectTargetStreamerCtorFn) ObjectTargetStreamerCtorFn(*S, STI); + S->setSubtarget(STI); return S; } @@ -468,12 +469,14 @@ std::unique_ptr OS, bool IsVerboseAsm, bool UseDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) const { + MCAsmBackend *TAB, bool ShowInst, + const MCSubtargetInfo &STI) const { formatted_raw_ostream &OSRef = *OS; MCStreamer *S = llvm::createAsmStreamer(Ctx, std::move(OS), IsVerboseAsm, UseDwarfDirectory, InstPrint, CE, TAB, ShowInst); createAsmTargetStreamer(*S, OSRef, InstPrint, IsVerboseAsm); + S->setSubtarget(STI); return S; } Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1263,6 +1263,11 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; + + // Pass STI to the streamer to be able to generate long nops according to + // the function-specific subtarget information. + OutStreamer->setSubtarget(MF.getSubtarget()); + // Get the function symbol. 
CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; Index: lib/CodeGen/LLVMTargetMachine.cpp =================================================================== --- lib/CodeGen/LLVMTargetMachine.cpp +++ lib/CodeGen/LLVMTargetMachine.cpp @@ -235,7 +235,7 @@ MCStreamer *S = getTarget().createAsmStreamer( *Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, - Options.MCOptions.ShowMCInst); + Options.MCOptions.ShowMCInst, STI); AsmStreamer.reset(S); break; } Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -280,6 +280,8 @@ /// indicated by the hasRawTextSupport() predicate. void EmitRawTextImpl(StringRef String) override; + void EmitArch(StringRef Token) override; + void FinishImpl() override; }; @@ -915,6 +917,9 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { + if (AsmBackend) + EmitArchForAlignment(*AsmBackend); + // Some assemblers don't support non-power of two alignments, so we always // emit alignments as a power of two if possible. if (isPowerOf2_32(ByteAlignment)) { @@ -1594,6 +1599,11 @@ EmitEOL(); } +void MCAsmStreamer::EmitArch(StringRef Token) { + OS << "\t.arch\t" << Token; + EmitEOL(); +} + void MCAsmStreamer::FinishImpl() { // If we are generating dwarf for assembly source files dump out the sections. if (getContext().getGenDwarfForAssembly()) Index: lib/MC/MCAssembler.cpp =================================================================== --- lib/MC/MCAssembler.cpp +++ lib/MC/MCAssembler.cpp @@ -390,11 +390,30 @@ // Should NOP padding be written out before this fragment? 
unsigned BundlePadding = F.getBundlePadding(); if (BundlePadding > 0) { + const MCSubtargetInfo *STI; + assert(isBundlingEnabled() && "Writing bundle padding with disabled bundling"); assert(F.hasInstructions() && "Writing bundle padding for a fragment without instructions"); + // Get subtarget information for emitting padding. + switch (F.getKind()) { + case MCFragment::FT_CompactEncodedInst: + STI = &cast(F).getSubtargetInfo(); + break; + case MCFragment::FT_Data: + STI = cast(F).getSubtargetInfo(); + break; + case MCFragment::FT_Relaxable: + STI = &cast(F).getSubtargetInfo(); + break; + default: + llvm_unreachable("Unexpected fragment kind."); + } + + assert(STI && "Subtarget information must be available."); + unsigned TotalLength = BundlePadding + static_cast(FSize); if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) { // If the padding itself crosses a bundle boundary, it must be emitted @@ -406,12 +425,12 @@ // ---------------------------- // ^-------------------^ <- TotalLength unsigned DistanceToBoundary = TotalLength - getBundleAlignSize(); - if (!getBackend().writeNopData(DistanceToBoundary, OW)) + if (!getBackend().writeNopData(DistanceToBoundary, *STI, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(DistanceToBoundary) + " bytes"); BundlePadding -= DistanceToBoundary; } - if (!getBackend().writeNopData(BundlePadding, OW)) + if (!getBackend().writeNopData(BundlePadding, *STI, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(BundlePadding) + " bytes"); } @@ -456,7 +475,7 @@ // bytes left to fill use the Value and ValueSize to fill the rest. // If we are aligning with nops, ask that target to emit the right data. 
if (AF.hasEmitNops()) { - if (!Asm.getBackend().writeNopData(Count, OW)) + if (!Asm.getBackend().writeNopData(Count, AF.getSubtargetInfo(), OW)) report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); break; Index: lib/MC/MCELFStreamer.cpp =================================================================== --- lib/MC/MCELFStreamer.cpp +++ lib/MC/MCELFStreamer.cpp @@ -82,7 +82,7 @@ DF->getContents().size()); DF->getFixups().push_back(EF->getFixups()[i]); } - DF->setHasInstructions(true); + DF->setHasInstructions(true, &getSubtarget()); DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); } @@ -516,7 +516,8 @@ // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and // there are no fixups registered. - MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(); + MCCompactEncodedInstFragment *CEIF = + new MCCompactEncodedInstFragment(getSubtarget()); insert(CEIF); CEIF->getContents().append(Code.begin(), Code.end()); return; @@ -544,7 +545,7 @@ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); DF->getFixups().push_back(Fixups[i]); } - DF->setHasInstructions(true); + DF->setHasInstructions(true, &getSubtarget()); DF->getContents().append(Code.begin(), Code.end()); if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { Index: lib/MC/MCMachOStreamer.cpp =================================================================== --- lib/MC/MCMachOStreamer.cpp +++ lib/MC/MCMachOStreamer.cpp @@ -427,7 +427,8 @@ // Emit an align fragment if necessary. 
if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, getSubtarget(), + Section); MCFragment *F = new MCFillFragment(0, Size, Section); Symbol->setFragment(F); Index: lib/MC/MCObjectStreamer.cpp =================================================================== --- lib/MC/MCObjectStreamer.cpp +++ lib/MC/MCObjectStreamer.cpp @@ -424,9 +424,12 @@ int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { + EmitArchForAlignment(Assembler->getBackend()); + if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); + insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getSubtarget())); // Update the maximum alignment on the current section if necessary. MCSection *CurSec = getCurrentSection().first; @@ -534,6 +537,35 @@ MCStreamer::emitFill(IntNumValues, Size, Expr); } +void MCObjectStreamer::EmitArch(StringRef Token) { + assert(Token.find(' ') == std::string::npos && + Token.find('\t') == std::string::npos && + ".arch directive has too many arguments."); + + MCSubtargetInfo &NewSTI = getContext().getSubtargetCopy(getSubtarget()); + + if (Token.startswith(".")) { + // .arch is used to enable/disable a CPU feature. + StringRef Feature = Token.substr(1); + + bool EnableFeature = true; + if (Feature.startswith_lower("no")) { + EnableFeature = false; + Feature = Feature.substr(2); + } + + assert(NewSTI.isFeatureStringValid(Feature) && "Unknown CPU feature."); + NewSTI.ApplyFeatureFlag((EnableFeature ? "+" : "-") + Feature.str()); + } else { + // .arch is used to specify target CPU. + assert(NewSTI.isCPUStringValid(Token) && "Unknown CPU name."); + NewSTI.setDefaultFeatures(Token, ""); + NewSTI.setCPU(Token); + } + + setSubtarget(NewSTI); +} + void MCObjectStreamer::FinishImpl() { // If we are generating dwarf for assembly source files dump out the sections. 
if (getContext().getGenDwarfForAssembly()) Index: lib/MC/MCStreamer.cpp =================================================================== --- lib/MC/MCStreamer.cpp +++ lib/MC/MCStreamer.cpp @@ -23,12 +23,21 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/Support/COFF.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; +// Emitting .arch directive hurts compatibility with assemblers other than LLVM, +// so don't preserve alignment instructions across assembly by default. +static cl::opt + AsmPreserveAlignment("asm-preserve-alignment", cl::Hidden, + cl::desc("Preserve alignment instructions across " + "assembly by emitting .arch directive"), + cl::init(false)); + // Pin the vtables to this file. MCTargetStreamer::~MCTargetStreamer() {} @@ -518,6 +527,18 @@ report_fatal_error("Chained unwind areas can't have handlers!"); } +void MCStreamer::EmitArch(StringRef Token) {} + +void MCStreamer::EmitArchForAlignment(const MCAsmBackend &MAB) { + if (!AsmPreserveAlignment) + return; + + assert(STI && "Subtarget info must be available."); + SmallVector Prefs = MAB.getNopPrefs(*STI); + for (auto T : Prefs) + EmitArch(T); +} + static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID, MCSection *MainCFISec, const MCSection *TextSec) { Index: lib/MC/WinCOFFStreamer.cpp =================================================================== --- lib/MC/WinCOFFStreamer.cpp +++ lib/MC/WinCOFFStreamer.cpp @@ -249,7 +249,7 @@ if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, - ByteAlignment, Section); + ByteAlignment, getSubtarget(), Section); MCFillFragment *Fragment = new MCFillFragment( /*Value=*/0, Size, Section); Index: lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp =================================================================== --- 
lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -80,7 +80,8 @@ const MCAsmLayout &Layout) const override; void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; void HandleAssemblerFlag(MCAssemblerFlag Flag) {} @@ -319,7 +320,8 @@ llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented"); } -bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool AArch64AsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { // If the count is not 4-byte aligned, we must be writing data into the text // section (otherwise we have unaligned instructions, and thus have far // bigger problems), so just write zeros instead. Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -58,7 +58,8 @@ assert(!"Not implemented"); } bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; }; @@ -141,7 +142,8 @@ return Infos[Kind - FirstTargetFixupKind]; } -bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool AMDGPUAsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { OW->WriteZeros(Count); return true; Index: lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h 
=================================================================== --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -66,7 +66,8 @@ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; void handleAssemblerFlag(MCAssemblerFlag Flag) override; Index: lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -289,7 +289,8 @@ Res.setOpcode(RelaxedOp); } -bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool ARMAsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 Index: lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp =================================================================== --- lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -50,10 +50,12 @@ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override {} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; }; -bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool BPFAsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { if ((Count % 8) != 0) return false; Index: lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp 
=================================================================== --- lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -665,8 +665,8 @@ assert(Update && "Didn't find relaxation target"); } - bool writeNopData(uint64_t Count, - MCObjectWriter * OW) const override { + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override { static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP. ParseIn = 0x00004000, // In packet parse-bits. ParseEnd = 0x0000c000; // End of packet parse-bits. Index: lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp =================================================================== --- lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -75,10 +75,12 @@ const MCSubtargetInfo & /*STI*/, MCInst & /*Res*/) const override {} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; }; -bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool LanaiAsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { if ((Count % 4) != 0) return false; Index: lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h =================================================================== --- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -80,7 +80,8 @@ /// @} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, Index: lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp =================================================================== --- 
lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -455,7 +455,8 @@ /// it should return an error. /// /// \return - True on success. -bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool MipsAsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { // Check for a less than instruction size number of bytes // FIXME: 16 bit instructions are not handled yet here. // We shouldn't be using a hard coded number for instruction size. Index: lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp =================================================================== --- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -174,7 +174,8 @@ llvm_unreachable("relaxInstruction() unimplemented"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->write32(0x60000000); Index: lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp =================================================================== --- lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -254,7 +254,8 @@ llvm_unreachable("relaxInstruction() unimplemented"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override { // Cannot emit NOP with size not multiple of 32 bits. 
if (Count % 4 != 0) return false; Index: lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp =================================================================== --- lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -62,7 +62,8 @@ MCInst &Res) const override { llvm_unreachable("SystemZ does do not have assembler relaxation"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createSystemZObjectWriter(OS, OSABI); } @@ -104,6 +105,7 @@ } bool SystemZMCAsmBackend::writeNopData(uint64_t Count, + const MCSubtargetInfo &STI, MCObjectWriter *OW) const { for (uint64_t I = 0; I != Count; ++I) OW->write8(7); Index: lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp =================================================================== --- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -58,10 +58,12 @@ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override {} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; }; bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, + const MCSubtargetInfo &STI, MCObjectWriter *OW) const { if (Count == 0) return true; Index: lib/Target/X86/AsmParser/X86AsmParser.cpp =================================================================== --- lib/Target/X86/AsmParser/X86AsmParser.cpp +++ lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -718,6 +718,7 @@ bool parseDirectiveEven(SMLoc L); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); + bool ParseDirectiveArch(SMLoc L); 
bool processInstruction(MCInst &Inst, const OperandVector &Ops); @@ -2927,6 +2928,8 @@ return false; } else if (IDVal == ".even") return parseDirectiveEven(DirectiveID.getLoc()); + else if (IDVal == ".arch") + return ParseDirectiveArch(DirectiveID.getLoc()); return true; } @@ -2948,6 +2951,7 @@ getStreamer().EmitValueToAlignment(2, 0, 1, 0); return false; } + /// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { @@ -3015,6 +3019,60 @@ return false; } +/// ParseDirectiveArch +/// ::= .arch cpu +/// ::= .arch .[no]feature +bool X86AsmParser::ParseDirectiveArch(SMLoc L) { + // Use parseStringToEndOfStatement rather than getTok sequence to handle + // feature names containing '-' character more easily. + StringRef Token = getParser().parseStringToEndOfStatement().trim(); + + // Make sure .arch has only one argument. + if (Token.find(' ') != std::string::npos || + Token.find('\t') != std::string::npos) { + Error(L, "unexpected number of arguments in .arch directive"); + return false; + } + + MCSubtargetInfo &NewSTI = copySTI(); + + if (Token.startswith(".")) { + // .arch is used to enable/disable a CPU feature. + StringRef Feature = Token.substr(1); + + bool EnableFeature = true; + if (Feature.startswith_lower("no")) { + EnableFeature = false; + Feature = Feature.substr(2); + } + + if (!NewSTI.isFeatureStringValid(Feature)) { + Error(L, "unrecognized feature \'" + Feature + "\' in .arch directive"); + return false; + } + + NewSTI.ApplyFeatureFlag((EnableFeature ? "+" : "-") + Feature.str()); + } else { + // .arch is used to specify target CPU. + // Preserve mode bit. + StringRef Mode = is64BitMode() + ? "+64bit-mode" + : (is32BitMode() ? 
"+32bit-mode" : "+16bit-mode"); + + if (!NewSTI.isCPUStringValid(Token)) { + Error(L, "unrecognized CPU \'" + Token + "\' in .arch directive"); + return false; + } + + NewSTI.setDefaultFeatures(Token, Mode); + NewSTI.setCPU(Token); + } + + getParser().getStreamer().EmitArch(Token); + + return false; +} + // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser X(TheX86_32Target); Index: lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -29,6 +29,12 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +// Long nop related feature bits. +const struct LongNopDesc { + unsigned Bit; + StringRef Name; +} LongNopFeatures[0] = {}; + static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: @@ -70,19 +76,8 @@ }; class X86AsmBackend : public MCAsmBackend { - const StringRef CPU; - bool HasNopl; - const uint64_t MaxNopLength; public: - X86AsmBackend(const Target &T, StringRef CPU) - : MCAsmBackend(), CPU(CPU), - MaxNopLength((CPU == "slm" || CPU == "lakemont") ? 
7 : 15) { - HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && - CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && - CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && - CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" && - CPU != "c3" && CPU != "c3-2"; - } + X86AsmBackend(const Target &T) : MCAsmBackend() {} unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -135,7 +130,31 @@ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const override; + + SmallVector getNopPrefs(const MCSubtargetInfo &STI) const { + SmallVector Prefs; + + // Emit ".arch" directive only for valid CPUs. + if (!STI.isCPUStringValid(STI.getCPU())) + return Prefs; + + // Emit ".arch cpu". + Prefs.push_back(STI.getCPU().str()); + + // Emit ".arch .[no]feature" if some of the long nop related features were + // explicitly changed from the default values. + MCSubtargetInfo DefaultSTI = MCSubtargetInfo(STI); + DefaultSTI.setDefaultFeatures(STI.getCPU(), ""); + for (auto Desc : LongNopFeatures) { + bool Enabled = STI.getFeatureBits()[Desc.Bit]; + if (DefaultSTI.getFeatureBits()[Desc.Bit] != Enabled) + Prefs.push_back((Enabled ? "." : ".no") + Desc.Name.str()); + } + + return Prefs; + } }; } // end anonymous namespace @@ -324,28 +343,37 @@ /// \brief Write a sequence of optimal nops to the output, covering \p Count /// bytes. 
/// \return - true on success, false on failure -bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool X86AsmBackend::writeNopData(uint64_t Count, const MCSubtargetInfo &STI, + MCObjectWriter *OW) const { + const StringRef CPU = STI.getCPU(); + const bool HasNopl = + CPU != "generic" && CPU != "i386" && CPU != "i486" && CPU != "i586" && + CPU != "pentium" && CPU != "pentium-mmx" && CPU != "i686" && + CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && CPU != "geode" && + CPU != "winchip-c6" && CPU != "winchip2" && CPU != "c3" && CPU != "c3-2"; + const uint64_t MaxNopLength = (CPU == "slm" || CPU == "lakemont") ? 7 : 15; + static const uint8_t Nops[10][10] = { - // nop - {0x90}, - // xchg %ax,%ax - {0x66, 0x90}, - // nopl (%[re]ax) - {0x0f, 0x1f, 0x00}, - // nopl 0(%[re]ax) - {0x0f, 0x1f, 0x40, 0x00}, - // nopl 0(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopl 0L(%[re]ax) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopw 0L(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopw %cs:0L(%[re]ax,%[re]ax,1) - {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nop + {0x90}, + // xchg %ax,%ax + {0x66, 0x90}, + // nopl (%[re]ax) + {0x0f, 0x1f, 0x00}, + // nopl 0(%[re]ax) + {0x0f, 0x1f, 0x40, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw 0L(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw %cs:0L(%[re]ax,%[re]ax,1) + {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, }; // This CPU doesn't support long nops. If needed add more. 
@@ -380,14 +408,14 @@ class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : X86AsmBackend(T, CPU), OSABI(OSABI) {} + ELFX86AsmBackend(const Target &T, uint8_t OSABI) + : X86AsmBackend(T), OSABI(OSABI) {} }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); @@ -396,8 +424,8 @@ class ELFX86_X32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, @@ -407,8 +435,8 @@ class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { public: - ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, @@ -418,11 +446,12 @@ class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); + return 
createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, + ELF::EM_X86_64); } }; @@ -430,10 +459,8 @@ bool Is64Bit; public: - WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU) - : X86AsmBackend(T, CPU) - , Is64Bit(is64Bit) { - } + WindowsX86AsmBackend(const Target &T, bool is64Bit) + : X86AsmBackend(T), Is64Bit(is64Bit) {} Optional<MCFixupKind> getFixupKind(StringRef Name) const override { return StringSwitch<Optional<MCFixupKind>>(Name) @@ -784,9 +811,8 @@ } public: - DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU, - bool Is64Bit) - : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) { + DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, bool Is64Bit) + : X86AsmBackend(T), MRI(MRI), Is64Bit(Is64Bit) { memset(SavedRegs, 0, sizeof(SavedRegs)); OffsetSize = Is64Bit ? 8 : 4; MoveInstrSize = Is64Bit ? 3 : 2; @@ -796,9 +822,8 @@ class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: - DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU) - : DarwinX86AsmBackend(T, MRI, CPU, false) {} + DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI) + : DarwinX86AsmBackend(T, MRI, false) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, @@ -808,17 +833,18 @@ /// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { + ArrayRef<MCCFIInstruction> Instrs) const override { return generateCompactUnwindEncodingImpl(Instrs); } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { const MachO::CPUSubTypeX86 Subtype; + public: DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} + MachO::CPUSubTypeX86 st) + : DarwinX86AsmBackend(T, MRI, true), Subtype(st) {} MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, @@ -827,7 +853,7 @@ /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { + ArrayRef<MCCFIInstruction> Instrs) const override { return generateCompactUnwindEncodingImpl(Instrs); } }; @@ -840,17 +866,17 @@ StringRef CPU, const MCTargetOptions &Options) { if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, CPU); + return new DarwinX86_32AsmBackend(T, MRI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, false, CPU); + return new WindowsX86AsmBackend(T, false); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.isOSIAMCU()) - return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU); + return new ELFX86_IAMCUAsmBackend(T, OSABI); - return new ELFX86_32AsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, @@ -863,15 +889,15 @@ StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName()) .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, CPU, CS); + return new DarwinX86_64AsmBackend(T, MRI, CS); } if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, true, CPU); + return new
WindowsX86AsmBackend(T, true); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.getEnvironment() == Triple::GNUX32) - return new ELFX86_X32AsmBackend(T, OSABI, CPU); - return new ELFX86_64AsmBackend(T, OSABI, CPU); + return new ELFX86_X32AsmBackend(T, OSABI); + return new ELFX86_64AsmBackend(T, OSABI); } Index: test/CodeGen/X86/multiversioning-long-nops.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/multiversioning-long-nops.ll @@ -0,0 +1,95 @@ +; .ll -> .o +; RUN: llc < %s -mtriple i386-unknown-linux -filetype=obj -o - \ +; RUN: | llvm-objdump -d -no-show-raw-insn - \ +; RUN: | FileCheck %s + +; .ll -> .s -> .o +; RUN: llc < %s -mtriple i386-unknown-linux -asm-preserve-alignment -o %t +; RUN: cat %t | FileCheck -check-prefix=ASM %s +; RUN: llvm-mc < %t -triple i386-unknown-linux -filetype=obj -o - \ +; RUN: | llvm-objdump -d -no-show-raw-insn - \ +; RUN: | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +module asm "inc %eax" +module asm ".p2align 3" +module asm "inc %eax" + +; CHECK: incl %eax +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: incl %eax + +; ASM: incl %eax +; ASM: .arch generic +; ASM-NEXT: .p2align 3, 0x90 +; ASM: incl %eax + +define void @test1() #0 { +entry: + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 5", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + ret void + +; CHECK-LABEL: test1: +; CHECK: incl %eax +; CHECK: nopw %cs:(%eax,%eax) +; CHECK: incl %eax + +; ASM-LABEL: test1: +; ASM: incl %eax +; ASM: .arch pentium4 +; ASM-NEXT: .p2align 5, 0x90 +; ASM: incl %eax +} + +define void @test2() #1 { +entry: + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + call void asm 
sideeffect ".p2align 5", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + ret void + +; CHECK-LABEL: test2: +; CHECK: incl %eax +; CHECK: nopw %cs:(%eax,%eax) +; CHECK: incl %eax + +; ASM-LABEL: test2: +; ASM: incl %eax +; ASM: .arch core-avx2 +; ASM-NEXT: .p2align 5, 0x90 +; ASM: incl %eax +} + +define void @test3() #2 { +entry: + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 5", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect "inc %eax", "~{dirflag},~{fpsr},~{flags}"() + ret void + +; CHECK-LABEL: test3: +; CHECK: incl %eax +; CHECK: nopl (%eax) +; CHECK: nopl (%eax) +; CHECK: incl %eax + +; ASM-LABEL: test3: +; ASM: incl %eax +; ASM: .arch slm +; ASM-NEXT: .p2align 5, 0x90 +; ASM: incl %eax +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core-avx2" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="slm" "target-features"="+aes,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/MC/COFF/align-nops.s =================================================================== --- test/MC/COFF/align-nops.s +++ test/MC/COFF/align-nops.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 -mcpu=pentium4 %s | llvm-readobj -s -sd | FileCheck %s // Test that we get optimal nops in text .text Index: test/MC/ELF/align-nops.s =================================================================== --- test/MC/ELF/align-nops.s +++ test/MC/ELF/align-nops.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - | llvm-readobj -s -sd | FileCheck %s // Test that we get optimal nops in text .text Index: test/MC/MachO/x86_32-optimal_nop.s =================================================================== --- test/MC/MachO/x86_32-optimal_nop.s +++ test/MC/MachO/x86_32-optimal_nop.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s +// RUN: llvm-mc -triple i386-apple-darwin9 -mcpu=pentium4 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s # 1 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros Index: test/MC/X86/AlignedBundling/different-sections.s =================================================================== --- test/MC/X86/AlignedBundling/different-sections.s +++ test/MC/X86/AlignedBundling/different-sections.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc 
-filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test two different executable sections with bundling. Index: test/MC/X86/AlignedBundling/long-nop-pad.s =================================================================== --- test/MC/X86/AlignedBundling/long-nop-pad.s +++ test/MC/X86/AlignedBundling/long-nop-pad.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test that long nops are generated for padding where possible. 
Index: test/MC/X86/AlignedBundling/misaligned-bundle-group.s =================================================================== --- test/MC/X86/AlignedBundling/misaligned-bundle-group.s +++ test/MC/X86/AlignedBundling/misaligned-bundle-group.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s -# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s Index: test/MC/X86/AlignedBundling/misaligned-bundle.s =================================================================== --- test/MC/X86/AlignedBundling/misaligned-bundle.s +++ test/MC/X86/AlignedBundling/misaligned-bundle.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s -# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - \ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s Index: test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s =================================================================== --- test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s +++ test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc 
-filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test some variations of padding to the end of a bundle. Index: test/MC/X86/AlignedBundling/pad-bundle-groups.s =================================================================== --- test/MC/X86/AlignedBundling/pad-bundle-groups.s +++ test/MC/X86/AlignedBundling/pad-bundle-groups.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s # Test some variations of padding for bundle-locked groups. 
Index: test/MC/X86/AlignedBundling/relax-in-bundle-group.s =================================================================== --- test/MC/X86/AlignedBundling/relax-in-bundle-group.s +++ test/MC/X86/AlignedBundling/relax-in-bundle-group.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble - | FileCheck %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble - | FileCheck %s # Test that instructions inside bundle-locked groups are relaxed even if their Index: test/MC/X86/AlignedBundling/single-inst-bundling.s =================================================================== --- test/MC/X86/AlignedBundling/single-inst-bundling.s +++ test/MC/X86/AlignedBundling/single-inst-bundling.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s -# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mcpu=pentium4 -mc-relax-all %s -o - \ # RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s # Test simple NOP insertion for single instructions. 
Index: test/MC/X86/arch-directive.s =================================================================== --- /dev/null +++ test/MC/X86/arch-directive.s @@ -0,0 +1,42 @@ +# RUN: not llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s 2>%t -o - | FileCheck %s +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERR + +# CHECK: .text +.arch core-avx2 +# CHECK: .arch core-avx2 +.arch .avx512f +# CHECK: .arch .avx512f +.arch .noavx2 +# CHECK: .arch .noavx2 + +addq $1, %rax +# CHECK: addq $1, %rax + +.arch i386 +# CHECK: .arch i386 +.arch .noavx +# CHECK: .arch .noavx +.arch .x87 +# CHECK: .arch .x87 +.arch .no64bit-mode +# CHECK: .arch .no64bit-mode +.arch .32bit-mode +# CHECK: .arch .32bit-mode + +addq $1, %rax +# CHECK-NOT: addq $1, %rax +# CHECK-ERR: error: register %rax is only available in 64-bit mode + +.arch fake-cpu +# CHECK-NOT: .arch fake-cpu +# CHECK-ERR: error: unrecognized CPU 'fake-cpu' in .arch directive +.arch .fake-feature +# CHECK-NOT: .arch .fake-feature +# CHECK-ERR: error: unrecognized feature 'fake-feature' in .arch directive +.arch .nofake-feature +# CHECK-NOT: .arch .nofake-feature +# CHECK-ERR: error: unrecognized feature 'fake-feature' in .arch directive + +.arch something unexpected +# CHECK-NOT: .arch something unexpected +# CHECK-ERR: error: unexpected number of arguments in .arch directive Index: test/MC/X86/x86_long_nop.s =================================================================== --- test/MC/X86/x86_long_nop.s +++ test/MC/X86/x86_long_nop.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-apple-darwin10.0 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-apple-darwin8 %s | llvm-objdump -d -no-show-raw-insn - | 
FileCheck %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=pentium4 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=core-avx2 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-apple-darwin10.0 -mcpu=k8 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-apple-darwin8 -mcpu=barcelona %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=slm %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP7 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=lakemont %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP7 %s Index: test/MC/X86/x86_long_nop_arch.s =================================================================== --- /dev/null +++ test/MC/X86/x86_long_nop_arch.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s + +# Ensure .arch directive affects long nop emission. 
+ +incl %eax + +.arch i386 +.p2align 4 + +incl %eax + +.arch pentium4 +.p2align 4 + +incl %eax + +.arch slm +.p2align 4 + +incl %eax + +# CHECK: 0: incl +# CHECK-NEXT: 1: nop +# CHECK-NEXT: 2: nop +# CHECK-NEXT: 3: nop +# CHECK-NEXT: 4: nop +# CHECK-NEXT: 5: nop +# CHECK-NEXT: 6: nop +# CHECK-NEXT: 7: nop +# CHECK-NEXT: 8: nop +# CHECK-NEXT: 9: nop +# CHECK-NEXT: a: nop +# CHECK-NEXT: b: nop +# CHECK-NEXT: c: nop +# CHECK-NEXT: d: nop +# CHECK-NEXT: e: nop +# CHECK-NEXT: f: nop +# CHECK-NEXT: 10: incl +# CHECK-NEXT: 11: nopw +# CHECK-NEXT: 20: incl +# CHECK-NEXT: 21: nopl +# CHECK-NEXT: 28: nopl +# CHECK-NEXT: 2f: nop +# CHECK-NEXT: 30: incl Index: tools/llvm-mc/llvm-mc.cpp =================================================================== --- tools/llvm-mc/llvm-mc.cpp +++ tools/llvm-mc/llvm-mc.cpp @@ -580,7 +580,7 @@ auto FOut = llvm::make_unique<formatted_raw_ostream>(*OS); Str.reset(TheTarget->createAsmStreamer( Ctx, std::move(FOut), /*asmverbose*/ true, - /*useDwarfDirectory*/ true, IP, CE, MAB, ShowInst)); + /*useDwarfDirectory*/ true, IP, CE, MAB, ShowInst, *STI)); } else if (FileType == OFT_Null) { Str.reset(TheTarget->createNullStreamer(Ctx));