Index: lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp =================================================================== --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -67,19 +67,11 @@ }; class X86AsmBackend : public MCAsmBackend { - const StringRef CPU; - bool HasNopl; - const uint64_t MaxNopLength; + const MCSubtargetInfo &STI; + public: - X86AsmBackend(const Target &T, StringRef CPU) - : MCAsmBackend(), CPU(CPU), - MaxNopLength((CPU == "slm" || CPU == "silvermont") ? 7 : 15) { - HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && - CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && - CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && - CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" && - CPU != "c3" && CPU != "c3-2" && CPU != "lakemont" && CPU != ""; - } + X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) + : MCAsmBackend(), STI(STI) {} unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -346,14 +338,17 @@ }; // This CPU doesn't support long nops. If needed add more. - // FIXME: Can we get this from the subtarget somehow? // FIXME: We could generated something better than plain 0x90. - if (!HasNopl) { + if (!STI.getFeatureBits()[X86::FeatureNOPL]) { for (uint64_t i = 0; i < Count; ++i) OW->write8(0x90); return true; } + uint64_t MaxNopLength = STI.getFeatureBits()[X86::ProcIntelSLM] ? 7 + : STI.getFeatureBits()[X86::FeaturePreferNOP15] ? 15 + : 10; + - // 15 is the longest single nop instruction. Emit as many 15-byte nops as - // needed, then emit a nop of the remaining length. + // 15 is the longest single nop instruction, but MaxNopLength may be less. + // Emit as many MaxNopLength-byte nops as needed, then a nop of the remainder. 
do { @@ -377,14 +372,15 @@ class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : X86AsmBackend(T, CPU), OSABI(OSABI) {} + ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI), OSABI(OSABI) {} }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -394,8 +390,9 @@ class ELFX86_X32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -406,8 +403,9 @@ class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { public: - ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -418,8 +416,9 @@ class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -431,9 +430,9 @@ bool Is64Bit; public: - WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU) - : 
X86AsmBackend(T, CPU) - , Is64Bit(is64Bit) { + WindowsX86AsmBackend(const Target &T, bool is64Bit, + const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI), Is64Bit(is64Bit) { } Optional getFixupKind(StringRef Name) const override { @@ -790,9 +789,9 @@ } public: - DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU, - bool Is64Bit) - : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) { + DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, bool Is64Bit) + : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) { memset(SavedRegs, 0, sizeof(SavedRegs)); OffsetSize = Is64Bit ? 8 : 4; MoveInstrSize = Is64Bit ? 3 : 2; @@ -803,8 +802,8 @@ class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU) - : DarwinX86AsmBackend(T, MRI, CPU, false) {} + const MCSubtargetInfo &STI) + : DarwinX86AsmBackend(T, MRI, STI, false) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -824,8 +823,8 @@ const MachO::CPUSubTypeX86 Subtype; public: DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} + const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st) + : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -847,19 +846,18 @@ const MCRegisterInfo &MRI, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); - StringRef CPU = STI.getCPU(); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, CPU); + return new DarwinX86_32AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, false, CPU); + return new WindowsX86AsmBackend(T, false, STI); uint8_t OSABI = 
MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.isOSIAMCU()) - return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU); + return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); - return new ELFX86_32AsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI, STI); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, @@ -867,21 +865,20 @@ const MCRegisterInfo &MRI, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); - StringRef CPU = STI.getCPU(); if (TheTriple.isOSBinFormatMachO()) { MachO::CPUSubTypeX86 CS = StringSwitch(TheTriple.getArchName()) .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, CPU, CS); + return new DarwinX86_64AsmBackend(T, MRI, STI, CS); } if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, true, CPU); + return new WindowsX86AsmBackend(T, true, STI); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.getEnvironment() == Triple::GNUX32) - return new ELFX86_X32AsmBackend(T, OSABI, CPU); - return new ELFX86_64AsmBackend(T, OSABI, CPU); + return new ELFX86_X32AsmBackend(T, OSABI, STI); + return new ELFX86_64AsmBackend(T, OSABI, STI); } Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -34,6 +34,12 @@ def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; +def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", + "Enable NOPL instruction">; + +def FeaturePreferNOP15 : SubtargetFeature<"prefer-nop15", "PreferNOP15", "true", + "Enable up to 15 byte NOPL instruction">; + def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; @@ -390,16 +396,16 @@ def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureX87, 
FeatureSlowUAMem16, FeatureMMX]>; -foreach P = ["i686", "pentiumpro"] in { - def : Proc; -} +def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; +def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, + FeatureNOPL]>; def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureCMOV, FeatureFXSR]>; + FeatureCMOV, FeatureFXSR, FeatureNOPL]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc; + FeatureFXSR, FeatureNOPL]>; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. @@ -414,12 +420,12 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR]>; + FeatureSSE2, FeatureFXSR, FeatureNOPL]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel; + FeatureSSE2, FeatureFXSR, FeatureNOPL]>; } // Intel Quark. @@ -428,18 +434,19 @@ // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR]>; + FeatureFXSR, FeatureNOPL]>; // NetBurst. 
def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR]>; + FeatureFXSR, FeatureNOPL]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B ]>; @@ -450,6 +457,7 @@ FeatureMMX, FeatureSSSE3, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -460,6 +468,7 @@ FeatureMMX, FeatureSSE41, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -473,6 +482,7 @@ FeatureMMX, FeatureSSSE3, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureMOVBE, FeatureLEAForSP, @@ -492,6 +502,7 @@ FeatureMMX, FeatureSSE42, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureMOVBE, FeaturePOPCNT, @@ -514,6 +525,7 @@ FeatureMMX, FeatureSSE42, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureMOVBE, FeaturePOPCNT, @@ -543,6 +555,7 @@ FeatureMMX, FeatureSSE42, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureLAHFSAHF, @@ -558,6 +571,7 @@ FeatureMMX, FeatureSSE42, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, @@ -584,6 +598,7 @@ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, @@ -757,27 +772,28 @@ def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; + def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; + Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc; + FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; + FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; } @@ -788,6 +804,8 @@ FeatureSSSE3, FeatureSSE4A, FeatureFXSR, + 
FeatureNOPL, + FeaturePreferNOP15, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, @@ -802,6 +820,8 @@ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, + FeaturePreferNOP15, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, @@ -832,6 +852,7 @@ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, @@ -853,6 +874,7 @@ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -879,6 +901,7 @@ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -901,6 +924,7 @@ FeatureMMX, FeatureAVX2, FeatureFXSR, + FeatureNOPL, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, @@ -938,6 +962,8 @@ FeatureFMA, FeatureFSGSBase, FeatureFXSR, + FeatureNOPL, + FeaturePreferNOP15, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, @@ -982,6 +1008,7 @@ FeatureMMX, FeatureSSE2, FeatureFXSR, + FeatureNOPL, Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -92,6 +92,13 @@ /// True if the processor supports X87 instructions. bool HasX87; + /// True if this processor has NOPL instruction + /// (generally pentium pro+). + bool HasNOPL; + + /// True if this processor prefers up to a 15 byte NOPL instruction. + bool PreferNOP15; + /// True if this processor has conditional move instructions /// (generally pentium pro+). 
bool HasCMov; @@ -469,6 +476,7 @@ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasX87() const { return HasX87; } + bool hasNOPL() const { return HasNOPL; } bool hasCMov() const { return HasCMov; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -260,6 +260,8 @@ X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasX87 = false; + HasNOPL = false; + PreferNOP15 = false; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; Index: test/MC/MachO/x86_32-optimal_nop.s =================================================================== --- test/MC/MachO/x86_32-optimal_nop.s +++ test/MC/MachO/x86_32-optimal_nop.s @@ -202,15 +202,15 @@ // CHECK: 0090: C3000000 00000000 00000000 00000000 |................| // CHECK: 00A0: C3C3C3C3 C3C3C366 0F1F8400 00000000 |.......f........| // CHECK: 00B0: C3000000 00000000 00000000 00000000 |................| -// CHECK: 00C0: C3C3C3C3 C366662E 0F1F8400 00000000 |.....ff.........| +// CHECK: 00C0: C3C3C3C3 C3662E0F 1F840000 00000090 |.....f..........| // CHECK: 00D0: C3000000 00000000 00000000 00000000 |................| -// CHECK: 00E0: C3C3C3C3 6666662E 0F1F8400 00000000 |....fff.........| +// CHECK: 00E0: C3C3C3C3 662E0F1F 84000000 00006690 |....f.........f.| // CHECK: 00F0: C3000000 00000000 00000000 00000000 |................| -// CHECK: 0100: C3C3C366 6666662E 0F1F8400 00000000 |...ffff.........| +// CHECK: 0100: C3C3C366 2E0F1F84 00000000 000F1F00 |...f............| // CHECK: 0110: C3000000 00000000 00000000 00000000 |................| -// CHECK: 0120: C3C36666 6666662E 0F1F8400 00000000 |..fffff.........| +// CHECK: 0120: C3C3662E 0F1F8400 00000000 0F1F4000 |..f...........@.| // CHECK: 0130: C3000000 00000000 00000000 00000000 |................| -// CHECK: 0140: C3666666 
6666662E 0F1F8400 00000000 |.ffffff.........| +// CHECK: 0140: C3662E0F 1F840000 0000000F 1F440000 |.f...........D..| // CHECK: 0150: C3 |.| // CHECK: ) // CHECK: } Index: test/MC/X86/AlignedBundling/long-nop-pad.s =================================================================== --- test/MC/X86/AlignedBundling/long-nop-pad.s +++ test/MC/X86/AlignedBundling/long-nop-pad.s @@ -15,7 +15,8 @@ .bundle_unlock -# To align this group to a bundle end, we need a 15-byte NOP and a 12-byte NOP. +# To align this group to a bundle end, we need two 10-byte NOPs and a 7-byte NOP. # CHECK: 0: nop -# CHECK-NEXT: f: nop +# CHECK-NEXT: a: nop +# CHECK-NEXT: 14: nop # CHECK: 1b: callq # This push instruction is 1 byte long @@ -24,6 +25,7 @@ .bundle_unlock -# To align this group to a bundle end, we need two 15-byte NOPs, and a 1-byte. +# To align this group to a bundle end, we need three 10-byte NOPs, and a 1-byte. # CHECK: 20: nop -# CHECK-NEXT: 2f: nop +# CHECK-NEXT: 2a: nop +# CHECK-NEXT: 34: nop # CHECK-NEXT: 3e: nop # CHECK-NEXT: 3f: pushq Index: test/MC/X86/AlignedBundling/misaligned-bundle-group.s =================================================================== --- test/MC/X86/AlignedBundling/misaligned-bundle-group.s +++ test/MC/X86/AlignedBundling/misaligned-bundle-group.s @@ -13,9 +13,9 @@ .bundle_lock align_to_end # CHECK: 1: nopw %cs:(%eax,%eax) # CHECK: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1f: nop +# CHECK-RELAX: 1a: nop # CHECK-RELAX: 20: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 2f: nopw %cs:(%eax,%eax) +# CHECK-RELAX: 2a: nopw %cs:(%eax,%eax) # CHECK-OPT: 1b: calll -4 # CHECK-RELAX: 3b: calll -4 calll bar # 5 bytes Index: test/MC/X86/AlignedBundling/misaligned-bundle.s =================================================================== --- test/MC/X86/AlignedBundling/misaligned-bundle.s +++ test/MC/X86/AlignedBundling/misaligned-bundle.s @@ -12,7 +12,7 @@ .align 16 # CHECK: 1: nopw %cs:(%eax,%eax) # CHECK-RELAX: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1f: nop +# CHECK-RELAX: 1a: nop # CHECK-OPT: 10: movl $1, (%esp) # CHECK-RELAX: 20: movl $1, (%esp) movl $0x1, (%esp) # 7 bytes Index: 
test/MC/X86/AlignedBundling/pad-bundle-groups.s =================================================================== --- test/MC/X86/AlignedBundling/pad-bundle-groups.s +++ test/MC/X86/AlignedBundling/pad-bundle-groups.s @@ -41,6 +41,7 @@ -# And here we'll need a 11-byte NOP +# And here we'll need 11 bytes of padding: a 10-byte NOP and a 1-byte NOP # CHECK: 30: callq # CHECK: 35: nop +# CHECK-NEXT: 3f: nop # CHECK-NEXT: 40: callq # CHECK-NEXT: 45: callq Index: test/MC/X86/x86_long_nop.s =================================================================== --- test/MC/X86/x86_long_nop.s +++ test/MC/X86/x86_long_nop.s @@ -6,14 +6,15 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=silvermont %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP7 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=lakemont %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=NOP1 %s -# Ensure alignment directives also emit sequences of 15-byte NOPs on processors +# Ensure alignment directives also emit sequences of 10-byte NOPs on processors # capable of using long NOPs. inc %eax .p2align 5 inc %eax # CHECK: 0: inc # CHECK-NEXT: 1: nop -# CHECK-NEXT: 10: nop +# CHECK-NEXT: b: nop +# CHECK-NEXT: 15: nop # CHECK-NEXT: 1f: nop # CHECK-NEXT: 20: inc