diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -96,8 +96,8 @@
 };
 
 // Emit a minimal sequence of nops spanning NumBytes bytes.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
-                     const MCSubtargetInfo &STI);
+static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
+                        const X86Subtarget *Subtarget);
 
 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                  const MCSubtargetInfo &STI,
@@ -117,8 +117,8 @@
     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
     InShadow = false;
-    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
-             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
+    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+                &MF->getSubtarget<X86Subtarget>());
   }
 }
@@ -1082,29 +1082,26 @@
 /// Return the longest nop which can be efficiently decoded for the given
 /// target cpu. 15-bytes is the longest single NOP instruction, but some
 /// platforms can't decode the longest forms efficiently.
-static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
-  uint64_t MaxNopLength = 10;
-  if (STI.getFeatureBits()[X86::ProcIntelSLM])
-    MaxNopLength = 7;
-  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
-    MaxNopLength = 15;
-  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
-    MaxNopLength = 11;
-  return MaxNopLength;
+static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
+  if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
+    return 7;
+  if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
+    return 15;
+  if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
+    return 11;
+  if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
+    return 10;
+  if (Subtarget->is32Bit())
+    return 2;
+  return 1;
 }
 
 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
 /// bytes. Return the size of nop emitted.
-static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
-                        const MCSubtargetInfo &STI) {
-  if (!Is64Bit) {
-    // TODO Do additional checking if the CPU supports multi-byte nops.
-    OS.emitInstruction(MCInstBuilder(X86::NOOP), STI);
-    return 1;
-  }
-
+static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
+                        const X86Subtarget *Subtarget, bool LegacyNop = false) {
   // Cap a single nop emission at the profitable value for the target
-  NumBytes = std::min(NumBytes, MaxLongNopLength(STI));
+  NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
 
   unsigned NopSize;
   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
@@ -1121,7 +1118,7 @@
     break;
   case 2:
     NopSize = 2;
-    Opc = X86::XCHG16ar;
+    Opc = LegacyNop ? X86::MOV32rr_REV : X86::XCHG16ar;
     break;
   case 3:
     NopSize = 3;
@@ -1178,10 +1175,15 @@
   switch (Opc) {
   default:
     llvm_unreachable("Unexpected opcode");
   case X86::NOOP:
-    OS.emitInstruction(MCInstBuilder(Opc), STI);
+    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
     break;
   case X86::XCHG16ar:
-    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI);
+    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
+                       *Subtarget);
+    break;
+  case X86::MOV32rr_REV:
+    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::EDI).addReg(X86::EDI),
+                       *Subtarget);
     break;
   case X86::NOOPL:
   case X86::NOOPW:
@@ -1191,7 +1193,7 @@
                            .addReg(IndexReg)
                            .addImm(Displacement)
                            .addReg(SegmentReg),
-                       STI);
+                       *Subtarget);
     break;
   }
   assert(NopSize <= NumBytes && "We overemitted?");
@@ -1199,12 +1201,12 @@
 }
 
 /// Emit the optimal amount of multi-byte nops on X86.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
-                     const MCSubtargetInfo &STI) {
+static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
+                        const X86Subtarget *Subtarget) {
   unsigned NopsToEmit = NumBytes;
   (void)NopsToEmit;
   while (NumBytes) {
-    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
+    NumBytes -= emitNop(OS, NumBytes, Subtarget);
     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
   }
 }
@@ -1217,8 +1219,7 @@
   StatepointOpers SOpers(&MI);
   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
-    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
-             getSubtargetInfo());
+    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
   } else {
     // Lower call target and choose correct opcode
     const MachineOperand &CallTarget = SOpers.getCallTarget();
@@ -1350,8 +1351,15 @@
       // bytes too, so the check on MinSize is important.
       MCI.setOpcode(X86::PUSH64rmr);
     } else {
-      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
-                                 getSubtargetInfo());
+      // For compatibility reasons, when targeting MSVC, it is important to
+      // generate a 'legacy' NOP in the form of 8B FF MOV EDI, EDI. Some tools
+      // rely specifically on this pattern to be able to patch a function.
+      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
+      bool LegacyNop =
+          Subtarget->is32Bit() && Subtarget->isTargetWindowsMSVC() &&
+          (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3");
+
+      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget, LegacyNop);
       assert(NopSize == MinSize && "Could not implement MinSize!");
       (void)NopSize;
     }
@@ -1435,8 +1443,7 @@
 
   assert(NumBytes >= EncodedBytes &&
          "Patchpoint can't request size less than the length of a call.");
-  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
-           getSubtargetInfo());
+  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
 }
 
 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
@@ -1496,7 +1503,7 @@
       EmitAndCountInstruction(
          MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
     } else {
-      EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+      emitX86Nops(*OutStreamer, 4, Subtarget);
     }
   }
@@ -1525,7 +1532,7 @@
     if (UsedMask[I])
       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
     else
-      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+      emitX86Nops(*OutStreamer, 1, Subtarget);
 
   OutStreamer->AddComment("xray custom event end.");
@@ -1594,7 +1601,7 @@
       EmitAndCountInstruction(
          MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
     } else {
-      EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+      emitX86Nops(*OutStreamer, 4, Subtarget);
     }
   }
@@ -1628,7 +1635,7 @@
     if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
     else
-      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+      emitX86Nops(*OutStreamer, 1, Subtarget);
 
   OutStreamer->AddComment("xray typed event end.");
@@ -1648,7 +1655,7 @@
             .getValueAsString()
             .getAsInteger(10, Num))
       return;
-    EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
+    emitX86Nops(*OutStreamer, Num, Subtarget);
     return;
   }
   // We want to emit the following pattern:
@@ -1672,7 +1679,7 @@
   // an operand (computed as an offset from the jmp instruction).
   // FIXME: Find another less hacky way do force the relative jump.
   OutStreamer->emitBytes("\xeb\x09");
-  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+  emitX86Nops(*OutStreamer, 9, Subtarget);
   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
 }
@@ -1704,7 +1711,7 @@
     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
       Ret.addOperand(MaybeOperand.getValue());
   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
-  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
+  emitX86Nops(*OutStreamer, 10, Subtarget);
   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
 }
@@ -1727,7 +1734,7 @@
   // an operand (computed as an offset from the jmp instruction).
   // FIXME: Find another less hacky way do force the relative jump.
   OutStreamer->emitBytes("\xeb\x09");
-  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+  emitX86Nops(*OutStreamer, 9, Subtarget);
   OutStreamer->emitLabel(Target);
   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
diff --git a/llvm/test/CodeGen/X86/patchable-function-entry.ll b/llvm/test/CodeGen/X86/patchable-function-entry.ll
--- a/llvm/test/CodeGen/X86/patchable-function-entry.ll
+++ b/llvm/test/CodeGen/X86/patchable-function-entry.ll
@@ -31,7 +31,7 @@
 define void @f2() "patchable-function-entry"="2" {
 ; CHECK-LABEL: f2:
 ; CHECK-NEXT: .Lfunc_begin2:
-; 32-COUNT-2: nop
+; 32: xchgw %ax, %ax
 ; 64: xchgw %ax, %ax
 ; CHECK-NEXT: ret
 ; CHECK: .section __patchable_function_entries,"awo",@progbits,f2{{$}}
@@ -46,7 +46,8 @@
 define void @f3() "patchable-function-entry"="3" comdat {
 ; CHECK-LABEL: f3:
 ; CHECK-NEXT: .Lfunc_begin3:
-; 32-COUNT-3: nop
+; 32: xchgw %ax, %ax
+; 32-NEXT: nop
 ; 64: nopl (%rax)
 ; CHECK: ret
 ; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f3,comdat,f3{{$}}
@@ -61,7 +62,8 @@
 define void @f5() "patchable-function-entry"="5" comdat {
 ; CHECK-LABEL: f5:
 ; CHECK-NEXT: .Lfunc_begin4:
-; 32-COUNT-5: nop
+; 32-COUNT-2: xchgw %ax, %ax
+; 32-NEXT: nop
 ; 64: nopl 8(%rax,%rax)
 ; CHECK-NEXT: ret
 ; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}}
diff --git a/llvm/test/CodeGen/X86/patchable-prologue.ll b/llvm/test/CodeGen/X86/patchable-prologue.ll
--- a/llvm/test/CodeGen/X86/patchable-prologue.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue.ll
@@ -1,5 +1,14 @@
 ; RUN: llc -verify-machineinstrs -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump --triple=x86_64-apple-macosx -d - | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386 < %s | FileCheck %s --check-prefixes=32,32CFI,XCHG
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc < %s | FileCheck %s --check-prefixes=32,MOV
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium3 < %s | FileCheck %s --check-prefixes=32,MOV
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium4 < %s | FileCheck %s --check-prefixes=32,XCHG
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=64
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-unknown-linux-code16 < %s | FileCheck %s --check-prefix=16
+
+; 16-NOT: movl %edi, %edi
+; 16-NOT: xchgw %ax, %ax
 
 declare void @callee(i64*)
 
@@ -10,6 +19,18 @@
 ; CHECK-ALIGN: .p2align 4, 0x90
 ; CHECK-ALIGN: _f0:
+; 32: f0:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: retl
+
+; 64: f0:
+; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; 64-NEXT: retq
+
   ret void
 }
@@ -19,6 +40,19 @@
 ; CHECK-ALIGN: .p2align 4, 0x90
 ; CHECK-ALIGN: _f1:
+
+; 32: f1:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: pushl %ebp
+
+; 64: f1:
+; 64-NEXT: .seh_proc f1
+; 64-NEXT: # %bb.0:
+; 64-NEXT: pushq %rbp
+
   ret void
 }
@@ -28,6 +62,19 @@
 ; CHECK-ALIGN: .p2align 4, 0x90
 ; CHECK-ALIGN: _f2:
+
+; 32: f2:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: pushl %ebp
+
+; 64: f2:
+; 64-NEXT: .seh_proc f2
+; 64-NEXT: # %bb.0:
+; 64-NEXT: subq $200, %rsp
+
   %ptr = alloca i64, i32 20
   call void @callee(i64* %ptr)
   ret void
@@ -39,6 +86,19 @@
 ; CHECK-ALIGN: .p2align 4, 0x90
 ; CHECK-ALIGN: _f3:
+
+; 32: f3:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax
+; MOV-NEXT: movl %edi, %edi
+; 32-NEXT: retl
+
+; 64: f3:
+; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax
+; 64-NEXT: retq
+
   ret void
 }
@@ -47,6 +107,17 @@
 ; patchable one.
 ; CHECK-LABEL: f4{{>?}}:
 ; CHECK-NEXT: 8b 0c 37 movl (%rdi,%rsi), %ecx
+; 32: f4:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax
+; MOV-NEXT: movl %edi, %edi
+; 32-NEXT: pushl %ebx
+
+; 64: f4:
+; 64-NEXT: # %bb.0:
+; 64-NOT: xchgw %ax, %ax
+
 define i32 @f4(i8* %arg1, i64 %arg2, i32 %arg3) "patchable-function"="prologue-short-redirect" {
 bb:
   %tmp10 = getelementptr i8, i8* %arg1, i64 %arg2
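
For reviewers, here is a small standalone sketch of the decision this patch makes for a 2-byte patchable nop. It is illustrative only: the Target struct and both helper functions are invented for this example and are not LLVM API. The byte patterns and the CPU condition come from the patch itself (the LegacyNop logic in LowerPATCHABLE_OP and the RUN lines above).

// Illustrative sketch only -- not part of the patch, not LLVM API.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Target {
  bool Is32Bit = true;
  bool IsWindowsMSVC = false;
  std::string CPU; // empty means the triple's default CPU
};

// Mirrors the LegacyNop condition from LowerPATCHABLE_OP above: 32-bit MSVC
// with the default CPU or pentium3, i.e. /arch:IA32 or /arch:SSE.
static bool wantsLegacyNop(const Target &T) {
  return T.Is32Bit && T.IsWindowsMSVC &&
         (T.CPU.empty() || T.CPU == "pentium3");
}

// Byte pattern emitted for a 2-byte patchable nop.
static std::vector<uint8_t> twoByteNop(const Target &T) {
  if (wantsLegacyNop(T))
    return {0x8B, 0xFF}; // movl %edi, %edi -- the pattern hot-patch tools expect
  return {0x66, 0x90};   // xchgw %ax, %ax -- operand-size-prefixed nop
}

int main() {
  for (uint8_t B : twoByteNop({true, true, ""})) // 32-bit MSVC, default CPU
    std::printf("%02X ", B);                     // prints "8B FF"
  std::printf("\n");
  for (uint8_t B : twoByteNop({true, true, "pentium4"})) // /arch:SSE2 or newer
    std::printf("%02X ", B);                             // prints "66 90"
  std::printf("\n");
}

Anything pentium4 or newer (/arch:SSE2 and up) keeps the 66 90 xchgw form, which is what the i386-windows-msvc -mcpu=pentium4 RUN line checks.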