Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1405,6 +1405,31 @@ passes make choices that keep the code size of this function low, and otherwise do optimizations specifically to reduce code size as long as they do not significantly impact runtime performance. +``"patchable-function"`` + This attribute tells the code generator that the code + generated for this function needs to follow certain conventions that + make it possible for a runtime function to patch over it later. + The exact effect of this attribute depends on its string value, + for which there currently is one legal possibility: + + * ``"prologue-short-redirect"`` - This style of patchable + function is intended to support patching a function prologue to + redirect control away from the function in a thread safe + manner. It guarantees that the first instruction of the + function will be large enough to accommodate a short jump + instruction, and will be sufficiently aligned to allow being + fully changed via an atomic compare-and-swap instruction. + While the first requirement can be satisfied by inserting a large + enough NOP, LLVM can and will try to re-purpose an existing + instruction (i.e. one that would have to be emitted anyway) as + the patchable instruction larger than a short jump. + + ``"prologue-short-redirect"`` is currently only supported on + x86-64. + + This attribute by itself does not imply restrictions on + inter-procedural optimizations. All of the semantic effects the + patching may have must be separately conveyed via the linkage type. 
``readnone`` On a function, this attribute indicates that the function computes its result (or decides to unwind an exception) based strictly on its arguments, Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -599,6 +599,9 @@ /// \brief This pass lays out funclets contiguously. extern char &FuncletLayoutID; + /// \brief This pass implements the "patchable-function" attribute. + extern char &PatchableFunctionID; + /// createStackProtectorPass - This pass adds stack protectors to functions. /// FunctionPass *createStackProtectorPass(const TargetMachine *TM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -329,6 +329,7 @@ void initializeFunctionImportPassPass(PassRegistry &); void initializeLoopVersioningPassPass(PassRegistry &); void initializeWholeProgramDevirtPass(PassRegistry &); +void initializePatchableFunctionPass(PassRegistry &); } #endif Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -929,6 +929,14 @@ let usesCustomInserter = 1; let mayLoad = 1; } +def PATCHABLE_OP : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins variable_ops); + let usesCustomInserter = 1; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 1; +} // Generic opcodes used in GlobalISel. include "llvm/Target/GenericOpcodes.td" Index: include/llvm/Target/TargetOpcodes.def =================================================================== --- include/llvm/Target/TargetOpcodes.def +++ include/llvm/Target/TargetOpcodes.def @@ -133,16 +133,25 @@ /// comparisons into existing memory operations. 
HANDLE_TARGET_OPCODE(FAULTING_LOAD_OP, 22) +/// Wraps a machine instruction to add patchability constraints. An +/// instruction wrapped in PATCHABLE_OP has to either have a minimum +/// size or be preceded with a nop of that size. The first operand is +/// an immediate denoting the minimum size of the instruction, the +/// second operand is an immediate denoting the opcode of the original +/// instruction. The rest of the operands are the operands of the +/// original instruction. +HANDLE_TARGET_OPCODE(PATCHABLE_OP, 23) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. /// Generic ADD instruction. This is an integer add. -HANDLE_TARGET_OPCODE(G_ADD, 23) +HANDLE_TARGET_OPCODE(G_ADD, 24) HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) /// Generic BRANCH instruction. This is an unconditional branch. -HANDLE_TARGET_OPCODE(G_BR, 24) +HANDLE_TARGET_OPCODE(G_BR, 25) // TODO: Add more generic opcodes as we move along. 
Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -76,6 +76,7 @@ MachineSSAUpdater.cpp MachineTraceMetrics.cpp MachineVerifier.cpp + PatchableFunction.cpp MIRPrinter.cpp MIRPrintingPass.cpp OptimizePHIs.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -55,6 +55,7 @@ initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); + initializePatchableFunctionPass(Registry); initializeOptimizePHIsPass(Registry); initializePEIPass(Registry); initializePHIEliminationPass(Registry); Index: lib/CodeGen/Passes.cpp =================================================================== --- lib/CodeGen/Passes.cpp +++ lib/CodeGen/Passes.cpp @@ -602,6 +602,8 @@ addPass(&StackMapLivenessID, false); addPass(&LiveDebugValuesID, false); + addPass(&PatchableFunctionID, false); + AddingMachinePasses = false; } Index: lib/CodeGen/PatchableFunction.cpp =================================================================== --- /dev/null +++ lib/CodeGen/PatchableFunction.cpp @@ -0,0 +1,70 @@ +//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file edits function bodies in place to support the +// "patchable-function" attribute. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { +struct PatchableFunction : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + PatchableFunction() : MachineFunctionPass(ID) { + initializePatchableFunctionPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } +}; +} + +bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getFunction()->hasFnAttribute("patchable-function")) + return false; + + Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function"); + StringRef PatchType = PatchAttr.getValueAsString(); + + assert(PatchType == "prologue-short-redirect" && "Only possibility today!"); + (void)PatchType; + + auto &FirstMBB = *MF.begin(); + auto &FirstMI = *FirstMBB.begin(); + + auto *TII = MF.getSubtarget().getInstrInfo(); + auto MIB = BuildMI(FirstMBB, FirstMBB.begin(), FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_OP)) + .addImm(2) + .addImm(FirstMI.getOpcode()); + + for (auto &MO : FirstMI.operands()) + MIB.addOperand(MO); + + FirstMI.eraseFromParent(); + MF.ensureAlignment(4); + return true; +} + +char PatchableFunction::ID = 0; +char &llvm::PatchableFunctionID = PatchableFunction::ID; +INITIALIZE_PASS(PatchableFunction, "patchable-function", "", false, false) Index: lib/Target/X86/X86AsmPrinter.h 
=================================================================== --- lib/Target/X86/X86AsmPrinter.h +++ lib/Target/X86/X86AsmPrinter.h @@ -29,6 +29,7 @@ const X86Subtarget *Subtarget; StackMaps SM; FaultMaps FM; + std::unique_ptr<MCCodeEmitter> CodeEmitter; // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow @@ -40,10 +41,11 @@ // few instruction bytes to cover the shadow are NOPs used for padding. class StackMapShadowTracker { public: - StackMapShadowTracker(TargetMachine &TM); + StackMapShadowTracker(); ~StackMapShadowTracker(); void startFunction(MachineFunction &MF); - void count(MCInst &Inst, const MCSubtargetInfo &STI); + void count(MCInst &Inst, const MCSubtargetInfo &STI, + MCCodeEmitter *CodeEmitter); // Called to signal the start of a shadow of RequiredSize bytes. void reset(unsigned RequiredSize) { @@ -56,9 +58,7 @@ // to emit any necessary padding-NOPs. void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI); private: - TargetMachine &TM; const MachineFunction *MF; - std::unique_ptr<MCCodeEmitter> CodeEmitter; bool InShadow; // RequiredShadowSize holds the length of the shadow specified in the most @@ -82,14 +82,14 @@ void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerPATCHABLE_OP(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI); public: explicit X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this), - SMShadowTracker(TM) {} + : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {} const char *getPassName() const override { return "X86 Assembly / Object Emitter"; Index: lib/Target/X86/X86AsmPrinter.cpp 
=================================================================== --- lib/Target/X86/X86AsmPrinter.cpp +++ lib/Target/X86/X86AsmPrinter.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" @@ -50,6 +51,9 @@ Subtarget = &MF.getSubtarget(); SMShadowTracker.startFunction(MF); + CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( + *MF.getSubtarget().getInstrInfo(), *MF.getSubtarget().getRegisterInfo(), + MF.getContext())); SetupMachineFunction(MF); Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -20,6 +20,7 @@ #include "Utils/X86ShuffleDecode.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineOperand.h" @@ -70,24 +71,21 @@ // Emit a minimal sequence of nops spanning NumBytes bytes. 
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, - const MCSubtargetInfo &STI); + const MCSubtargetInfo &STI, bool OnlyOneNop = false); namespace llvm { - X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM) - : TM(TM), InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {} + X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker() + : InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {} X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {} - void - X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) { + void X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) { MF = &F; - CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( - *MF->getSubtarget().getInstrInfo(), - *MF->getSubtarget().getRegisterInfo(), MF->getContext())); } void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, + MCCodeEmitter *CodeEmitter) { if (InShadow) { SmallString<256> Code; SmallVector<MCFixup, 4> Fixups; @@ -110,7 +108,7 @@ void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { OutStreamer->EmitInstruction(Inst, getSubtargetInfo()); - SMShadowTracker.count(Inst, getSubtargetInfo()); + SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); } } // end llvm namespace @@ -786,7 +784,8 @@ } /// \brief Emit the optimal amount of multi-byte nops on X86. -static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSubtargetInfo &STI) { +static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, + const MCSubtargetInfo &STI, bool OnlyOneNop) { // This works only for 64bit. For 32bit we have to do additional checking if // the CPU supports multi-byte nops. 
assert(Is64Bit && "EmitNops only supports X86-64"); @@ -833,6 +832,10 @@ .addImm(Displacement).addReg(SegmentReg), STI); break; } + + (void) OnlyOneNop; + assert((!OnlyOneNop || NumBytes == 0) && + "Allowed only one nop instruction!"); } // while (NumBytes) } @@ -915,6 +918,41 @@ OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo()); } +void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, + X86MCInstLower &MCIL) { + // PATCHABLE_OP minsize, opcode, operands + + unsigned MinSize = MI.getOperand(0).getImm(); + unsigned Opcode = MI.getOperand(1).getImm(); + + MCInst MCI; + MCI.setOpcode(Opcode); + for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end())) + if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) + MCI.addOperand(MaybeOperand.getValue()); + + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo()); + + if (Code.size() < MinSize) { + if (MinSize == 2 && Opcode == X86::PUSH64r) { + // This is an optimization that lets us get away without emitting a nop in + // many cases. + // + // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two + // bytes too, so the check on MinSize is important. + MCI.setOpcode(X86::PUSH64rmr); + } else { + EmitNops(*OutStreamer, MinSize, Subtarget->is64Bit(), getSubtargetInfo(), + /* OnlyOneNop = */ true); + } + } + + OutStreamer->EmitInstruction(MCI, getSubtargetInfo()); +} + // Lower a stackmap of the form: // <id>, <shadowBytes>, ... void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { @@ -1213,6 +1251,9 @@ case TargetOpcode::FAULTING_LOAD_OP: return LowerFAULTING_LOAD_OP(*MI, MCInstLowering); + case TargetOpcode::PATCHABLE_OP: + return LowerPATCHABLE_OP(*MI, MCInstLowering); + case TargetOpcode::STACKMAP: return LowerSTACKMAP(*MI); @@ -1475,7 +1516,7 @@ // is at the end of the shadow. 
if (MI->isCall()) { // Count then size of the call towards the shadow - SMShadowTracker.count(TmpInst, getSubtargetInfo()); + SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get()); // Then flush the shadow so that we fill with nops before the call, not // after it. SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); Index: test/CodeGen/X86/patchable-prologue.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/patchable-prologue.ll @@ -0,0 +1,43 @@ +; RUN: llc -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump -triple x86_64-apple-macosx -disassemble - | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN + +declare void @callee(i64*) + +define void @f0() "patchable-function"="prologue-short-redirect" { +; CHECK-LABEL: _f0: +; CHECK-NEXT: 66 90 nop + +; CHECK-ALIGN: .p2align 4, 0x90 +; CHECK-ALIGN: _f0: + + ret void +} + +define void @f1() "patchable-function"="prologue-short-redirect" "no-frame-pointer-elim"="true" { +; CHECK-LABEL: _f1 +; CHECK-NEXT: ff f5 pushq %rbp + +; CHECK-ALIGN: .p2align 4, 0x90 +; CHECK-ALIGN: _f1: + ret void +} + +define void @f2() "patchable-function"="prologue-short-redirect" { +; CHECK-LABEL: _f2 +; CHECK-NEXT: 48 81 ec a8 00 00 00 subq $168, %rsp + +; CHECK-ALIGN: .p2align 4, 0x90 +; CHECK-ALIGN: _f2: + %ptr = alloca i64, i32 20 + call void @callee(i64* %ptr) + ret void +} + +define void @f3() "patchable-function"="prologue-short-redirect" optsize { +; CHECK-LABEL: _f3 +; CHECK-NEXT: 66 90 nop + +; CHECK-ALIGN: .p2align 4, 0x90 +; CHECK-ALIGN: _f3: + ret void +} Index: test/TableGen/trydecode-emission.td =================================================================== --- test/TableGen/trydecode-emission.td +++ test/TableGen/trydecode-emission.td @@ -36,8 +36,8 @@ // CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ... 
// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 14, 0, // Skip to: 21 // CHECK-NEXT: /* 7 */ MCD::OPC_CheckField, 2, 2, 0, 5, 0, // Skip to: 18 -// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 26, 0, 0, 0, // Opcode: InstB, skip to: 18 -// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 25, 1, // Opcode: InstA +// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 27, 0, 0, 0, // Opcode: InstB, skip to: 18 +// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 26, 1, // Opcode: InstA // CHECK-NEXT: /* 21 */ MCD::OPC_Fail, // CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } Index: test/TableGen/trydecode-emission2.td =================================================================== --- test/TableGen/trydecode-emission2.td +++ test/TableGen/trydecode-emission2.td @@ -35,9 +35,9 @@ // CHECK-NEXT: /* 7 */ MCD::OPC_ExtractField, 5, 3, // Inst{7-5} ... // CHECK-NEXT: /* 10 */ MCD::OPC_FilterValue, 0, 22, 0, // Skip to: 36 // CHECK-NEXT: /* 14 */ MCD::OPC_CheckField, 0, 2, 3, 5, 0, // Skip to: 25 -// CHECK-NEXT: /* 20 */ MCD::OPC_TryDecode, 26, 0, 0, 0, // Opcode: InstB, skip to: 25 +// CHECK-NEXT: /* 20 */ MCD::OPC_TryDecode, 27, 0, 0, 0, // Opcode: InstB, skip to: 25 // CHECK-NEXT: /* 25 */ MCD::OPC_CheckField, 3, 2, 0, 5, 0, // Skip to: 36 -// CHECK-NEXT: /* 31 */ MCD::OPC_TryDecode, 25, 1, 0, 0, // Opcode: InstA, skip to: 36 +// CHECK-NEXT: /* 31 */ MCD::OPC_TryDecode, 26, 1, 0, 0, // Opcode: InstA, skip to: 36 // CHECK-NEXT: /* 36 */ MCD::OPC_Fail, // CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } Index: test/TableGen/trydecode-emission3.td =================================================================== --- test/TableGen/trydecode-emission3.td +++ test/TableGen/trydecode-emission3.td @@ -37,8 +37,8 @@ // CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ... 
// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 14, 0, // Skip to: 21 // CHECK-NEXT: /* 7 */ MCD::OPC_CheckField, 2, 2, 0, 5, 0, // Skip to: 18 -// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 26, 0, 0, 0, // Opcode: InstB, skip to: 18 -// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 25, 1, // Opcode: InstA +// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 27, 0, 0, 0, // Opcode: InstB, skip to: 18 +// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 26, 1, // Opcode: InstA // CHECK-NEXT: /* 21 */ MCD::OPC_Fail, // CHECK: if (DecodeInstBOp(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }