Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -6804,6 +6804,86 @@ other aggressive transformations, so the value returned may not be that of the obvious source-language caller. +.. _int_read_register: + +'``llvm.read_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.read_register(metadata) + +Overview: +""""""""" + +The '``llvm.read_register``' intrinsic provides access to the named +register. The register must be valid on the architecture being compiled +to; compilation will abort if the register is not supported or does not exist. + +Semantics: +"""""""""" + +The '``llvm.read_register``' intrinsic returns the current value of the +register, where possible, and it's only available on a small selection +of targets (x86, ARM, AArch64) and small selection of registers per target. + +Pointer types are used to make sure it has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is unspecified. + +Warning: There is no register reservation at the moment, so it only +works reliably on non-allocatable registers. + +.. _int_write_register: + +'``llvm.write_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.write_register(metadata, i8* %value) + +Overview: +""""""""" + +The '``llvm.write_register``' intrinsic is similar to '``llvm.read_register``' +and provides write access to the named register. The register must be valid +on the architecture being compiled to; compilation will abort if the register +is not supported or does not exist. 
+ +Semantics: +"""""""""" + +The '``llvm.write_register``' intrinsic sets the current value of the +register, where possible, with the value of the pointer (not what the pointer +is pointing to), and it's only available on a small selection of targets +(x86, ARM, AArch64) and small selection of registers per target. + +Pointer types are used to make sure it has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is unspecified. + +Warning: There is no register reservation at the moment, so it only +works reliably on non-allocatable registers. + .. _int_stacksave: '``llvm.stacksave``' Intrinsic Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -72,6 +72,11 @@ /// the parent's frame or return address, and so on. FRAMEADDR, RETURNADDR, + /// READ_REGISTER, WRITE_REGISTER - DAG nodes for llvm.read_register and + /// llvm.write_register, implementing named register global variables. + READ_REGISTER, + WRITE_REGISTER, + /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to /// first (possible) on-stack argument. This is needed for correct stack /// adjustment during unwind. Index: include/llvm/CodeGen/SelectionDAGISel.h =================================================================== --- include/llvm/CodeGen/SelectionDAGISel.h +++ include/llvm/CodeGen/SelectionDAGISel.h @@ -242,6 +242,8 @@ // Calls to these functions are generated by tblgen. 
SDNode *Select_INLINEASM(SDNode *N); + SDNode *Select_READ_REGISTER(SDNode *N); + SDNode *Select_WRITE_REGISTER(SDNode *N); SDNode *Select_UNDEF(SDNode *N); void CannotYetSelect(SDNode *N); Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -250,6 +250,10 @@ // def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_read_register : Intrinsic<[llvm_ptr_ty], [llvm_metadata_ty], + [IntrNoMem], "llvm.read_register">; +def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_ptr_ty], + [IntrNoMem], "llvm.write_register">; // Note: we treat stacksave/stackrestore as writemem because we don't otherwise // model their dependencies on allocas. Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2221,6 +2221,13 @@ return "__clear_cache"; } + /// Return the register ID of the name passed in. Used by named register + /// global variables extension. There is no target-independent behaviour + /// so the default action is to bail. + virtual unsigned getRegisterByName(const char* RegName) const { + report_fatal_error("Named registers not implemented for this target"); + } + /// Return the type that should be used to zero or sign extend a /// zeroext/signext integer argument or return value. 
FIXME: Most C calling /// convention requires the return type to be promoted, but this is not true Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1268,6 +1268,13 @@ if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: + // Named register is legal in the DAG, but blocked by register name + // selection if not implemented by target (to choose the correct register) + // They'll be converted to Copy(To/From)Reg. + Action = TargetLowering::Legal; + break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4645,6 +4645,21 @@ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::read_register: { + SDValue RegName = DAG.getMDNode(cast(I.getArgOperand(0))); + setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, TLI->getPointerTy(), + RegName)); + return nullptr; + } + case Intrinsic::write_register: { + // FIXME: The Chain doesn't look generic enough... 
+ SDValue Chain = getValue(I.getArgOperand(1)).getOperand(0); + SDValue RegName = DAG.getMDNode(cast(I.getArgOperand(0))); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + RegName, + getValue(I.getArgOperand(1)))); + return nullptr; + } case Intrinsic::setjmp: return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -93,6 +93,8 @@ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1812,6 +1812,32 @@ return New.getNode(); } +SDNode +*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op->getOperand(0)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data()); + SDValue New = CurDAG->getCopyFromReg( + CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data()); + SDValue 
New = CurDAG->getCopyToReg( + CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + return New.getNode(); +} + SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } @@ -2404,6 +2430,8 @@ NodeToMatch->getOperand(0)); return nullptr; case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); + case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -347,6 +347,8 @@ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15,6 +15,7 @@ #include "AArch64.h" #include "AArch64ISelLowering.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "Utils/AArch64BaseInfo.h" @@ -2405,6 +2406,17 @@ return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. 
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName) const { + unsigned Reg = StringSwitch(RegName) + .Cases("sp", "stack", AArch64::XSP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + SDValue AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const { Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -461,6 +461,8 @@ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -3783,6 +3783,17 @@ return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName) const { + unsigned Reg = StringSwitch(RegName) + .Cases("sp", "stack", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. 
This should not be done when the non-i64 Index: lib/Target/ARM64/ARM64ISelLowering.h =================================================================== --- lib/Target/ARM64/ARM64ISelLowering.h +++ lib/Target/ARM64/ARM64ISelLowering.h @@ -383,6 +383,8 @@ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. Index: lib/Target/ARM64/ARM64ISelLowering.cpp =================================================================== --- lib/Target/ARM64/ARM64ISelLowering.cpp +++ lib/Target/ARM64/ARM64ISelLowering.cpp @@ -3372,6 +3372,17 @@ return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARM64TargetLowering::getRegisterByName(const char* RegName) const { + unsigned Reg = StringSwitch(RegName) + .Cases("sp", "stack", ARM64::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -778,6 +778,8 @@ return 0; // nothing to do, move along. } + unsigned getRegisterByName(const char* RegName) const; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
FastISel *createFastISel(FunctionLoweringInfo &funcInfo, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/VariadicFunction.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -12587,6 +12588,20 @@ return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned X86TargetLowering::getRegisterByName(const char* RegName) const { + unsigned StackPointer = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + unsigned Reg = StringSwitch(RegName) + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Case("stack", StackPointer) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Index: test/CodeGen/AArch64/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/stackpointer.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov x0, sp + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, x0 + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("sp"); +; 
CHECK-NOT: .asciz "sp" +!0 = metadata !{metadata !"sp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"} Index: test/CodeGen/ARM/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stackpointer.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov r0, sp + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i32 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, r0 + %ptr = inttoptr i32 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("sp"); +; CHECK-NOT: .asciz "sp" +!0 = metadata !{metadata !"sp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"} Index: test/CodeGen/ARM64/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM64/stackpointer.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov x0, sp + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, x0 + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer 
asm("sp"); +; CHECK-NOT: .asciz "sp" +!0 = metadata !{metadata !"sp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"} Index: test/CodeGen/X86/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stackpointer.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnueabi | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: movq %rsp, %rax + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: movq %rdi, %rsp + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("rsp"); +; CHECK-NOT: .asciz "rsp" +!0 = metadata !{metadata !"rsp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"}