Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -6782,6 +6782,88 @@ other aggressive transformations, so the value returned may not be that of the obvious source-language caller. +.. _int_read_register: + +'``llvm.read_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.read_register(metadata) + +Overview: +""""""""" + +The '``llvm.read_register``' intrinsic provides access to the named +register. The register must be valid on the architecture being compiled +to; compilation aborts if the register is unsupported or does not exist. + +Semantics: +"""""""""" + +The '``llvm.read_register``' intrinsic returns the current value of the +register, where possible. It is only available on a small selection of +targets (x86, ARM, AArch64) and a small selection of registers per target. + +A pointer type is used so that the value has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is undefined, just as it is when the register is used through register +variables, inline assembly associations (``asm("sp")``), etc. in user code. + +Warning: There is no register reservation at the moment, so this intrinsic +only works reliably on non-allocatable registers. + +.. _int_write_register: + +'``llvm.write_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.write_register(metadata, i8* %value) + +Overview: +""""""""" + +The '``llvm.write_register``' intrinsic is similar to '``llvm.read_register``' +and provides write access to the named register. 
The register must be valid +on the architecture being compiled to; compilation aborts if the register +is unsupported or does not exist. + +Semantics: +"""""""""" + +The '``llvm.write_register``' intrinsic sets the current value of the +register, where possible, to the value of the pointer (not what the pointer +is pointing to). It is only available on a small selection of targets +(x86, ARM, AArch64) and a small selection of registers per target. + +A pointer type is used so that the value has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is undefined, just as it is when the register is used through register +variables, inline assembly associations (``asm("sp")``), etc. in user code. + +Warning: There is no register reservation at the moment, so this intrinsic +only works reliably on non-allocatable registers. + .. _int_stacksave: '``llvm.stacksave``' Intrinsic Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -72,6 +72,11 @@ /// the parent's frame or return address, and so on. FRAMEADDR, RETURNADDR, + /// READ_REGISTER, WRITE_REGISTER - These nodes represent llvm.read_register + /// and llvm.write_register, which implement named register global variables. + READ_REGISTER, + WRITE_REGISTER, + /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to /// first (possible) on-stack argument. This is needed for correct stack /// adjustment during unwind. 
Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -250,6 +250,10 @@ // def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_read_register : Intrinsic<[llvm_ptr_ty], [llvm_metadata_ty], + [IntrNoMem], "llvm.read_register">; +def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_ptr_ty], + [IntrNoMem], "llvm.write_register">; // Note: we treat stacksave/stackrestore as writemem because we don't otherwise // model their dependencies on allocas. Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2157,6 +2157,13 @@ return "__clear_cache"; } + /// Return the register ID of the name passed in. Used by named register + /// global variables extension. There is no target-independent behaviour + /// so the default action is to bail. + unsigned getRegisterByName(const char* RegName) const { + llvm_unreachable("Named registers not implemented for this target"); + } + /// Return the type that should be used to zero or sign extend a /// zeroext/signext integer argument or return value. FIXME: Most C calling /// convention requires the return type to be promoted, but this is not true Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1262,6 +1262,8 @@ case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4631,6 +4631,21 @@ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::read_register: { + SDValue RegName = DAG.getMDNode(cast(I.getArgOperand(0))); + setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, TLI->getPointerTy(), + RegName)); + return 0; + } + case Intrinsic::write_register: { + // FIXME: The Chain doesn't look generic enough... + SDValue Chain = getValue(I.getArgOperand(1)).getOperand(0); + SDValue RegName = DAG.getMDNode(cast(I.getArgOperand(0))); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + RegName, + getValue(I.getArgOperand(1)))); + return 0; + } case Intrinsic::setjmp: return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -93,6 +93,8 @@ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -310,6 +310,8 @@ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG 
&DAG, bool IsSigned) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; @@ -328,6 +330,8 @@ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16,6 +16,7 @@ #include "AArch64.h" #include "AArch64ISelLowering.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "Utils/AArch64BaseInfo.h" @@ -2400,6 +2401,36 @@ } SDValue +AArch64TargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(0)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +AArch64TargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(1)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + SDValue Value = Op.getOperand(2); + return DAG.getCopyToReg(DAG.getEntryNode(), 
dl, Reg, Value); +} + +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName) const { + unsigned Reg = StringSwitch(RegName) + .Cases("xsp", "stack", AArch64::XSP) + .Default(0); + // FIXME! Make this into an error + assert(Reg && "Register not supported in named register global variable"); + return Reg; +} + +SDValue AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const { assert(getTargetMachine().getCodeModel() == CodeModel::Large); @@ -3300,6 +3331,8 @@ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -444,6 +444,8 @@ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; @@ -454,6 +456,8 @@ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is 
faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -3777,6 +3777,36 @@ return FrameAddr; } +SDValue +ARMTargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(0)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +ARMTargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(1)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + SDValue Value = Op.getOperand(2); + return DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, Value); +} + +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName) const { + unsigned Reg = StringSwitch(RegName) + .Cases("sp", "stack", ARM::SP) + .Default(0); + // FIXME! Make this into an error + assert(Reg && "Register not supported in named register global variable"); + return Reg; +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. 
This should not be done when the non-i64 @@ -6039,6 +6069,8 @@ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -775,6 +775,8 @@ return 0; // nothing to do, move along. } + unsigned getRegisterByName(const char* RegName) const; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -901,6 +903,8 @@ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/VariadicFunction.h" #include 
"llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -12358,6 +12359,39 @@ return FrameAddr; } +SDValue +X86TargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(0)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +X86TargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op.getOperand(1)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getRegisterByName(RegStr->getString().data()); + SDValue Value = Op.getOperand(2); + return DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, Value); +} + +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned X86TargetLowering::getRegisterByName(const char* RegName) const { + unsigned StackPointer = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + unsigned Reg = StringSwitch(RegName) + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Case("stack", StackPointer) + .Default(0); + // FIXME! 
Make this into an error + assert(Reg && "Register not supported in named register global variable"); + return Reg; +} + SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = @@ -13802,6 +13836,8 @@ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); Index: test/CodeGen/AArch64/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/stackpointer.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov x0, sp + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, x0 + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("xsp"); +; CHECK-NOT: .asciz "xsp" +!0 = metadata !{metadata !"xsp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"} Index: test/CodeGen/ARM/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stackpointer.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi 
| FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov r0, sp + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i32 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, r0 + %ptr = inttoptr i32 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("sp"); +; CHECK-NOT: .asciz "sp" +!0 = metadata !{metadata !"sp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"} Index: test/CodeGen/X86/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stackpointer.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnueabi | FileCheck %s + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: movq %rsp, %rax + %sp = call i8* @llvm.read_register(metadata !0) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: movq %rdi, %rsp + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(metadata !1, i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(metadata) nounwind +declare void @llvm.write_register(metadata, i8*) nounwind + +; register unsigned long current_stack_pointer asm("rsp"); +; CHECK-NOT: .asciz "rsp" +!0 = metadata !{metadata !"rsp\00"} +; unsigned long current_stack_pointer = __builtin_stack_pointer(); +; CHECK-NOT: .asciz "stack" +!1 = metadata !{metadata !"stack\00"}