Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -6782,6 +6782,82 @@ other aggressive transformations, so the value returned may not be that of the obvious source-language caller. +.. _int_read_register: + +'``llvm.read_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.read_register(i8* @regname) + +Overview: +""""""""" + +The '``llvm.read_register``' intrinsic provides access to the named +register. The register must be valid on the architecture being compiled +to; the compiler will abort if the register is not supported or does not exist. + +Semantics: +"""""""""" + +The '``llvm.read_register``' intrinsic returns the current value of the +register, where possible, and it's only available on a small selection +of targets (x86, ARM, AArch64) and small selection of registers per target. + +Pointer types are used to make sure the value has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is undefined, just as it is when the register is used through register +variables, inline assembly associations ``asm("sp")`` etc. in user code. + +.. _int_write_register: + +'``llvm.write_register``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.write_register(i8* @regname, i8* @value) + +Overview: +""""""""" + +The '``llvm.write_register``' intrinsic is similar to the '``llvm.read_register``' +and provides write access to the named register. The register must be valid +on the architecture being compiled to; the compiler will abort if the register +is not supported or does not exist. 
+ +Semantics: +"""""""""" + +The '``llvm.write_register``' intrinsic sets the current value of the +register, where possible, with the value of the pointer (not what the pointer +is pointing to), and it's only available on a small selection of targets +(x86, ARM, AArch64) and small selection of registers per target. + +Pointer types are used to make sure the value has the same bit width as the +register itself. + +This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels. + +The behaviour of the program when the register is used by interleaving +code is undefined, just as it is when the register is used through register +variables, inline assembly associations ``asm("sp")`` etc. in user code. + .. _int_stacksave: '``llvm.stacksave``' Intrinsic Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -72,6 +72,11 @@ /// the parent's frame or return address, and so on. FRAMEADDR, RETURNADDR, + /// READ_REGISTER, WRITE_REGISTER - DAG nodes for the llvm.read_register and + /// llvm.write_register intrinsics (named register global variables extension). + READ_REGISTER, + WRITE_REGISTER, + /// FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to /// first (possible) on-stack argument. This is needed for correct stack /// adjustment during unwind. 
Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -250,6 +250,12 @@ // def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; +// read_register only observes state, so it must not be moved across writes; +// write_register returns nothing, so it must not be IntrNoMem or it is dead. +def int_read_register : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrReadMem], "llvm.read_register">; +def int_write_register : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], + [], "llvm.write_register">; // Note: we treat stacksave/stackrestore as writemem because we don't otherwise // model their dependencies on allocas. Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2156,6 +2156,13 @@ return "__clear_cache"; } + /// Return the register ID of the name passed in. Used by named register + /// global variables extension. There is no target-independent behaviour + /// so the default action is to bail; targets override this to map names. + virtual unsigned getRegisterByName(const char* RegName) const { + report_fatal_error("Named registers not implemented for this target"); + } + /// Return the type that should be used to zero or sign extend a /// zeroext/signext integer argument or return value. FIXME: Most C calling /// convention requires the return type to be promoted, but this is not true Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1262,6 +1262,8 @@ case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4630,6 +4630,18 @@ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::read_register: + setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, TLI->getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::write_register: { + // Chain to the root: orders the write after all pending side effects. + SDValue Chain = getRoot(); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; + } case Intrinsic::setjmp: return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -93,6 +93,8 @@ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -310,6 +310,8 @@ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) 
const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; @@ -328,6 +330,8 @@ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16,9 +16,11 @@ #include "AArch64.h" #include "AArch64ISelLowering.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "Utils/AArch64BaseInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -2399,6 +2401,46 @@ return FrameAddr; } +// Lower-cased register name from global string Val. FIXME? Use metadata? +static std::string getRegisterStringValue(SDValue& Val) { + assert(Val.getNode()->getOpcode() == ISD::GlobalAddress); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val.getNode()); + StringRef RegName; 
+ // Fail in release builds too, where an assert would be compiled out. + if (!getConstantStringInfo(GA->getGlobal(), RegName)) + report_fatal_error("Register String name not a global string"); + // lower() returns a new string; return it by value so it stays alive. + return RegName.lower(); +} + +SDValue +AArch64TargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(0); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +AArch64TargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(1); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + // Operand 0 is the incoming chain; reuse it so the copy stays ordered. + return DAG.getCopyToReg(Op.getOperand(0), dl, Reg, Op.getOperand(2)); +} + +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName) const { + // FIXME? Maybe this could be a TableGen attribute on some registers and + // this table could be generated automatically from RegInfo. + unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("xsp", AArch64::XSP) + .Default(0); 
+ if (!Reg) // An assert would be compiled out of release builds. + report_fatal_error("Register not supported in named register global variable"); + return Reg; +} + SDValue AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const { @@ -3300,6 +3342,8 @@ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -444,6 +444,8 @@ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; @@ -454,6 +456,8 @@ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName) const; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. 
fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -24,6 +24,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -3824,6 +3825,46 @@ return FrameAddr; } +// Lower-cased register name from global string Val. FIXME? Use metadata? +static std::string getRegisterStringValue(SDValue& Val) { + assert(Val.getNode()->getOpcode() == ISD::GlobalAddress); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val.getNode()); + StringRef RegName; + // Fail in release builds too, where an assert would be compiled out. + if (!getConstantStringInfo(GA->getGlobal(), RegName)) + report_fatal_error("Register String name not a global string"); + // lower() returns a new string; return it by value so it stays alive. + return RegName.lower(); +} + +SDValue +ARMTargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(0); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +ARMTargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(1); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + // Operand 0 is the incoming chain; reuse it so the copy stays ordered. + return DAG.getCopyToReg(Op.getOperand(0), dl, Reg, Op.getOperand(2)); +} + +unsigned ARMTargetLowering::getRegisterByName(const char* RegName) const { + // FIXME? Maybe this could be a TableGen attribute on some registers and + // this table could be generated automatically from RegInfo. 
+ unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (!Reg) // An assert would be compiled out of release builds. + report_fatal_error("Register not supported in named register global variable"); + return Reg; +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -6115,6 +6156,8 @@ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -775,6 +775,8 @@ return 0; // nothing to do, move along. } + unsigned getRegisterByName(const char* RegName) const; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -901,6 +903,8 @@ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -23,7 +23,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/VariadicFunction.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -12358,6 +12360,47 @@ return FrameAddr; } +// Lower-cased register name from global string Val. FIXME? Use metadata? +static std::string getRegisterStringValue(SDValue& Val) { + assert(Val.getNode()->getOpcode() == ISD::GlobalAddress); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val.getNode()); + StringRef RegName; 
+ // Fail in release builds too, where an assert would be compiled out. + if (!getConstantStringInfo(GA->getGlobal(), RegName)) + report_fatal_error("Register String name not a global string"); + // lower() returns a new string; return it by value so it stays alive. + return RegName.lower(); +} + +SDValue +X86TargetLowering::LowerREAD_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(0); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, Op.getValueType()); +} + +SDValue +X86TargetLowering::LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue RegName = Op.getOperand(1); + unsigned Reg = getRegisterByName(getRegisterStringValue(RegName).c_str()); + // Operand 0 is the incoming chain; reuse it so the copy stays ordered. + return DAG.getCopyToReg(Op.getOperand(0), dl, Reg, Op.getOperand(2)); +} + +unsigned X86TargetLowering::getRegisterByName(const char* RegName) const { + // FIXME? Maybe this could be a TableGen attribute on some registers and + // this table could be generated automatically from RegInfo. + unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Default(0); 
+ if (!Reg) // An assert would be compiled out of release builds. + report_fatal_error("Register not supported in named register global variable"); + return Reg; +} + SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = @@ -13802,6 +13845,8 @@ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::READ_REGISTER: return LowerREAD_REGISTER(Op, DAG); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); Index: test/CodeGen/AArch64/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/stackpointer.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi | FileCheck %s + +; register unsigned long current_stack_pointer asm("sp"); +@.reg = private unnamed_addr constant [4 x i8] c"xsp\00" + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov x0, sp + %sp = call i8* @llvm.read_register(i8* bitcast ([4 x i8]* @.reg to i8*)) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, x0 + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(i8* bitcast ([4 x i8]* @.reg to i8*), i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(i8*) nounwind +declare void @llvm.write_register(i8*, i8*) nounwind Index: test/CodeGen/ARM/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stackpointer.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s + +; register unsigned long current_stack_pointer asm("sp"); +@.reg = private 
unnamed_addr constant [3 x i8] c"sp\00" + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: mov r0, sp + %sp = call i8* @llvm.read_register(i8* bitcast ([3 x i8]* @.reg to i8*)) + ret i8* %sp +} + +define void @set_stack(i32 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: mov sp, r0 + %ptr = inttoptr i32 %val to i8* + call void @llvm.write_register(i8* bitcast ([3 x i8]* @.reg to i8*), i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(i8*) nounwind +declare void @llvm.write_register(i8*, i8*) nounwind Index: test/CodeGen/X86/stackpointer.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/stackpointer.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnueabi | FileCheck %s + +; register unsigned long current_stack_pointer asm("rsp"); +@.reg = private unnamed_addr constant [4 x i8] c"rsp\00" + +define i8* @get_stack() nounwind { +entry: +; CHECK-LABEL: get_stack: +; CHECK: movq %rsp, %rax + %sp = call i8* @llvm.read_register(i8* bitcast ([4 x i8]* @.reg to i8*)) + ret i8* %sp +} + +define void @set_stack(i64 %val) nounwind { +entry: +; CHECK-LABEL: set_stack: +; CHECK: movq %rdi, %rsp + %ptr = inttoptr i64 %val to i8* + call void @llvm.write_register(i8* bitcast ([4 x i8]* @.reg to i8*), i8* %ptr) + ret void +} + +declare i8* @llvm.read_register(i8*) nounwind +declare void @llvm.write_register(i8*, i8*) nounwind