Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -2926,7 +2926,7 @@
     hexadecimal notation (see below). The assembler requires the exact
     decimal value of a floating-point constant. For example, the
     assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating
-    decimal in binary. Floating-point constants must have a 
+    decimal in binary. Floating-point constants must have a
     :ref:`floating-point <t_floating>` type.
 **Null pointer constants**
     The identifier '``null``' is recognized as a null pointer constant
@@ -3331,7 +3331,7 @@
     value won't fit in the integer type, the result is a
     :ref:`poison value <poisonvalues>`.
 ``uitofp (CST to TYPE)``
-    Convert an unsigned integer constant to the corresponding 
+    Convert an unsigned integer constant to the corresponding
     floating-point constant. TYPE must be a scalar or vector floating-point
     type. CST must be of scalar or vector integer type. Both CST and TYPE must
     be scalars, or vectors of the same number of elements.
@@ -5434,7 +5434,7 @@
 '``invariant.group``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The experimental ``invariant.group`` metadata may be attached to 
+The experimental ``invariant.group`` metadata may be attached to
 ``load``/``store`` instructions referencing a single metadata with no entries.
 The existence of the ``invariant.group`` metadata on the instruction tells
 the optimizer that every ``load`` and ``store`` to the same pointer operand
@@ -6875,7 +6875,7 @@
 """"""""""
 
 The two arguments to the '``fadd``' instruction must be
-:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of 
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of
 floating-point values. Both arguments must have identical types.
 
 Semantics:
@@ -6883,7 +6883,7 @@
 
 The value produced is the floating-point sum of the two operands.
 This instruction is assumed to execute in the default :ref:`floating-point
-environment <floatenv>`. 
+environment <floatenv>`.
 This instruction can also take any number of :ref:`fast-math flags <fastmath>`,
 which are optimization hints to enable otherwise unsafe floating-point
 optimizations:
@@ -6972,7 +6972,7 @@
 """"""""""
 
 The two arguments to the '``fsub``' instruction must be
-:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of 
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of
 floating-point values. Both arguments must have identical types.
 
 Semantics:
@@ -6980,7 +6980,7 @@
 
 The value produced is the floating-point difference of the two operands.
 This instruction is assumed to execute in the default :ref:`floating-point
-environment <floatenv>`. 
+environment <floatenv>`.
 This instruction can also take any number of :ref:`fast-math flags <fastmath>`,
 which are optimization hints to enable otherwise unsafe floating-point
 optimizations:
@@ -7067,7 +7067,7 @@
 """"""""""
 
 The two arguments to the '``fmul``' instruction must be
-:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of 
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of
 floating-point values. Both arguments must have identical types.
 
 Semantics:
@@ -7075,7 +7075,7 @@
 
 The value produced is the floating-point product of the two operands.
 This instruction is assumed to execute in the default :ref:`floating-point
-environment <floatenv>`. 
+environment <floatenv>`.
 This instruction can also take any number of :ref:`fast-math flags <fastmath>`,
 which are optimization hints to enable otherwise unsafe floating-point
 optimizations:
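As a concrete illustration of the fast-math flags discussed in the hunks
above, a minimal sketch of how the flags are spelled in IR (function and
value names are illustrative):

::

      define float @fmuladd_fast(float %a, float %b, float %c) {
        ; 'fast' enables all of the unsafe floating-point optimizations.
        %mul = fmul fast float %a, %b
        ; Individual flags such as 'nnan' can also be given on their own.
        %sum = fadd nnan float %mul, %c
        ret float %sum
      }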
@@ -7201,7 +7201,7 @@
 """"""""""
 
 The two arguments to the '``fdiv``' instruction must be
-:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of 
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of
 floating-point values. Both arguments must have identical types.
 
 Semantics:
@@ -7209,7 +7209,7 @@
 
 The value produced is the floating-point quotient of the two operands.
 This instruction is assumed to execute in the default :ref:`floating-point
-environment <floatenv>`. 
+environment <floatenv>`.
 This instruction can also take any number of :ref:`fast-math flags <fastmath>`,
 which are optimization hints to enable otherwise unsafe floating-point
 optimizations:
@@ -7344,7 +7344,7 @@
 """"""""""
 
 The two arguments to the '``frem``' instruction must be
-:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of 
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of
 floating-point values. Both arguments must have identical types.
 
 Semantics:
@@ -7352,10 +7352,10 @@
 
 The value produced is the floating-point remainder of the two operands.
 This is the same output as a libm '``fmod``' function, but without any
-possibility of setting ``errno``. The remainder has the same sign as the 
+possibility of setting ``errno``. The remainder has the same sign as the
 dividend.
 This instruction is assumed to execute in the default :ref:`floating-point
-environment <floatenv>`. 
+environment <floatenv>`.
 This instruction can also take any number of :ref:`fast-math flags <fastmath>`,
 which are optimization hints to enable otherwise unsafe floating-point
 optimizations:
@@ -8809,7 +8809,7 @@
 
 The '``fptrunc``' instruction casts a ``value`` from a larger
 :ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
-<t_floating>` type. 
+<t_floating>` type.
 This instruction is assumed to execute in the default :ref:`floating-point
 environment <floatenv>`.
 
@@ -10330,6 +10330,27 @@
 
 This intrinsic is only implemented for x86.
 
+'``llvm.sponentry``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.sponentry()
+
+Overview:
+"""""""""
+
+The '``llvm.sponentry``' intrinsic returns the stack pointer value at
+entry to the function that contains the call to this intrinsic.
+
+Semantics:
+""""""""""
+
+Note that this intrinsic is currently only verified on AArch64.
+
 '``llvm.frameaddress``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12115,11 +12136,11 @@
 
 The '``llvm.fshl``' family of intrinsic functions performs a funnel shift left:
 the first two values are concatenated as { %a : %b } (%a is the most significant
-bits of the wide value), the combined value is shifted left, and the most 
-significant bits are extracted to produce a result that is the same size as the 
-original arguments. If the first 2 arguments are identical, this is equivalent 
-to a rotate left operation. For vector types, the operation occurs for each 
-element of the vector. The shift argument is treated as an unsigned amount 
+bits of the wide value), the combined value is shifted left, and the most
+significant bits are extracted to produce a result that is the same size as the
+original arguments. If the first 2 arguments are identical, this is equivalent
+to a rotate left operation. For vector types, the operation occurs for each
+element of the vector. The shift argument is treated as an unsigned amount
 modulo the element size of the arguments.
 
 Arguments:
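The rotate special case called out above is worth seeing concretely; a
minimal sketch (names are illustrative):

::

      declare i32 @llvm.fshl.i32(i32, i32, i32)

      define i32 @rotl32(i32 %x, i32 %amt) {
        ; With both concatenated operands equal, the funnel shift left
        ; becomes a rotate left; %amt is taken modulo 32.
        %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
        ret i32 %r
      }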
@@ -12161,11 +12182,11 @@
 
 The '``llvm.fshr``' family of intrinsic functions performs a funnel shift right:
 the first two values are concatenated as { %a : %b } (%a is the most significant
-bits of the wide value), the combined value is shifted right, and the least 
-significant bits are extracted to produce a result that is the same size as the 
-original arguments. If the first 2 arguments are identical, this is equivalent 
-to a rotate right operation. For vector types, the operation occurs for each 
-element of the vector. The shift argument is treated as an unsigned amount 
+bits of the wide value), the combined value is shifted right, and the least
+significant bits are extracted to produce a result that is the same size as the
+original arguments. If the first 2 arguments are identical, this is equivalent
+to a rotate right operation. For vector types, the operation occurs for each
+element of the vector. The shift argument is treated as an unsigned amount
 modulo the element size of the arguments.
 
 Arguments:
@@ -13446,7 +13467,7 @@
 %Tmp = call <8 x double> @llvm.masked.expandload.v8f64(double* %Bptr, <8 x i1> %Mask, <8 x double> undef)
 ; Store the result in A
 call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %Tmp, <8 x double>* %Aptr, i32 8, <8 x i1> %Mask)
- 
+
 ; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask.
 %MaskI = bitcast <8 x i1> %Mask to i8
 %MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI)
@@ -13503,7 +13524,7 @@
 %Tmp = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %Aptr, i32 8, <8 x i1> %Mask, <8 x double> undef)
 ; Store all selected elements consecutively in array B
 call @llvm.masked.compressstore.v8f64(<8 x double> %Tmp, double* %Bptr, <8 x i1> %Mask)
- 
+
 ; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask.
 %MaskI = bitcast <8 x i1> %Mask to i8
 %MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI)
@@ -14136,7 +14157,7 @@
 
 The '``llvm.experimental.constrained.powi``' intrinsic returns the first operand
 raised to the (positive or negative) power specified by the second operand. The
-order of evaluation of multiplications is not defined. When a vector of 
+order of evaluation of multiplications is not defined. When a vector of
 floating-point type is used, the second argument remains a scalar integer
 value.
 
@@ -14462,7 +14483,7 @@
 """""""""
 
 The '``llvm.experimental.constrained.nearbyint``' intrinsic returns the first
-operand rounded to the nearest integer. It will not raise an inexact 
+operand rounded to the nearest integer. It will not raise an inexact
 floating-point exception if the operand is not an integer.
 
 
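To make the constrained-intrinsic calling convention above concrete, a
minimal sketch of a nearbyint call; the rounding-mode and
exception-behavior metadata arguments shown here are assumptions based
on the other constrained intrinsics:

::

      declare double
      @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)

      define double @round_nearby(double %x) {
        ; Round to an integral value in the dynamic rounding mode,
        ; without raising an inexact exception.
        %r = call double @llvm.experimental.constrained.nearbyint.f64(
                  double %x,
                  metadata !"round.dynamic",
                  metadata !"fpexcept.strict")
        ret double %r
      }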
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -70,7 +70,7 @@
   /// of the frame or return address to return. An index of zero corresponds
   /// to the current function's frame or return address, an index of one to
   /// the parent's frame or return address, and so on.
-  FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, 
+  FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, SPONENTRY,
 
   /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
   /// Materializes the offset from the local object pointer of another
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -320,6 +320,7 @@
 def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
 def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
                                   [IntrReadMem], "llvm.read_register">;
 def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty],
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1059,6 +1059,7 @@
   case ISD::FRAMEADDR:
   case ISD::RETURNADDR:
   case ISD::ADDROFRETURNADDR:
+  case ISD::SPONENTRY:
     // These operations lie about being legal: when they claim to be legal,
     // they should actually be custom-lowered.
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5050,6 +5050,10 @@
     setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
     return nullptr;
+  case Intrinsic::sponentry:
+    setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
+                             TLI.getPointerTy(DAG.getDataLayout())));
+    return nullptr;
   case Intrinsic::frameaddress:
     setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -124,6 +124,7 @@
   case ISD::RETURNADDR:                 return "RETURNADDR";
   case ISD::ADDROFRETURNADDR:           return "ADDROFRETURNADDR";
   case ISD::FRAMEADDR:                  return "FRAMEADDR";
+  case ISD::SPONENTRY:                  return "SPONENTRY";
   case ISD::LOCAL_RECOVER:              return "LOCAL_RECOVER";
   case ISD::READ_REGISTER:              return "READ_REGISTER";
   case ISD::WRITE_REGISTER:             return "WRITE_REGISTER";
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -3450,6 +3450,23 @@
     updateValueMap(II, SrcReg);
     return true;
   }
+  case Intrinsic::sponentry: {
+    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+
+    // SP = FP + Fixed Object + 16
+    // A fixed object at offset 0 sits exactly at the incoming stack
+    // pointer, so materializing its address yields SP at function entry.
+    int FI = MFI.CreateFixedObject(4, 0, false);
+    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(AArch64::ADDXri), ResultReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addImm(0);
+
+    updateValueMap(II, ResultReg);
+    return true;
+  }
   case Intrinsic::memcpy:
   case Intrinsic::memmove: {
     const auto *MTI = cast<MemTransferInst>(II);
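For reference, a minimal sketch of how the new intrinsic is intended to
be used, mirroring the test below; the ``_setjmpex`` pairing is the
motivating case, and the function and value names are illustrative:

::

      declare i8* @llvm.sponentry()
      declare i32 @_setjmpex(i8*, i8*) returns_twice

      define i32 @use_sponentry(i8* %jmpbuf) {
        ; Pass the on-entry stack pointer as the frame argument of
        ; MSVC's _setjmpex.
        %sp = call i8* @llvm.sponentry()
        %r = call i32 @_setjmpex(i8* %jmpbuf, i8* %sp)
        ret i32 %r
      }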
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -617,6 +617,7 @@
   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2863,6 +2863,8 @@
     return LowerFP_EXTEND(Op, DAG);
   case ISD::FRAMEADDR:
     return LowerFRAMEADDR(Op, DAG);
+  case ISD::SPONENTRY:
+    return LowerSPONENTRY(Op, DAG);
   case ISD::RETURNADDR:
     return LowerRETURNADDR(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
@@ -5171,6 +5173,18 @@
   return FrameAddr;
 }
 
+SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+  EVT VT = getPointerTy(DAG.getDataLayout());
+  SDLoc DL(Op);
+  // As in the FastISel path, a fixed object at offset 0 denotes the
+  // incoming stack pointer; its frame index lowers to SP at entry.
+  int FI = MFI.CreateFixedObject(4, 0, false);
+  return DAG.getFrameIndex(FI, VT);
+}
+
 // FIXME? Maybe this could be a TableGen attribute on some registers and
 // this table could be generated automatically from RegInfo.
 unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
Index: test/CodeGen/AArch64/sponentry.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/sponentry.ll
@@ -0,0 +1,108 @@
+; RUN: llc -mtriple=aarch64-windows-msvc -disable-fp-elim %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-windows-msvc -fast-isel -disable-fp-elim %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s --check-prefix=NOFP
+; RUN: llc -mtriple=aarch64-windows-msvc -fast-isel %s -o - | FileCheck %s --check-prefix=NOFP
+
+@env2 = common dso_local global [24 x i64]* null, align 8
+
+define dso_local void @bar() {
+  %1 = call i8* @llvm.sponentry()
+  %2 = load [24 x i64]*, [24 x i64]** @env2, align 8
+  %3 = getelementptr inbounds [24 x i64], [24 x i64]* %2, i32 0, i32 0
+  %4 = bitcast i64* %3 to i8*
+  %5 = call i32 @_setjmpex(i8* %4, i8* %1) #2
+  ret void
+}
+
+; CHECK: bar:
+; CHECK: mov x29, sp
+; CHECK: add x1, x29, #16
+; CHECK: bl _setjmpex
+
+; NOFP: str x30, [sp, #-16]!
+; NOFP: add x1, sp, #16
+
+define dso_local void @foo([24 x i64]*) {
+  %2 = alloca [24 x i64]*, align 8
+  %3 = alloca i32, align 4
+  %4 = alloca [100 x i32], align 4
+  store [24 x i64]* %0, [24 x i64]** %2, align 8
+  %5 = call i8* @llvm.sponentry()
+  %6 = load [24 x i64]*, [24 x i64]** %2, align 8
+  %7 = getelementptr inbounds [24 x i64], [24 x i64]* %6, i32 0, i32 0
+  %8 = bitcast i64* %7 to i8*
+  %9 = call i32 @_setjmpex(i8* %8, i8* %5)
+  store i32 %9, i32* %3, align 4
+  ret void
+}
+
+; CHECK: foo:
+; CHECK: sub sp, sp, #448
+; CHECK: add x29, sp, #432
+; CHECK: add x1, x29, #16
+; CHECK: bl _setjmpex
+
+; NOFP: sub sp, sp, #432
+; NOFP: add x1, sp, #432
+
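; A sketch of the arithmetic behind the @foo CHECK lines above, assuming
; the usual AArch64 prologue: the 448-byte frame is 432 bytes of locals
; plus the 16-byte FP/LR save area, and x29 points at the saved FP/LR
; pair, so the on-entry stack pointer is x29 + 16. Without a frame
; pointer, it is recomputed directly as sp + 432.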
+define dso_local void @var_args(i8*, ...) {
+  %2 = alloca i8*, align 8
+  %3 = alloca i8*, align 8
+  store i8* %0, i8** %2, align 8
+  %4 = bitcast i8** %3 to i8*
+  call void @llvm.va_start(i8* %4)
+  %5 = load i8*, i8** %3, align 8
+  %6 = getelementptr inbounds i8, i8* %5, i64 8
+  store i8* %6, i8** %3, align 8
+  %7 = bitcast i8* %5 to i32*
+  %8 = load i32, i32* %7, align 8
+  %9 = bitcast i8** %3 to i8*
+  call void @llvm.va_end(i8* %9)
+  %10 = call i8* @llvm.sponentry()
+  %11 = load [24 x i64]*, [24 x i64]** @env2, align 8
+  %12 = getelementptr inbounds [24 x i64], [24 x i64]* %11, i32 0, i32 0
+  %13 = bitcast i64* %12 to i8*
+  %14 = call i32 @_setjmpex(i8* %13, i8* %10) #3
+  ret void
+}
+
+; CHECK: var_args:
+; CHECK: sub sp, sp, #96
+; CHECK: add x29, sp, #16
+; CHECK: add x1, x29, #80
+; CHECK: bl _setjmpex
+
+; NOFP: sub sp, sp, #96
+; NOFP: add x1, sp, #96
+
+define dso_local void @manyargs(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i64 %x9, i64 %x10) {
+  %1 = call i8* @llvm.sponentry()
+  %2 = load [24 x i64]*, [24 x i64]** @env2, align 8
+  %3 = getelementptr inbounds [24 x i64], [24 x i64]* %2, i32 0, i32 0
+  %4 = bitcast i64* %3 to i8*
+  %5 = call i32 @_setjmpex(i8* %4, i8* %1) #2
+  ret void
+}
+
+; CHECK: manyargs:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: add x1, x29, #16
+
+; NOFP: str x30, [sp, #-16]!
+; NOFP: add x1, sp, #16
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.sponentry()
+
+; Function Attrs: returns_twice
+declare dso_local i32 @_setjmpex(i8*, i8*)
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+attributes #1 = { nounwind }
+attributes #2 = { returns_twice }
+attributes #3 = { returns_twice }