diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -166,6 +166,7 @@
   check_symbol_exists(__sparcv9 "" __SPARCV9)
   check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
   check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
+  check_symbol_exists(__ve__ "" __VE)
   if(__ARM)
     add_default_target_arch(arm)
   elseif(__AARCH64)
@@ -200,6 +201,8 @@
     add_default_target_arch(wasm32)
   elseif(__WEBASSEMBLY64)
     add_default_target_arch(wasm64)
+  elseif(__VE)
+    add_default_target_arch(ve)
   endif()
 endmacro()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -236,6 +236,8 @@
       test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
       test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
+      test_target_arch(ve "__ve__" "--target=ve-unknown-none")
     endif()
     set(COMPILER_RT_OS_SUFFIX "")
   endif()
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -37,6 +37,7 @@
 set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
+set(VE ve)
 
 if(APPLE)
   set(ARM64 arm64 arm64e)
@@ -44,8 +45,11 @@
   set(X86_64 x86_64 x86_64h)
 endif()
 
-set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
+set(ALL_BUILTIN_SUPPORTED_ARCH
+    ${X86} ${X86_64} ${ARM32} ${ARM64}
+    ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
+    ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
+    ${WASM32} ${WASM64} ${VE})
 
 include(CompilerRTUtils)
 include(CompilerRTDarwinUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,6 +573,12 @@
   ${GENERIC_SOURCES}
 )
 
+set(ve_SOURCES
+  ve/llvm_grow_stack.S
+  ve/llvm_grow_stack_align.S
+  ${GENERIC_TF_SOURCES}
+  ${GENERIC_SOURCES})
+
 add_custom_target(builtins)
 set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/compiler-rt/lib/builtins/ve/llvm_grow_stack.S b/compiler-rt/lib/builtins/ve/llvm_grow_stack.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/llvm_grow_stack.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE-specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// Destroys %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__llvm_grow_stack)
+  subu.l   %sp, %sp, %s0      # sp -= alloca size
+  and      %sp, -16, %sp      # align sp
+  brge.l.t %sp, %sl, 1f
+  ld       %s63, 0x18(,%tp)   # load param area
+  lea      %s62, 0x13b        # syscall # of grow
+  shm.l    %s62, 0x0(%s63)    # stored at addr:0
+  shm.l    %sl, 0x8(%s63)     # old limit at addr:8
+  shm.l    %sp, 0x10(%s63)    # new limit at addr:16
+  monc
+1:
+  b.l      (,%lr)
+END_COMPILERRT_FUNCTION(__llvm_grow_stack)
+
+#endif // __ve__
diff --git a/compiler-rt/lib/builtins/ve/llvm_grow_stack_align.S b/compiler-rt/lib/builtins/ve/llvm_grow_stack_align.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/llvm_grow_stack_align.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE-specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// Destroys %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__llvm_grow_stack_align)
+  subu.l   %sp, %sp, %s0      # sp -= alloca size
+  and      %sp, %sp, %s1      # align sp
+  brge.l.t %sp, %sl, 1f
+  ld       %s63, 0x18(,%tp)   # load param area
+  lea      %s62, 0x13b        # syscall # of grow
+  shm.l    %s62, 0x0(%s63)    # stored at addr:0
+  shm.l    %sl, 0x8(%s63)     # old limit at addr:8
+  shm.l    %sp, 0x10(%s63)    # new limit at addr:16
+  monc
+1:
+  b.l      (,%lr)
+END_COMPILERRT_FUNCTION(__llvm_grow_stack_align)
+
+#endif // __ve__
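The two routines above share one protocol: drop %sp by the allocation size, align it, and trap into the OS monitor only when the new %sp falls below the stack limit %sl. The following is a minimal C++ model of that control flow, not VE code; the names are hypothetical, and the syscall number 0x13b plus the parameter area at 0x18(%tp) are taken from the comments in the assembly.

#include <cstdint>

struct VeThreadModel {
  uint64_t sp, sl; // models %sp and %sl (the stack limit)
  // Models the monitor call (`monc`) that passes syscall 0x13b and the old
  // and new limits through the parameter area located at 0x18(%tp).
  void (*grow)(uint64_t old_limit, uint64_t new_limit);
};

// Model of __llvm_grow_stack; __llvm_grow_stack_align differs only in taking
// the alignment mask in a second argument (%s1) instead of the constant -16.
inline void llvm_grow_stack_model(VeThreadModel &t, uint64_t alloc_size) {
  t.sp -= alloc_size;     // subu.l   %sp, %sp, %s0
  t.sp &= ~uint64_t(15);  // and      %sp, -16, %sp
  if (t.sp >= t.sl)       // brge.l.t %sp, %sl, 1f
    return;               // still within the committed stack: nothing to do
  t.grow(t.sl, t.sp);     // shm.l ...; monc
}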
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -241,6 +241,10 @@
   /// The remainder matches the regular calling convention.
   WASM_EmscriptenInvoke = 99,
 
+  /// Calling convention used for the NEC SX-Aurora VE __llvm_grow_stack
+  /// runtime function.
+  VE_LLVM_GROW_STACK = 100,
+
   /// The highest possible calling convention ID. Must be some 2^k - 1.
   MaxID = 1023
 };
diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -84,3 +84,6 @@
 // Callee-saved registers
 def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// __llvm_grow_stack destroys %s62 and %s63 only
+def CSR_llvm_grow_stack : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -28,18 +28,23 @@
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
   void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I) const override;
-  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  bool hasBP(const MachineFunction &MF) const;
   bool hasFP(const MachineFunction &MF) const override;
+  // VE always reserves the argument area for call sites in the function,
+  // allocating it immediately on entry to the current function.
+  bool hasReservedCallFrame(const MachineFunction &MF) const override {
+    return true;
+  }
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS = nullptr) const override;
@@ -58,10 +63,8 @@
     return Offsets;
   }
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  bool targetHandlesStackFrameRounding() const override { return true; }
+protected:
+  const VESubtarget &STI;
 
 private:
   // Returns true if MF is a leaf procedure.
@@ -69,11 +72,12 @@
 
   // Emits code for adjusting SP in function prologue/epilogue.
   void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                        MachineBasicBlock::iterator MBBI, int64_t NumBytes,
+                        MaybeAlign MayAlign = MaybeAlign()) const;
 
   // Emits code for extending SP in function prologue/epilogue.
   void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                    MachineBasicBlock::iterator MBBI) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -30,12 +30,13 @@
 
 VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
-                          Align(16)) {}
+                          Align(16)),
+      STI(ST) {}
 
 void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -47,6 +48,7 @@
   //    st %lr, 8(,%sp)
   //    st %got, 24(,%sp)
   //    st %plt, 32(,%sp)
+  //    st %s17, 40(,%sp) iff this function is using %s17 as BP
   //    or %fp, 0, %sp
 
   BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
@@ -69,6 +71,12 @@
       .addImm(0)
       .addImm(32)
       .addReg(VE::SX16);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40)
+        .addReg(VE::SX17);
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
       .addReg(VE::SX11)
       .addImm(0);
@@ -77,7 +85,7 @@
 void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -86,6 +94,7 @@
   // Insert following codes here as epilogue
   //
   //    or %sp, 0, %fp
+  //    ld %s17, 40(,%sp) iff this function is using %s17 as BP
   //    ld %got, 32(,%sp)
   //    ld %plt, 24(,%sp)
   //    ld %lr, 8(,%sp)
@@ -94,6 +103,11 @@
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
       .addReg(VE::SX9)
       .addImm(0);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40);
   BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
       .addReg(VE::SX11)
       .addImm(0)
@@ -115,7 +129,8 @@
 void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
-                                       int NumBytes) const {
+                                       int64_t NumBytes,
+                                       MaybeAlign MaybeAlign) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -143,11 +158,17 @@
       .addReg(VE::SX11)
       .addReg(VE::SX13)
       .addImm(Hi_32(NumBytes));
+
+  if (MaybeAlign) {
+    // and %sp, %sp, ~(Align - 1)
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+        .addReg(VE::SX11)
+        .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
+  }
 }
 
 void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   int NumBytes) const {
+                                   MachineBasicBlock::iterator MBBI) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -186,11 +207,8 @@
                                    MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
-  const VEInstrInfo &TII =
-      *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
-  const VERegisterInfo &RegInfo =
-      *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
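The ANDrm mask in emitSPAdjustment above is easy to misread. A standalone sanity check, under the assumption that VE's "(m)1" immediate produced by M1(m) denotes m one-bits starting at the MSB, shows that the operand really is ~(Align - 1); the helper name m1 here is hypothetical.

#include <cassert>
#include <cstdint>

// Assumed encoding: M1(m) yields m leading one-bits (VE's "(m)1" immediate).
static uint64_t m1(unsigned m) { return m ? ~uint64_t(0) << (64 - m) : 0; }

int main() {
  for (uint64_t align = 16; align <= 4096; align *= 2) {
    unsigned log2 = __builtin_ctzll(align); // Log2_64(align) for powers of two
    assert(m1(64 - log2) == ~(align - 1));  // the mask that aligns %sp down
  }
}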
@@ -209,30 +227,15 @@
          "(probably because it has a dynamic alloca).");
 
   // Get the number of bytes to allocate from the FrameInfo
-  int NumBytes = (int)MFI.getStackSize();
-
-  // The VE ABI requires a reserved 176-byte area in the user's stack, starting
-  // at %sp + 16. This is for the callee Register Save Area (RSA).
-  //
-  // We therefore need to add that offset to the total stack size
-  // after all the stack objects are placed by
-  // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
-  // needs to be aligned *after* the extra size is added, we need to disable
-  // calculateFrameObjectOffsets's built-in stack alignment, by having
-  // targetHandlesStackFrameRounding return true.
-
-  // Add the extra call frame stack size, if needed. (This is the same
-  // code as in PrologEpilogInserter, but also gets disabled by
-  // targetHandlesStackFrameRounding)
-  if (MFI.adjustsStack() && hasReservedCallFrame(MF))
-    NumBytes += MFI.getMaxCallFrameSize();
-
-  // Adds the VE subtarget-specific spill area to the stack
-  // size. Also ensures target-required alignment.
-  NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+  uint64_t NumBytes = MFI.getStackSize();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack,
+  // as described in VESubtarget.cpp, so adjust the frame size for it here.
+  NumBytes = STI.getAdjustedFrameSize(NumBytes);
 
   // Finally, ensure that the size is sufficiently aligned for the
   // data on the stack.
-  NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
+  NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
 
   // Update stack size with corrected value.
   MFI.setStackSize(NumBytes);
@@ -241,16 +244,25 @@
   emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
 
   // Emit stack adjust instructions
-  emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+  MaybeAlign RuntimeAlign =
+      NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+  emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
+
+  if (hasBP(MF)) {
+    // Copy SP to BP.
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0);
+  }
 
   // Emit stack extend instructions
-  emitSPExtend(MF, MBB, MBBI, -NumBytes);
+  emitSPExtend(MF, MBB, MBBI);
 
-  unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+  Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
 
   // Emit ".cfi_def_cfa_register 30".
   unsigned CFIIndex =
-      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
   BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex);
 
@@ -265,7 +277,7 @@
     MachineBasicBlock::iterator I) const {
   if (!hasReservedCallFrame(MF)) {
     MachineInstr &MI = *I;
-    int Size = MI.getOperand(0).getImm();
+    int64_t Size = MI.getOperand(0).getImm();
     if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
       Size = -Size;
 
@@ -281,20 +293,17 @@
   DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
-  int NumBytes = (int)MFI.getStackSize();
+  uint64_t NumBytes = MFI.getStackSize();
 
   // Emit Epilogue instructions to restore %lr
   emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
 }
 
-bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  // Reserve call frame if there are no variable sized objects on the stack.
-  return !MF.getFrameInfo().hasVarSizedObjects();
-}
-
 // hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register. This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled. VE does not implement a frame
+// pointer eliminator yet, but this returns false where possible so that the
+// generated code does not refer to %fp.
 bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
@@ -304,44 +313,57 @@
          MFI.isFrameAddressTaken();
 }
 
+bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+  return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
 int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const {
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const VERegisterInfo *RegInfo = STI.getRegisterInfo();
   const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
   bool isFixed = MFI.isFixedObjectIndex(FI);
 
-  // Addressable stack objects are accessed using neg. offsets from
-  // %fp, or positive offsets from %sp.
-  bool UseFP = true;
+  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
 
-  // VE uses FP-based references in general, even when "hasFP" is
-  // false. That function is rather a misnomer, because %fp is
-  // actually always available, unless isLeafProc.
   if (FuncInfo->isLeafProc()) {
     // If there's a leaf proc, all offsets need to be %sp-based,
     // because we haven't caused %fp to actually point to our frame.
-    UseFP = false;
-  } else if (isFixed) {
-    // Otherwise, argument access should always use %fp.
-    UseFP = true;
-  } else if (RegInfo->needsStackRealignment(MF)) {
+    FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
+  } else if (RegInfo->needsStackRealignment(MF) && !isFixed) {
     // If there is dynamic stack realignment, all local object
-    // references need to be via %sp, to take account of the
-    // re-alignment.
-    UseFP = false;
-  }
-
-  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
-
-  if (UseFP) {
+    // references need to be via %sp or %s17 (BP), to take account
+    // of the re-alignment.
+    if (hasBP(MF))
+      FrameReg = VE::SX17; // %bp
+    else
+      FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
+  } else {
+    // Finally, default to using %fp.
     FrameReg = RegInfo->getFrameRegister(MF);
     return FrameOffset;
   }
+}
+
+static bool LLVM_ATTRIBUTE_UNUSED
+verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
+
+  // If any parameter registers are used, this is not a leaf function.
+  for (unsigned reg = VE::SX0; reg <= VE::SX7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  // If any callee-saved registers are used, this is not a leaf function.
+  for (unsigned reg = VE::SX18; reg <= VE::SX33; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
 
-  FrameReg = VE::SX11; // %sp
-  return FrameOffset + MF.getFrameInfo().getStackSize();
+  return true;
 }
 
 bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -28,6 +28,9 @@
   Lo, // Hi/Lo operations, typically on a global address.
   GETFUNPLT,   // load function address through %plt instruction
+  GETSTACKTOP, // retrieve address of stack top (first address of
+               // locals and temporaries)
+
   GETTLSADDR,  // load address for TLS access
 
   CALL,        // A call instruction.
@@ -81,6 +84,7 @@
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
   SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -583,6 +583,11 @@
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
   /// } VAARG handling
 
+  /// Stack {
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+  /// } Stack
+
   /// Int Ops {
   for (MVT IntVT : {MVT::i32, MVT::i64}) {
     // VE has no REM or DIVREM operations.
@@ -641,6 +646,7 @@
     TARGET_NODE_CASE(Lo)
     TARGET_NODE_CASE(Hi)
     TARGET_NODE_CASE(GETFUNPLT)
+    TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
@@ -860,12 +866,48 @@
       std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
 }
 
+SDValue VETargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // Generate the following code:
+  //   (void)__llvm_grow_stack(size);
+  //   ret = GETSTACKTOP;    // pseudo instruction
+  SDLoc dl(Op);
+
+  SDValue Size = Op.getOperand(1); // Legalize the size.
+  EVT VT = Size->getValueType(0);
+
+  // Prepare arguments
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Size;
+  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+  Args.push_back(Entry);
+  Type *RetTy = Type::getVoidTy(*DAG.getContext());
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue Callee = DAG.getTargetExternalSymbol("__llvm_grow_stack", PtrVT, 0);
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl)
+      .setChain(DAG.getEntryNode())
+      .setCallee(CallingConv::VE_LLVM_GROW_STACK, RetTy, Callee,
+                 std::move(Args))
+      .setDiscardResult(true);
+  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
+  SDValue Chain = pair.second;
+  SDValue Value = DAG.getNode(VEISD::GETSTACKTOP, dl, VT, Chain);
+  SDValue Ops[2] = {Value, Chain};
+  return DAG.getMergeValues(Ops, dl);
+}
+
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
   case ISD::BlockAddress:
     return LowerBlockAddress(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC:
+    return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress:
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -25,6 +25,7 @@
 
 class VEInstrInfo : public VEGenInstrInfo {
   const VERegisterInfo RI;
+  const VESubtarget &Subtarget;
   virtual void anchor();
 
 public:
@@ -81,6 +82,7 @@
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
   bool expandExtendStackPseudo(MachineInstr &MI) const;
+  bool expandGetStackTopPseudo(MachineInstr &MI) const;
 };
 
 } // namespace llvm
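Taken together, LowerDYNAMIC_STACKALLOC above and the GETSTACKTOP expansion below implement: call __llvm_grow_stack(size), then compute the first free address as %sp plus the fixed frame (the 176-byte RSA plus the outgoing-parameter area). A small executable model of that address computation follows; the names are hypothetical, and the 240-byte fixed frame is the value seen as `lea %s0, 240(, %s11)` in the alloca.ll test at the end of this patch.

#include <cassert>
#include <cstdint>

namespace model {
uint64_t sp = 0x10000;                // models %sp after the prologue
constexpr uint64_t kFixedFrame = 240; // 176-byte RSA + 64-byte parameter area

void llvm_grow_stack(uint64_t n) {    // models the runtime call
  sp -= n;
  sp &= ~uint64_t(15);                // keeps 16-byte stack alignment
}
uint64_t get_stack_top() {            // models the GETSTACKTOP expansion:
  return sp + kFixedFrame;            //   lea %dst, kFixedFrame(, %sp)
}
} // namespace model

int main() {
  uint64_t old_sp = model::sp;
  model::llvm_grow_stack(100);        // alloca of 100 bytes
  uint64_t p = model::get_stack_top();
  assert(p == model::sp + model::kFixedFrame);
  assert(p + 100 <= old_sp + model::kFixedFrame); // object fits below old top
}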
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
-#define DEBUG_TYPE "ve"
+#define DEBUG_TYPE "ve-instr-info"
 
 using namespace llvm;
 
@@ -36,7 +36,8 @@
 void VEInstrInfo::anchor() {}
 
 VEInstrInfo::VEInstrInfo(VESubtarget &ST)
-    : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
+    : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(),
+      Subtarget(ST) {}
 
 static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); }
 
@@ -457,6 +458,9 @@
     MI.eraseFromParent(); // The pseudo instruction is gone now.
     return true;
   }
+  case VE::GETSTACKTOP: {
+    return expandGetStackTopPseudo(MI);
+  }
   }
   return false;
 }
@@ -544,3 +548,35 @@
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return true;
 }
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const VEInstrInfo &TII =
+      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  DebugLoc dl = MBB->findDebugLoc(MI);
+
+  // Create the following instruction:
+  //
+  //   dst = %sp + target-specific frame + the size of the parameter area
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack,
+  // as described in VESubtarget.cpp, so adjust the frame size for it here.
+  unsigned NumBytes = Subtarget.getAdjustedFrameSize(0);
+
+  // Also add the size of the parameter area.
+  if (MFI.adjustsStack() && TFL->hasReservedCallFrame(MF))
+    NumBytes += MFI.getMaxCallFrameSize();
+
+  BuildMI(*MBB, MI, dl, TII.get(VE::LEArii))
+      .addDef(MI.getOperand(0).getReg())
+      .addReg(VE::SX11)
+      .addImm(0)
+      .addImm(NumBytes);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return true;
+}
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -417,6 +417,9 @@
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
 
+// GETSTACKTOP
+def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
+                         [SDNPHasChain, SDNPSideEffect]>;
 
 //===----------------------------------------------------------------------===//
@@ -1401,6 +1404,14 @@
                           "# EXTEND STACK GUARD",
                           []>;
 
+// Dynamic stack allocation yields a call to __llvm_grow_stack on VE targets.
+// These calls are needed to probe the stack when allocating past the stack
+// limit %sl (%s8).
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+                         "# GET STACK TOP",
+                         [(set iPTR:$dst, (GetStackTop))]>;
+
 // SETCC pattern matches
 //
 //   CMP %tmp, lhs, rhs    ; compare lhs and rhs
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -39,7 +39,12 @@
 const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                      CallingConv::ID CC) const {
-  return CSR_RegMask;
+  switch (CC) {
+  case CallingConv::VE_LLVM_GROW_STACK:
+    return CSR_llvm_grow_stack_RegMask;
+  default:
+    return CSR_RegMask;
+  }
 }
 
 const uint32_t *VERegisterInfo::getNoPreservedMask() const {
diff --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
+; CHECK-NEXT:    lea %s2, __llvm_grow_stack@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, __llvm_grow_stack@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lea %s0, 240(, %s11)
+; CHECK-NEXT:    lea %s2, bar@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, bar@hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %dyna = alloca i8, i64 %n, align 8
+  call void @bar(i8* %dyna, i64 %n)
+  ret void
+}
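For reference, alloca.ll corresponds to source like the following. This is a hedged reconstruction, not part of the patch; the function names mirror the test, and the exact clang invocation for VE is an assumption.

// Compiled for VE (e.g. --target=ve-unknown-unknown), the variable-length
// buffer below becomes an ISD::DYNAMIC_STACKALLOC node, which now lowers to
// a __llvm_grow_stack call followed by GETSTACKTOP, as checked in alloca.ll.
#include <alloca.h>

extern "C" void bar(char *buf, unsigned long n);

void test(unsigned long n) {
  char *dyna = static_cast<char *>(alloca(n)); // dynamic stack allocation
  bar(dyna, n);
}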