diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -166,6 +166,7 @@
   check_symbol_exists(__sparcv9 "" __SPARCV9)
   check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
   check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
+  check_symbol_exists(__ve__ "" __VE)
   if(__ARM)
     add_default_target_arch(arm)
   elseif(__AARCH64)
@@ -200,6 +201,8 @@
     add_default_target_arch(wasm32)
   elseif(__WEBASSEMBLY64)
     add_default_target_arch(wasm64)
+  elseif(__VE)
+    add_default_target_arch(ve)
   endif()
 endmacro()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -236,6 +236,8 @@
       test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
       test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
+      test_target_arch(ve "__ve__" "--target=ve-unknown-none")
     endif()
     set(COMPILER_RT_OS_SUFFIX "")
   endif()
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -37,6 +37,7 @@
 set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
+set(VE ve)
 
 if(APPLE)
   set(ARM64 arm64 arm64e)
@@ -44,8 +45,11 @@
   set(X86_64 x86_64 x86_64h)
 endif()
 
-set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
+set(ALL_BUILTIN_SUPPORTED_ARCH
+  ${X86} ${X86_64} ${ARM32} ${ARM64}
+  ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
+  ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
+  ${WASM32} ${WASM64} ${VE})
 
 include(CompilerRTUtils)
 include(CompilerRTDarwinUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,6 +573,12 @@
   ${GENERIC_SOURCES}
 )
 
+set(ve_SOURCES
+  ve/grow_stack.S
+  ve/grow_stack_align.S
+  ${GENERIC_TF_SOURCES}
+  ${GENERIC_SOURCES})
+
 add_custom_target(builtins)
 set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/compiler-rt/lib/builtins/ve/grow_stack.S b/compiler-rt/lib/builtins/ve/grow_stack.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// __ve_grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroys %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
+  subu.l %sp, %sp, %s0      # sp -= alloca size
+  and %sp, -16, %sp         # align sp
+  brge.l.t %sp, %sl, 1f
+  ld %s63, 0x18(,%tp)       # load param area
+  lea %s62, 0x13b           # syscall # of grow
+  shm.l %s62, 0x0(%s63)     # stored at addr:0
+  shm.l %sl, 0x8(%s63)      # old limit at addr:8
+  shm.l %sp, 0x10(%s63)     # new limit at addr:16
+  monc
+1:
+  b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack)
+
+#endif // __ve__
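For readers unfamiliar with the VE stack-probe protocol, the routine above can be summarized by the following C model. This is an illustration only, not part of the patch: the name ve_grow_stack_model and the param_area argument are invented stand-ins for the registers and for the per-thread area whose address the asm loads from 0x18(,%tp).

  #include <stdint.h>

  /* Hedged C sketch of __ve_grow_stack: carve out the allocation, and if
   * the new %sp crosses the stack limit %sl, ask the VE monitor to grow
   * the stack via the "grow" request delivered by monc. */
  static uint64_t ve_grow_stack_model(uint64_t sp, uint64_t sl, uint64_t size,
                                      volatile uint64_t *param_area) {
    sp -= size;              /* subu.l %sp, %sp, %s0 */
    sp &= ~(uint64_t)15;     /* and %sp, -16, %sp: keep 16-byte alignment */
    if (sp < sl) {           /* brge.l.t %sp, %sl, 1f: in-bounds fast path */
      param_area[0] = 0x13b; /* syscall # of grow, stored at offset 0 */
      param_area[1] = sl;    /* old stack limit, stored at offset 8 */
      param_area[2] = sp;    /* new stack limit, stored at offset 16 */
      /* here the real routine executes monc to trap into the monitor */
    }
    return sp;
  }

__ve_grow_stack_align (next file) differs only in that the caller passes the alignment mask in %s1 instead of the routine using the fixed -16 mask.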
diff --git a/compiler-rt/lib/builtins/ve/grow_stack_align.S b/compiler-rt/lib/builtins/ve/grow_stack_align.S
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack_align.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// __ve_grow_stack_align routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroys %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+  subu.l %sp, %sp, %s0      # sp -= alloca size
+  and %sp, %sp, %s1         # align sp
+  brge.l.t %sp, %sl, 1f
+  ld %s63, 0x18(,%tp)       # load param area
+  lea %s62, 0x13b           # syscall # of grow
+  shm.l %s62, 0x0(%s63)     # stored at addr:0
+  shm.l %sl, 0x8(%s63)      # old limit at addr:8
+  shm.l %sp, 0x10(%s63)     # new limit at addr:16
+  monc
+1:
+  b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+
+#endif // __ve__
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -28,18 +28,23 @@
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
   void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I) const override;
-  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  bool hasBP(const MachineFunction &MF) const;
   bool hasFP(const MachineFunction &MF) const override;
+  // VE always reserves the argument space for call sites in the function
+  // immediately on entry to the current function.
+  bool hasReservedCallFrame(const MachineFunction &MF) const override {
+    return true;
+  }
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS = nullptr) const override;
@@ -58,10 +63,8 @@
     return Offsets;
   }
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  bool targetHandlesStackFrameRounding() const override { return true; }
+protected:
+  const VESubtarget &STI;
 
 private:
   // Returns true if MF is a leaf procedure.
@@ -69,11 +72,12 @@
 
   // Emits code for adjusting SP in function prologue/epilogue.
   void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                        MachineBasicBlock::iterator MBBI, int64_t NumBytes,
+                        MaybeAlign MayAlign = MaybeAlign()) const;
 
   // Emits code for extending SP in function prologue/epilogue.
   void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                    MachineBasicBlock::iterator MBBI) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -30,12 +30,13 @@
 
 VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
-                          Align(16)) {}
+                          Align(16)),
+      STI(ST) {}
 
 void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -47,6 +48,7 @@
   //    st %lr, 8(,%sp)
   //    st %got, 24(,%sp)
   //    st %plt, 32(,%sp)
+  //    st %s17, 40(,%sp) iff this function is using s17 as BP
   //    or %fp, 0, %sp
 
   BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
@@ -69,6 +71,12 @@
       .addImm(0)
       .addImm(32)
       .addReg(VE::SX16);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40)
+        .addReg(VE::SX17);
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
       .addReg(VE::SX11)
       .addImm(0);
@@ -77,7 +85,7 @@
 void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -86,6 +94,7 @@
   // Insert following codes here as epilogue
   //
   //    or %sp, 0, %fp
+  //    ld %s17, 40(,%sp) iff this function is using s17 as BP
   //    ld %got, 32(,%sp)
   //    ld %plt, 24(,%sp)
   //    ld %lr, 8(,%sp)
@@ -94,6 +103,11 @@
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
       .addReg(VE::SX9)
       .addImm(0);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40);
   BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
       .addReg(VE::SX11)
       .addImm(0)
       .addImm(32);
@@ -115,7 +129,8 @@
 void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
-                                       int NumBytes) const {
+                                       int64_t NumBytes,
+                                       MaybeAlign MaybeAlign) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
       *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -143,11 +158,17 @@
       .addReg(VE::SX11)
       .addReg(VE::SX13)
       .addImm(Hi_32(NumBytes));
+
+  if (MaybeAlign) {
+    // and %sp, %sp, Align-1
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+        .addReg(VE::SX11)
+        .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
+  }
 }
 
 void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   int NumBytes) const {
+                                   MachineBasicBlock::iterator MBBI) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
       *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -186,11 +207,8 @@
                                    MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
-  const VEInstrInfo &TII =
-      *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
-  const VERegisterInfo &RegInfo =
-      *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -209,30 +227,15 @@
          "(probably because it has a dynamic alloca).");
 
   // Get the number of bytes to allocate from the FrameInfo
-  int NumBytes = (int)MFI.getStackSize();
-  // The VE ABI requires a reserved 176-byte area in the user's stack, starting
-  // at %sp + 16. This is for the callee Register Save Area (RSA).
-  //
-  // We therefore need to add that offset to the total stack size
-  // after all the stack objects are placed by
-  // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
-  // needs to be aligned *after* the extra size is added, we need to disable
-  // calculateFrameObjectOffsets's built-in stack alignment, by having
-  // targetHandlesStackFrameRounding return true.
-
-  // Add the extra call frame stack size, if needed. (This is the same
-  // code as in PrologEpilogInserter, but also gets disabled by
-  // targetHandlesStackFrameRounding)
-  if (MFI.adjustsStack() && hasReservedCallFrame(MF))
-    NumBytes += MFI.getMaxCallFrameSize();
-
-  // Adds the VE subtarget-specific spill area to the stack
-  // size. Also ensures target-required alignment.
-  NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+  uint64_t NumBytes = MFI.getStackSize();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack,
+  // as described in VESubtarget.cpp, so we adjust for it here.
+  NumBytes = STI.getAdjustedFrameSize(NumBytes);
 
   // Finally, ensure that the size is sufficiently aligned for the
   // data on the stack.
-  NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
+  NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
 
   // Update stack size with corrected value.
   MFI.setStackSize(NumBytes);
@@ -241,16 +244,25 @@
   emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
 
   // Emit stack adjust instructions
-  emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+  MaybeAlign RuntimeAlign =
+      NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+  emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
+
+  if (hasBP(MF)) {
+    // Copy SP to BP.
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0);
+  }
 
   // Emit stack extend instructions
-  emitSPExtend(MF, MBB, MBBI, -NumBytes);
+  emitSPExtend(MF, MBB, MBBI);
 
-  unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+  Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
 
   // Emit ".cfi_def_cfa_register 30".
   unsigned CFIIndex =
-      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
   BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex);
 
@@ -265,7 +277,7 @@
                                               MachineBasicBlock::iterator I) const {
   if (!hasReservedCallFrame(MF)) {
     MachineInstr &MI = *I;
-    int Size = MI.getOperand(0).getImm();
+    int64_t Size = MI.getOperand(0).getImm();
     if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
       Size = -Size;
 
@@ -281,20 +293,17 @@
   DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
-  int NumBytes = (int)MFI.getStackSize();
+  uint64_t NumBytes = MFI.getStackSize();
 
   // Emit Epilogue instructions to restore %lr
   emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
 }
 
-bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  // Reserve call frame if there are no variable sized objects on the stack.
-  return !MF.getFrameInfo().hasVarSizedObjects();
-}
-
 // hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register.  This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled.  For the case of VE, we don't
+// implement an FP eliminator yet, but we return false here so that generated
+// code does not refer to %fp.
 bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
@@ -304,44 +313,57 @@
          MFI.isFrameAddressTaken();
 }
 
+bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+  return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
 int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const {
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const VERegisterInfo *RegInfo = STI.getRegisterInfo();
   const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
   bool isFixed = MFI.isFixedObjectIndex(FI);
 
-  // Addressable stack objects are accessed using neg. offsets from
-  // %fp, or positive offsets from %sp.
-  bool UseFP = true;
+  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
 
-  // VE uses FP-based references in general, even when "hasFP" is
-  // false. That function is rather a misnomer, because %fp is
-  // actually always available, unless isLeafProc.
   if (FuncInfo->isLeafProc()) {
     // If there's a leaf proc, all offsets need to be %sp-based,
     // because we haven't caused %fp to actually point to our frame.
-    UseFP = false;
-  } else if (isFixed) {
-    // Otherwise, argument access should always use %fp.
-    UseFP = true;
-  } else if (RegInfo->needsStackRealignment(MF)) {
+    FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
+  } else if (RegInfo->needsStackRealignment(MF) && !isFixed) {
     // If there is dynamic stack realignment, all local object
-    // references need to be via %sp, to take account of the
-    // re-alignment.
-    UseFP = false;
-  }
-
-  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
-
-  if (UseFP) {
+    // references need to be via %sp or %s17 (bp), to take account
+    // of the re-alignment.
+    if (hasBP(MF))
+      FrameReg = VE::SX17; // %bp
+    else
+      FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
+  } else {
+    // Finally, default to using %fp.
     FrameReg = RegInfo->getFrameRegister(MF);
     return FrameOffset;
   }
+}
+
+static bool LLVM_ATTRIBUTE_UNUSED
+verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
+
+  // If any parameter registers are used, this is not a leaf function.
+  for (unsigned reg = VE::SX0; reg <= VE::SX7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  // If any callee-saved registers are used, this is not a leaf function.
+  for (unsigned reg = VE::SX18; reg <= VE::SX33; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
 
-  FrameReg = VE::SX11; // %sp
-  return FrameOffset + MF.getFrameInfo().getStackSize();
+  return true;
 }
 
 bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
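As an orientation aid for the prologue/epilogue above, the slots they touch at the top of the frame can be pictured with the hedged C sketch below. The struct and field names are invented; the offsets mirror the st/ld comments in emitPrologueInsns/emitEpilogueInsns, and the %fp slot at offset 0 follows the VE ABI.

  #include <stdint.h>

  /* Sketch of the head of the 176-byte area the VE ABI reserves at %sp. */
  struct ve_frame_head {
    uint64_t fp;     /*  0(,%sp): caller's frame pointer %fp (%s9)     */
    uint64_t lr;     /*  8(,%sp): return address %lr (%s10)            */
    uint64_t slot16; /* 16(,%sp): not written by this prologue         */
    uint64_t got;    /* 24(,%sp): %got (%s15)                          */
    uint64_t plt;    /* 32(,%sp): %plt (%s16)                          */
    uint64_t bp;     /* 40(,%sp): %s17, saved iff it is used as the BP */
    /* ... remainder of the reserved area ...                          */
  };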
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,8 +27,10 @@
   Hi,
   Lo, // Hi/Lo operations, typically on a global address.
 
-  GETFUNPLT,  // load function address through %plt insturction
-  GETTLSADDR, // load address for TLS access
+  GETFUNPLT,   // load function address through %plt instruction
+  GETTLSADDR,  // load address for TLS access
+  GETSTACKTOP, // retrieve address of stack top (first address of
+               // locals and temporaries)
 
   CALL,     // A call instruction.
   RET_FLAG, // Return with a flag operand.
@@ -81,6 +83,7 @@
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
   SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -583,6 +583,11 @@
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
   /// } VAARG handling
 
+  /// Stack {
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+  /// } Stack
+
   /// Int Ops {
   for (MVT IntVT : {MVT::i32, MVT::i64}) {
     // VE has no REM or DIVREM operations.
@@ -641,6 +646,7 @@
     TARGET_NODE_CASE(Lo)
     TARGET_NODE_CASE(Hi)
     TARGET_NODE_CASE(GETFUNPLT)
+    TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
     TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(RET_FLAG)
@@ -860,12 +866,82 @@
                  std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
 }
 
+SDValue VETargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // Generate the following code:
+  //   (void)__ve_grow_stack(size);
+  //   ret = GETSTACKTOP; // pseudo instruction
+  SDLoc dl(Op);
+
+  // Get the inputs.
+  SDNode *Node = Op.getNode();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  MaybeAlign Alignment(Op.getConstantOperandVal(2));
+  EVT VT = Node->getValueType(0);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
+  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  const Align StackAlign(TFI.getStackAlignment());
+  bool NeedsAlign = Alignment && Alignment > StackAlign;
+
+  // Prepare arguments
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Size;
+  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+  Args.push_back(Entry);
+  if (NeedsAlign) {
+    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT);
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Args.push_back(Entry);
+  }
+  Type *RetTy = Type::getVoidTy(*DAG.getContext());
+
+  EVT PtrVT = Op.getValueType();
+  SDValue Callee;
+  if (NeedsAlign) {
+    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
+  } else {
+    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
+  }
+
+  // TODO: Use precise callee-saved register masks for the grow_stack
+  // builtins.  They clobber only %s62 and %s63, so the standard calling
+  // convention grossly overapproximates the clobber set, but it is good
+  // enough to get this working.
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+      .setDiscardResult(true);
+  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
+  Chain = pair.second;
+  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, dl, VT, Chain);
+  if (NeedsAlign) {
+    Result = DAG.getNode(ISD::ADD, dl, VT, Result,
+                         DAG.getConstant((Alignment->value() - 1ULL), dl, VT));
+    Result = DAG.getNode(ISD::AND, dl, VT, Result,
+                         DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
+  }
+  // Chain = Result.getValue(1);
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+  SDValue Ops[2] = {Result, Chain};
+  return DAG.getMergeValues(Ops, dl);
+}
+
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
   case ISD::BlockAddress:
     return LowerBlockAddress(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC:
+    return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress:
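Taken together, the lowering above is morally equivalent to the following C sketch of a dynamic alloca (hedged: the grow-stack call updates %sp itself, which is modeled inline here, and frame_fixed_size is an invented name standing for the 176-byte reserved area plus any reserved call-frame size that GETSTACKTOP folds into its immediate):

  #include <stdint.h>

  /* Model of the DAG emitted for "p = alloca(size), align a". */
  static uint64_t lowered_dynamic_alloca(uint64_t sp, uint64_t size,
                                         uint64_t a, /* requested align */
                                         uint64_t frame_fixed_size) {
    /* __ve_grow_stack / __ve_grow_stack_align probe and drop %sp: */
    sp -= size;
    sp &= (a <= 16) ? ~(uint64_t)15 : ~(a - 1);
    /* GETSTACKTOP: first address above the fixed part of the new frame. */
    uint64_t p = sp + frame_fixed_size;
    if (a > 16) /* the extra ADD/AND emitted when NeedsAlign is true */
      p = (p + a - 1) & ~(a - 1);
    return p;
  }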
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -81,6 +81,7 @@
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
   bool expandExtendStackPseudo(MachineInstr &MI) const;
+  bool expandGetStackTopPseudo(MachineInstr &MI) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
-#define DEBUG_TYPE "ve"
+#define DEBUG_TYPE "ve-instr-info"
 
 using namespace llvm;
@@ -457,6 +457,9 @@
     MI.eraseFromParent(); // The pseudo instruction is gone now.
     return true;
   }
+  case VE::GETSTACKTOP: {
+    return expandGetStackTopPseudo(MI);
+  }
   }
   return false;
 }
@@ -464,8 +467,8 @@
 bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const VEInstrInfo &TII =
-      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+  const VEInstrInfo &TII = *STI.getInstrInfo();
   DebugLoc dl = MBB.findDebugLoc(MI);
 
   // Create following instructions and multiple basic blocks.
@@ -544,3 +547,35 @@
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return true;
 }
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  DebugLoc dl = MBB->findDebugLoc(MI);
+
+  // Create the following instruction:
+  //
+  //   dst = %sp + target-specific frame + the size of the parameter area
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const VEFrameLowering &TFL = *STI.getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack,
+  // as described in VESubtarget.cpp, so we adjust for it here.
+  unsigned NumBytes = STI.getAdjustedFrameSize(0);
+
+  // Also add the size of the parameter area.
+  if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
+    NumBytes += MFI.getMaxCallFrameSize();
+
+  BuildMI(*MBB, MI, dl, TII.get(VE::LEArii))
+      .addDef(MI.getOperand(0).getReg())
+      .addReg(VE::SX11)
+      .addImm(0)
+      .addImm(NumBytes);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return true;
+}
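A worked example of expandGetStackTopPseudo's arithmetic, matching the tests at the end of this patch (hedged: the 64-byte figure is presumably the parameter area reserved for the call to bar there):

  /* GETSTACKTOP expands to "lea %dst, NumBytes(, %s11)" where:         */
  unsigned adjusted = 176;  /* STI.getAdjustedFrameSize(0): ABI area    */
  unsigned call_area = 64;  /* MFI.getMaxCallFrameSize() in the tests   */
  unsigned num_bytes = adjusted + call_area; /* == 240, as checked by
                                                "lea %s0, 240(, %s11)"  */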
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -414,6 +414,9 @@
                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                         SDNPVariadic]>;
 
+// GETSTACKTOP
+def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
+                         [SDNPHasChain, SDNPSideEffect]>;
 
 //===----------------------------------------------------------------------===//
@@ -1398,6 +1401,14 @@
                            "# EXTEND STACK GUARD",
                            []>;
 
+// Dynamic stack allocation yields a call to __ve_grow_stack for VE targets.
+// These calls are needed to probe the stack when allocating beyond the
+// stack limit %sl (%s8).
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+                         "# GET STACK TOP",
+                         [(set iPTR:$dst, (GetStackTop))]>;
 
 // SETCC pattern matches
 //
 //   CMP %tmp, lhs, rhs ; compare lhs and rhs
diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h
--- a/llvm/lib/Target/VE/VESubtarget.h
+++ b/llvm/lib/Target/VE/VESubtarget.h
@@ -42,7 +42,7 @@
                const TargetMachine &TM);
 
   const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
-  const TargetFrameLowering *getFrameLowering() const override {
+  const VEFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
   const VERegisterInfo *getRegisterInfo() const override {
diff --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK: .LBB{{[0-9]+}}_2:
+;;; (register saving) FIXME: spilling due to __ve_grow_stack called with C CC
+; CHECK: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea [[GSREG:%s[0-9]+]], __ve_grow_stack@lo
+; CHECK-NEXT: and [[GSREG]], [[GSREG]], (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, [[GSREG]])
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: lea [[BREG:%s[0-9]+]], bar@lo
+; CHECK-NEXT: and [[BREG]], [[BREG]], (32)0
+; CHECK-NEXT: lea.sl %s12, bar@hi(, [[BREG]])
+;;; (register reloading)
+; CHECK: bsic %s10, (, %s12)
+  %dyna = alloca i8, i64 %n, align 8
+  call void @bar(i8* %dyna, i64 %n)
+  ret void
+}
diff --git a/llvm/test/CodeGen/VE/alloca_aligned.ll b/llvm/test/CodeGen/VE/alloca_aligned.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca_aligned.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK: .LBB{{[0-9]+}}_2:
+;;; (register saving) FIXME: spilling due to __ve_grow_stack_align called with C CC
+; CHECK: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea [[GSREG:%s[0-9]+]], __ve_grow_stack_align@lo
+; CHECK-NEXT: and [[GSREG]], [[GSREG]], (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, [[GSREG]])
+; CHECK-NEXT: or %s1, -32, (0)1
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: lea %s0, 31(, %s0)
+; CHECK-NEXT: and %s0, -32, %s0
+; CHECK-NEXT: lea [[BREG:%s[0-9]+]], bar@lo
+; CHECK-NEXT: and [[BREG]], [[BREG]], (32)0
+; CHECK-NEXT: lea.sl %s12, bar@hi(, [[BREG]])
+;;; (register reloading)
+; CHECK: bsic %s10, (, %s12)
+  %dyna = alloca i8, i64 %n, align 32
+  call void @bar(i8* %dyna, i64 %n)
+  ret void
+}
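For reference, the tests above correspond to C source along these lines (hedged illustration):

  extern void bar(char *p, long n);

  void test(long n) {
    /* dynamic alloca -> __ve_grow_stack (or the _align variant) followed
     * by GETSTACKTOP, exactly the sequence FileCheck verifies above */
    char *p = __builtin_alloca(n);
    bar(p, n);
  }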