diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt --- a/llvm/lib/Target/VE/CMakeLists.txt +++ b/llvm/lib/Target/VE/CMakeLists.txt @@ -14,6 +14,7 @@ VEISelDAGToDAG.cpp VEISelLowering.cpp VEInstrInfo.cpp + VEMachineFunctionInfo.cpp VEMCInstLower.cpp VERegisterInfo.cpp VESubtarget.cpp diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -13,6 +13,17 @@ //===----------------------------------------------------------------------===// // Aurora VE //===----------------------------------------------------------------------===// +def CC_VE_C_Stack: CallingConv<[ + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + CCIfType<[f32], CCCustom<"allocateFloat">>, + + // All of the rest are assigned to the stack in 8-byte aligned units. + CCAssignToStack<0, 8> +]>; def CC_VE : CallingConv<[ // All arguments get passed in generic registers if there is space. @@ -33,6 +44,9 @@ // long long/double --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // Alternatively, they are assigned to the stack in 8-byte aligned units. 
+ CCDelegateTo<CC_VE_C_Stack> ]>; def RetCC_VE : CallingConv<[ diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -12,6 +12,7 @@ #include "VEFrameLowering.h" #include "VEInstrInfo.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -297,9 +298,40 @@ int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + bool isFixed = MFI.isFixedObjectIndex(FI); + // Addressable stack objects are accessed using neg. offsets from // %fp, or positive offsets from %sp. + bool UseFP = true; + + // VE uses FP-based references in general, even when "hasFP" is + // false. That function is rather a misnomer, because %fp is + // actually always available, unless isLeafProc. + if (FuncInfo->isLeafProc()) { + // If there's a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. + UseFP = false; + } else if (isFixed) { + // Otherwise, argument access should always use %fp. + UseFP = true; + } else if (RegInfo->needsStackRealignment(MF)) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp, to take account of the + // re-alignment. 
+ UseFP = false; + } + int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); + + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; + } + FrameReg = VE::SX11; // %sp return FrameOffset + MF.getFrameInfo().getStackSize(); } @@ -321,5 +353,8 @@ RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - assert(isLeafProc(MF) && "TODO implement for non-leaf procs"); + if (isLeafProc(MF)) { + VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>(); + MFI->setLeafProc(true); + } } diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -27,6 +27,7 @@ Hi, Lo, // Hi/Lo operations, typically on a global address. + CALL, // A call instruction. RET_FLAG, // Return with a flag operand. }; } @@ -55,6 +56,9 @@ const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -37,6 +37,28 @@ // Calling Convention Implementation //===----------------------------------------------------------------------===// +static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::f32: { + // Allocate stack like below + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Use align=8 for dummy area to align the beginning of these 2 areas. + State.AllocateStack(4, 8); // for empty area + // Use align=4 for value to place it just after the dummy area. 
+ unsigned Offset = State.AllocateStack(4, 4); // for float value area + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; + } + default: + return false; + } +} + #include "VEGenCallingConv.inc" bool VETargetLowering::CanLowerReturn( @@ -114,6 +136,8 @@ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); + // Get the base offset of the incoming arguments stack space. + unsigned ArgsBaseOffset = 176; // Get the size of the preserved arguments area unsigned ArgsPreserved = 64; @@ -129,7 +153,6 @@ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - assert(VA.isRegLoc() && "TODO implement argument passing on stack"); if (VA.isRegLoc()) { // This argument is passed in a register. // All integer register arguments are promoted by the caller to i64. @@ -166,6 +189,18 @@ InVals.push_back(Arg); continue; } + + // The registers are exhausted. This argument was passed on the stack. + assert(VA.isMemLoc()); + // The CC_VE_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+176. + unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); + InVals.push_back( + DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(MF, FI))); } assert(!IsVarArg && "TODO implement var args"); @@ -198,6 +233,224 @@ // TargetLowering Implementation //===----------------------------------------------------------------------===// +SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // VE target does not yet support tail call optimization. 
+ CLI.IsTailCall = false; + + // Get the base offset of the outgoing arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 8 * 8u; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, 8); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + + assert(!CLI.IsVarArg); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + + // Keep stack frames 16-byte aligned. + ArgsSize = alignTo(ArgsSize, 16); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); + + // Collect the set of registers to pass to the function and their values. + // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory operations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector<SDValue, 8> MemOpChains; + + // VE needs to get address of callee function in a register + // So, prepare to copy it to SX12 here. + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. 
+ SDValue Callee = CLI.Callee; + + assert(!isPositionIndependent() && "TODO PIC"); + + // Turn GlobalAddress/ExternalSymbol node into a value node + // containing the address of them here. + if (isa<GlobalAddressSDNode>(Callee)) { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } else if (isa<ExternalSymbolSDNode>(Callee)) { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + + RegsToPass.push_back(std::make_pair(VE::SX12, Callee)); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); + // The argument area starts at %fp+176 in the callee frame, + // %sp+176 in ours. + SDValue PtrOff = + DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); + MemOpChains.push_back( + DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo())); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. 
The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, + RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const VERegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), + DAG.getIntPtrConstant(0, DL, true), InGlue, DL); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Set inreg flag manually for codegen generated library calls that + // return float. 
+ if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS) + CLI.Ins[0].Flags.setInReg(); + + RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = VA.getLocReg(); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, DL, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. + if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
@@ -268,6 +521,7 @@ break; TARGET_NODE_CASE(Lo) TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(CALL) TARGET_NODE_CASE(RET_FLAG) } #undef TARGET_NODE_CASE @@ -320,6 +574,7 @@ } /// Custom Lower { + SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return makeAddress(Op, DAG); diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -150,6 +150,11 @@ let EncoderMethod = "getBranchTarget32OpValue"; } +def calltarget : Operand<i64> { + let EncoderMethod = "getCallTargetOpValue"; + let DecoderMethod = "DecodeCall"; +} + def simm7Op32 : Operand<i32> { let DecoderMethod = "DecodeSIMM7"; } @@ -192,7 +197,10 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def call : SDNode<"VEISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -549,6 +557,11 @@ defm LEA32 : RMm<"lea", 0x06, I32, i32, simm7Op32, simm32Op32, add>; } +let cx = 0, cy = 1, cz = 0, sz = 0, hasSideEffects = 0 in { + def LEAasx : RM< + 0x06, (outs I64:$sx), (ins MEMri:$addr), + "lea $sx,$addr", [(set iPTR:$sx, ADDRri:$addr)]>; +} // 5.3.2.2. 
Fixed-Point Arithmetic Operation Instructions @@ -776,6 +789,27 @@ "monc">; //===----------------------------------------------------------------------===// +// Instructions for CodeGenOnly +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { + +// Call instructions (bsic): both forms list SX10 in Defs and print %lr. +let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in { +let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in +def CALL : RM< + 0x08, (outs), (ins calltarget:$imm32, variable_ops), + "bsic %lr, $imm32">; +// Register-indirect form: the call target is held in $sz. +let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in +def CALLr : RM< + 0x08, (outs), (ins I64:$sz, variable_ops), + "bsic %lr, (,$sz)">; +} + +} + +//===----------------------------------------------------------------------===// // Pattern Matchings //===----------------------------------------------------------------------===// @@ -893,6 +927,13 @@ (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32), (tglobaladdr:$in1))>; +// Calls: tglobaladdr targets select CALL, i64 register targets select CALLr. 
+def : Pat<(call tglobaladdr:$dst), + (CALL tglobaladdr:$dst)>; +def : Pat<(call i64:$dst), + (CALLr i64:$dst)>; + + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -0,0 +1,35 @@ +//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares VE specific per-machine-function information. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class VEMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + +private: + /// IsLeafProc - True if the function is a leaf procedure. + bool IsLeafProc; + +public: + VEMachineFunctionInfo() : IsLeafProc(false) {} + explicit VEMachineFunctionInfo(MachineFunction &MF) : IsLeafProc(false) {} + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp @@ -0,0 +1,13 @@ +//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VEMachineFunctionInfo.h" + +using namespace llvm; + +void VEMachineFunctionInfo::anchor() {} diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/call.ll @@ -0,0 +1,124 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i32 @sample_call() { +; CHECK-LABEL: sample_call: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, sample_add@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, sample_add@hi(%s0) +; CHECK-NEXT: or %s0, 1, (0)1 +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @sample_add(i32 1, i32 2) + ret i32 %r +} + +declare i32 @sample_add(i32, i32) + +define i32 @stack_call_int() { +; CHECK-LABEL: stack_call_int: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 10, (0)1 +; CHECK-NEXT: stl %s0, 248(,%s11) +; CHECK-NEXT: or %s34, 9, (0)1 +; CHECK-NEXT: lea %s0, stack_callee_int@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, stack_callee_int@hi(%s0) +; CHECK-NEXT: or %s0, 1, (0)1 +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: or %s2, 3, (0)1 +; CHECK-NEXT: or %s3, 4, (0)1 +; CHECK-NEXT: or %s4, 5, (0)1 +; CHECK-NEXT: or %s5, 6, (0)1 +; CHECK-NEXT: or %s6, 7, (0)1 +; CHECK-NEXT: or %s7, 8, (0)1 +; CHECK-NEXT: stl %s34, 240(,%s11) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) + ret i32 %r +} + +declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) + +define i32 @stack_call_int_szext() { +; CHECK-LABEL: stack_call_int_szext: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, -1, (0)1 +; CHECK-NEXT: stl %s0, 248(,%s11) +; CHECK-NEXT: lea %s34, 65535 +; 
CHECK-NEXT: lea %s1, stack_callee_int_szext@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, stack_callee_int_szext@hi(%s1) +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: or %s2, 3, (0)1 +; CHECK-NEXT: or %s3, 4, (0)1 +; CHECK-NEXT: or %s4, 5, (0)1 +; CHECK-NEXT: or %s5, 6, (0)1 +; CHECK-NEXT: or %s6, 7, (0)1 +; CHECK-NEXT: or %s7, 8, (0)1 +; CHECK-NEXT: stl %s34, 240(,%s11) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1) + ret i32 %r +} + +declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext) + +define float @stack_call_float() { +; CHECK-LABEL: stack_call_float: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 1092616192 +; CHECK-NEXT: stl %s0, 252(,%s11) +; CHECK-NEXT: lea %s0, 1091567616 +; CHECK-NEXT: lea %s1, stack_callee_float@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(%s1) +; CHECK-NEXT: lea.sl %s1, 1065353216 +; CHECK-NEXT: lea.sl %s2, 1073741824 +; CHECK-NEXT: lea.sl %s3, 1077936128 +; CHECK-NEXT: lea.sl %s4, 1082130432 +; CHECK-NEXT: lea.sl %s5, 1084227584 +; CHECK-NEXT: lea.sl %s6, 1086324736 +; CHECK-NEXT: lea.sl %s7, 1088421888 +; CHECK-NEXT: lea.sl %s34, 1090519040 +; CHECK-NEXT: stl %s0, 244(,%s11) +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: or %s1, 0, %s2 +; CHECK-NEXT: or %s2, 0, %s3 +; CHECK-NEXT: or %s3, 0, %s4 +; CHECK-NEXT: or %s4, 0, %s5 +; CHECK-NEXT: or %s5, 0, %s6 +; CHECK-NEXT: or %s6, 0, %s7 +; CHECK-NEXT: or %s7, 0, %s34 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0) + ret float %r +} + +declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float) + +define float 
@stack_call_float2(float %p0) { +; CHECK-LABEL: stack_call_float2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: stu %s0, 252(,%s11) +; CHECK-NEXT: lea %s1, stack_callee_float@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(%s1) +; CHECK-NEXT: stu %s0, 244(,%s11) +; CHECK-NEXT: or %s1, 0, %s0 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: or %s5, 0, %s0 +; CHECK-NEXT: or %s6, 0, %s0 +; CHECK-NEXT: or %s7, 0, %s0 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0) + ret float %r +} + diff --git a/llvm/test/CodeGen/VE/callee.ll b/llvm/test/CodeGen/VE/callee.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/callee.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i32 @stack_stack_arg_i32_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { +; CHECK-LABEL: stack_stack_arg_i32_r9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldl.sx %s0, 424(,%s11) +; CHECK-NEXT: or %s11, 0, %s9 + ret i32 %9 +} + +define i64 @stack_stack_arg_i64_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) { +; CHECK-LABEL: stack_stack_arg_i64_r9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ld %s0, 424(,%s11) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 %9 +} + +define float @stack_stack_arg_f32_r9(float %p0, float %p1, float %p2, float %p3, float %p4, float %p5, float %p6, float %p7, float %s0, float %s1) { +; CHECK-LABEL: stack_stack_arg_f32_r9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldu %s0, 428(,%s11) +; CHECK-NEXT: or %s11, 0, %s9 + ret float %s1 +} + +define i32 @stack_stack_arg_i32f32_r8(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) { +; CHECK-LABEL: 
stack_stack_arg_i32f32_r8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldl.sx %s0, 416(,%s11) +; CHECK-NEXT: or %s11, 0, %s9 + ret i32 %s0 +} + +define float @stack_stack_arg_i32f32_r9(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) { +; CHECK-LABEL: stack_stack_arg_i32f32_r9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldu %s0, 428(,%s11) +; CHECK-NEXT: or %s11, 0, %s9 + ret float %s1 +} diff --git a/llvm/test/CodeGen/VE/callstruct.ll b/llvm/test/CodeGen/VE/callstruct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/callstruct.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +%struct.a = type { i32, i32 } + +@A = common global %struct.a zeroinitializer, align 4 + +; Callee side: store both fields through the sret pointer argument. +define void @fun(%struct.a* noalias nocapture sret %a, i32 %p1, i32 %p2) { +; CHECK-LABEL: fun: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: stl %s1, (,%s0) +; CHECK-NEXT: stl %s2, 4(,%s0) +; CHECK-NEXT: or %s11, 0, %s9 + %a.zero = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 0 + store i32 %p1, i32* %a.zero, align 4 + %a.one = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 1 + store i32 %p2, i32* %a.one, align 4 + ret void +} + +; Caller side: pass a stack slot as sret, then copy the result to @A. +define void @caller() { +; CHECK-LABEL: caller: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, callee@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, callee@hi(%s0) +; CHECK-NEXT: lea %s0,-8(,%s9) +; CHECK-NEXT: or %s1, 3, (0)1 +; CHECK-NEXT: or %s2, 4, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: ld %s0, -8(,%s9) +; CHECK-NEXT: lea %s1, A@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, A@hi(%s1) +; CHECK-NEXT: st %s0, (,%s1) +; CHECK-NEXT: or %s11, 0, %s9 + %a = alloca i64, align 8 + %a.bc = bitcast i64* %a to %struct.a* + call void @callee(%struct.a* nonnull sret %a.bc, i32 3, i32 4) + %a.val = load i64, i64* %a, align 8 + store i64 %a.val, 
i64* bitcast (%struct.a* @A to i64*), align 4 + ret void +} + +declare void @callee(%struct.a* sret, i32, i32)