diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -49,6 +49,19 @@
   CCDelegateTo<CC_VE_C_Stack>
 ]>;
 
+// All arguments are passed on the stack for varargs functions and
+// non-prototyped functions.
+def CC_VE2 : CallingConv<[
+  // float --> needs special handling, as shown below.
+  //    0      4
+  //    +------+------+
+  //    | empty| float|
+  //    +------+------+
+  CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+  CCAssignToStack<0, 8>
+]>;
+
 def RetCC_VE : CallingConv<[
   // Promote i1/i8/i16 arguments to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -71,6 +71,8 @@
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -13,6 +13,7 @@
 #include "VEISelLowering.h"
 #include "MCTargetDesc/VEMCExpr.h"
+#include "VEMachineFunctionInfo.h"
 #include "VERegisterInfo.h"
 #include "VETargetMachine.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -203,7 +204,20 @@
                                  MachinePointerInfo::getFixedStack(MF, FI)));
   }
 
-  assert(!IsVarArg && "TODO implement var args");
+  if (!IsVarArg)
+    return Chain;
+
+  // This function takes variable arguments, some of which may have been passed
+  // in registers %s0-%s7.
+  //
+  // The va_start intrinsic needs to know the offset to the first variable
+  // argument.
+  // TODO: need to calculate offset correctly once we support f128.
+  unsigned ArgOffset = ArgLocs.size() * 8;
+  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+  // Skip the 176 bytes of register save area.
+  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
+
   return Chain;
 }
@@ -258,7 +272,16 @@
   // by CC_VE would be correct now.
   CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
 
-  assert(!CLI.IsVarArg);
+  // VE requires using both registers and the stack for varargs and
+  // non-prototyped functions.
+  bool UseBoth = CLI.IsVarArg;
+
+  // Analyze the operands again with CC_VE2 if they must be stored BOTH.
+  SmallVector<CCValAssign, 16> ArgLocs2;
+  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                  ArgLocs2, *DAG.getContext());
+  if (UseBoth)
+    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
 
   // Get the size of the outgoing arguments stack space requirement.
   unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -326,7 +349,9 @@
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-      continue;
+      if (!UseBoth)
+        continue;
+      VA = ArgLocs2[i];
     }
 
     assert(VA.isMemLoc());
@@ -521,6 +546,15 @@
   setOperationAction(ISD::BlockAddress, PtrVT, Custom);
   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
 
+  /// VAARG handling {
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  // VAARG needs to be lowered to access memory with 8-byte alignment.
+  setOperationAction(ISD::VAARG, MVT::Other, Custom);
+  // Use the default implementation.
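+  // (Expand lowers VACOPY to a load and store of the single va_list
+  // pointer, and turns VAEND into a no-op.)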
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  /// } VAARG handling
+
   // VE has no REM or DIVREM operations.
   for (MVT IntVT : MVT::integer_valuetypes()) {
     setOperationAction(ISD::UREM, IntVT, Expand);
@@ -623,6 +657,66 @@
   return makeAddress(Op, DAG);
 }
 
+SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // Need frame address to find the address of VarArgsFrameIndex.
+  MF.getFrameInfo().setFrameAddressIsTaken(true);
+
+  // vastart just stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
+  SDLoc DL(Op);
+  SDValue Offset =
+      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
+                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
+                      MachinePointerInfo(SV));
+}
+
+SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0);
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  EVT PtrVT = VAListPtr.getValueType();
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  SDLoc DL(Node);
+  SDValue VAList =
+      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
+  SDValue Chain = VAList.getValue(1);
+  SDValue NextPtr;
+
+  if (VT == MVT::f32) {
+    // float --> needs special handling, as shown below.
+    //    0      4
+    //    +------+------+
+    //    | empty| float|
+    //    +------+------+
+    // Increment the pointer, VAList, by 8 to the next vaarg.
+    NextPtr =
+        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
+    // Then, adjust VAList to point at the float, 4 bytes into the slot.
+    unsigned InternalOffset = 4;
+    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                         DAG.getConstant(InternalOffset, DL, PtrVT));
+  } else {
+    // Increment the pointer, VAList, by 8 to the next vaarg.
+    NextPtr =
+        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
+  }
+
+  // Store the incremented VAList to the legalized pointer.
+  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
+
+  // Load the actual argument out of the pointer VAList.
+  // We can't count on greater alignment than the word size.
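+  // E.g., an f32 value loads with min(64, 32) / 8 = 4-byte alignment, while
+  // i64/f64 values load with the full 8-byte slot alignment.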
+  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
+                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
+}
+
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
@@ -631,6 +725,10 @@
     return LowerBlockAddress(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
+  case ISD::VASTART:
+    return LowerVASTART(Op, DAG);
+  case ISD::VAARG:
+    return LowerVAARG(Op, DAG);
   }
 }
 /// } Custom Lower
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -60,6 +60,23 @@
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
 
+  /// Stack Spill & Reload {
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
+                               int &FrameIndex) const override;
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+                           bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const override;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI, unsigned DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override;
+  /// } Stack Spill & Reload
+
   // Lower pseudo instructions after register allocation.
   bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -298,6 +298,118 @@
   }
 }
 
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex) const {
+  if (MI.getOpcode() == VE::LDSri || // I64
+      MI.getOpcode() == VE::LDLri || // I32
+      MI.getOpcode() == VE::LDUri    // F32
+  ) {
+    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+        MI.getOperand(2).getImm() == 0) {
+      FrameIndex = MI.getOperand(1).getIndex();
+      return MI.getOperand(0).getReg();
+    }
+  }
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                         int &FrameIndex) const {
+  if (MI.getOpcode() == VE::STSri || // I64
+      MI.getOpcode() == VE::STLri || // I32
+      MI.getOpcode() == VE::STUri    // F32
+  ) {
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return MI.getOperand(2).getReg();
+    }
+  }
+  return 0;
+}
+
+void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator I,
+                                      unsigned SrcReg, bool isKill, int FI,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+  if (RC == &VE::I64RegClass) {
+    BuildMI(MBB, I, DL, get(VE::STSri))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addMemOperand(MMO);
+  } else if (RC == &VE::I32RegClass) {
+    BuildMI(MBB, I, DL, get(VE::STLri))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addMemOperand(MMO);
+  } else if (RC == &VE::F32RegClass) {
+    BuildMI(MBB, I, DL, get(VE::STUri))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addMemOperand(MMO);
+  } else
+    report_fatal_error("Can't store this register to stack slot");
+}
+
+void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned DestReg, int FI,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  if (RC == &VE::I64RegClass) {
+    BuildMI(MBB, I, DL, get(VE::LDSri), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+  } else if (RC == &VE::I32RegClass) {
+    BuildMI(MBB, I, DL, get(VE::LDLri), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+  } else if (RC == &VE::F32RegClass) {
+    BuildMI(MBB, I, DL, get(VE::LDUri), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+  } else
+    report_fatal_error("Can't load this register from stack slot");
+}
+
 bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case VE::EXTEND_STACK: {
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
--- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -20,12 +20,19 @@
   virtual void anchor();
 
 private:
+  /// VarArgsFrameOffset - Frame offset to start of varargs area.
+  int VarArgsFrameOffset;
+
   /// IsLeafProc - True if the function is a leaf procedure.
   bool IsLeafProc;
 
 public:
-  VEMachineFunctionInfo() : IsLeafProc(false) {}
-  explicit VEMachineFunctionInfo(MachineFunction &MF) : IsLeafProc(false) {}
+  VEMachineFunctionInfo() : VarArgsFrameOffset(0), IsLeafProc(false) {}
+  explicit VEMachineFunctionInfo(MachineFunction &MF)
+      : VarArgsFrameOffset(0), IsLeafProc(false) {}
+
+  int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+  void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
 
   void setLeafProc(bool rhs) { IsLeafProc = rhs; }
   bool isLeafProc() const { return IsLeafProc; }
diff --git a/llvm/test/CodeGen/VE/va_arg.ll b/llvm/test/CodeGen/VE/va_arg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/va_arg.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+@.str = private unnamed_addr constant [6 x i8] c"a=%d\0A\00", align 1
+@.str.1 = private unnamed_addr constant [6 x i8] c"b=%d\0A\00", align 1
+@.str.2 = private unnamed_addr constant [6 x i8] c"c=%d\0A\00", align 1
+@.str.3 = private unnamed_addr constant [6 x i8] c"d=%u\0A\00", align 1
+@.str.4 = private unnamed_addr constant [6 x i8] c"e=%u\0A\00", align 1
+@.str.5 = private unnamed_addr constant [6 x i8] c"f=%u\0A\00", align 1
+@.str.6 = private unnamed_addr constant [6 x i8] c"g=%f\0A\00", align 1
+@.str.7 = private unnamed_addr constant [6 x i8] c"h=%p\0A\00", align 1
+@.str.8 = private unnamed_addr constant [7 x i8] c"i=%ld\0A\00", align 1
+@.str.9 = private unnamed_addr constant [7 x i8] c"j=%lf\0A\00", align 1
+
+define i32 @func_vainout(i32, ...) {
+; CHECK-LABEL: func_vainout:
+; CHECK: ldl.sx %s1, 184(,%s9)
+; CHECK: ld2b.sx %s18, 192(,%s9)
+; CHECK: ld1b.sx %s19, 200(,%s9)
+; CHECK: ldl.sx %s20, 208(,%s9)
+; CHECK: ld2b.zx %s21, 216(,%s9)
+; CHECK: ld1b.zx %s22, 224(,%s9)
+; CHECK: ldu %s26, 236(,%s9)
+; CHECK: ld %s23, 240(,%s9)
+; CHECK: ld %s24, 248(,%s9)
+; CHECK: ld %s25, 256(,%s9)
+
+  %a = alloca i8*, align 8
+  %a8 = bitcast i8** %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %a8)
+  call void @llvm.va_start(i8* nonnull %a8)
+  %p0 = va_arg i8** %a, i32
+  %p1 = va_arg i8** %a, i16
+  %p2 = va_arg i8** %a, i8
+  %p3 = va_arg i8** %a, i32
+  %p4 = va_arg i8** %a, i16
+  %p5 = va_arg i8** %a, i8
+  %p6 = va_arg i8** %a, float
+  %p7 = va_arg i8** %a, i8*
+  %p8 = va_arg i8** %a, i64
+  %p9 = va_arg i8** %a, double
+  call void @llvm.va_end(i8* nonnull %a8)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %a8)
+  %pf0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %p0)
+  %p1.s32 = sext i16 %p1 to i32
+  %pf1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32 %p1.s32)
+  %p2.s32 = sext i8 %p2 to i32
+  %pf2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 %p2.s32)
+  %pf3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.3, i64 0, i64 0), i32 %p3)
+  %p4.z32 = zext i16 %p4 to i32
+  %pf4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.4, i64 0, i64 0), i32 %p4.z32)
+  %p5.z32 = zext i8 %p5 to i32
+  %pf5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.5, i64 0, i64 0), i32 %p5.z32)
+  %p6.d = fpext float %p6 to double
+  %pf6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.6, i64 0, i64 0), double %p6.d)
+  %pf7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.7, i64 0, i64 0), i8* %p7)
+  %pf8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.8, i64 0, i64 0), i64 %p8)
+  %pf9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.9, i64 0, i64 0), double %p9)
+  ret i32 0
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare i32 @printf(i8* nocapture readonly, ...)
diff --git a/llvm/test/CodeGen/VE/va_callee.ll b/llvm/test/CodeGen/VE/va_callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/va_callee.ll
@@ -0,0 +1,152 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define i32 @va_func(i32, ...) {
+; CHECK-LABEL: va_func:
+; CHECK: ldl.sx %s0, 184(,%s9)
+; CHECK: ld2b.sx %s18, 192(,%s9)
+; CHECK: ld1b.sx %s19, 200(,%s9)
+; CHECK: ldl.sx %s20, 208(,%s9)
+; CHECK: ld2b.zx %s21, 216(,%s9)
+; CHECK: ld1b.zx %s22, 224(,%s9)
+; CHECK: ldu %s23, 236(,%s9)
+; CHECK: ld %s24, 240(,%s9)
+; CHECK: ld %s25, 248(,%s9)
+
+  %va = alloca i8*, align 8
+  %va.i8 = bitcast i8** %va to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
+  call void @llvm.va_start(i8* nonnull %va.i8)
+  %p1 = va_arg i8** %va, i32
+  %p2 = va_arg i8** %va, i16
+  %p3 = va_arg i8** %va, i8
+  %p4 = va_arg i8** %va, i32
+  %p5 = va_arg i8** %va, i16
+  %p6 = va_arg i8** %va, i8
+  %p7 = va_arg i8** %va, float
+  %p8 = va_arg i8** %va, i8*
+  %p9 = va_arg i8** %va, i64
+  %p10 = va_arg i8** %va, double
+  call void @llvm.va_end(i8* nonnull %va.i8)
+  call void @use_i32(i32 %p1)
+  call void @use_s16(i16 %p2)
+  call void @use_s8(i8 %p3)
+  call void @use_i32(i32 %p4)
+  call void @use_u16(i16 %p5)
+  call void @use_u8(i8 %p6)
+  call void @use_float(float %p7)
+  call void @use_i8p(i8* %p8)
+  call void @use_i64(i64 %p9)
+  call void @use_double(double %p10)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
+  ret i32 0
+}
+
+define i32 @va_copy0(i32, ...) {
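+; llvm.va_copy duplicates the single va_list pointer, so the copied list
+; resumes scanning the parameter area exactly where the source list stopped.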
+; CHECK-LABEL: va_copy0:
+; CHECK: ldl.sx %s0,
+; CHECK: ld2b.sx %s18,
+; CHECK: ld1b.sx %s19,
+; CHECK: ldl.sx %s20,
+; CHECK: ld2b.zx %s21,
+; CHECK: ld1b.zx %s22,
+; CHECK: ldu %s23,
+; CHECK: ld %s24,
+; CHECK: ld %s25,
+
+  %va = alloca i8*, align 8
+  %va.i8 = bitcast i8** %va to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
+  call void @llvm.va_start(i8* nonnull %va.i8)
+  %vb = alloca i8*, align 8
+  %vb.i8 = bitcast i8** %vb to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %vb.i8)
+  call void @llvm.va_copy(i8* nonnull %vb.i8, i8* nonnull %va.i8)
+  call void @llvm.va_end(i8* nonnull %va.i8)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
+  %p1 = va_arg i8** %vb, i32
+  %p2 = va_arg i8** %vb, i16
+  %p3 = va_arg i8** %vb, i8
+  %p4 = va_arg i8** %vb, i32
+  %p5 = va_arg i8** %vb, i16
+  %p6 = va_arg i8** %vb, i8
+  %p7 = va_arg i8** %vb, float
+  %p8 = va_arg i8** %vb, i8*
+  %p9 = va_arg i8** %vb, i64
+  %p10 = va_arg i8** %vb, double
+  call void @llvm.va_end(i8* nonnull %vb.i8)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %vb.i8)
+  call void @use_i32(i32 %p1)
+  call void @use_s16(i16 %p2)
+  call void @use_s8(i8 %p3)
+  call void @use_i32(i32 %p4)
+  call void @use_u16(i16 %p5)
+  call void @use_u8(i8 %p6)
+  call void @use_float(float %p7)
+  call void @use_i8p(i8* %p8)
+  call void @use_i64(i64 %p9)
+  call void @use_double(double %p10)
+  ret i32 0
+}
+
+define i32 @va_copy8(i32, ...) {
+; CHECK-LABEL: va_copy8:
+; CHECK: ldl.sx %s0,
+; CHECK: ld2b.sx %s18,
+; CHECK: ld1b.sx %s19,
+; CHECK: ldl.sx %s20,
+; CHECK: ld2b.zx %s21,
+; CHECK: ld1b.zx %s22,
+; CHECK: ldu %s23,
+; CHECK: ld %s24,
+; CHECK: ld %s25,
+
+  %va = alloca i8*, align 8
+  %va.i8 = bitcast i8** %va to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
+  call void @llvm.va_start(i8* nonnull %va.i8)
+  %p1 = va_arg i8** %va, i32
+  %p2 = va_arg i8** %va, i16
+  %p3 = va_arg i8** %va, i8
+  %p4 = va_arg i8** %va, i32
+  %p5 = va_arg i8** %va, i16
+  %p6 = va_arg i8** %va, i8
+  %p7 = va_arg i8** %va, float
+
+  %vc = alloca i8*, align 8
+  %vc.i8 = bitcast i8** %vc to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %vc.i8)
+  call void @llvm.va_copy(i8* nonnull %vc.i8, i8* nonnull %va.i8)
+  call void @llvm.va_end(i8* nonnull %va.i8)
+  %p8 = va_arg i8** %vc, i8*
+  %p9 = va_arg i8** %vc, i64
+  %p10 = va_arg i8** %vc, double
+  call void @llvm.va_end(i8* nonnull %vc.i8)
+  call void @use_i32(i32 %p1)
+  call void @use_s16(i16 %p2)
+  call void @use_s8(i8 %p3)
+  call void @use_i32(i32 %p4)
+  call void @use_u16(i16 %p5)
+  call void @use_u8(i8 %p6)
+  call void @use_float(float %p7)
+  call void @use_i8p(i8* %p8)
+  call void @use_i64(i64 %p9)
+  call void @use_double(double %p10)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
+  ret i32 0
+}
+
+declare void @use_i64(i64)
+declare void @use_i32(i32)
+declare void @use_u16(i16 zeroext)
+declare void @use_u8(i8 zeroext)
+declare void @use_s16(i16 signext)
+declare void @use_s8(i8 signext)
+declare void @use_i8p(i8*)
+declare void @use_float(float)
+declare void @use_double(double)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
diff --git a/llvm/test/CodeGen/VE/va_caller.ll b/llvm/test/CodeGen/VE/va_caller.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/va_caller.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
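+
+; The callee is variadic, so CC_VE2 assigns every operand to the parameter
+; area: the caller stores all arguments to the stack at 176(,%s11) and up,
+; in addition to passing the leading ones in registers.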
+
+declare i32 @func(i32, ...)
+
+define i32 @caller() {
+; CHECK-LABEL: caller:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s7, 0, (0)1
+; CHECK-NEXT: st %s7, 280(,%s11)
+; CHECK-NEXT: or %s0, 11, (0)1
+; CHECK-NEXT: st %s0, 272(,%s11)
+; CHECK-NEXT: st %s7, 264(,%s11)
+; CHECK-NEXT: or %s0, 10, (0)1
+; CHECK-NEXT: st %s0, 256(,%s11)
+; CHECK-NEXT: lea.sl %s0, 1075970048
+; CHECK-NEXT: st %s0, 248(,%s11)
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: st %s0, 240(,%s11)
+; CHECK-NEXT: st %s7, 232(,%s11)
+; CHECK-NEXT: lea %s0, 1086324736
+; CHECK-NEXT: stl %s0, 228(,%s11)
+; CHECK-NEXT: or %s5, 5, (0)1
+; CHECK-NEXT: stl %s5, 216(,%s11)
+; CHECK-NEXT: or %s4, 4, (0)1
+; CHECK-NEXT: stl %s4, 208(,%s11)
+; CHECK-NEXT: or %s3, 3, (0)1
+; CHECK-NEXT: stl %s3, 200(,%s11)
+; CHECK-NEXT: or %s2, 2, (0)1
+; CHECK-NEXT: stl %s2, 192(,%s11)
+; CHECK-NEXT: or %s1, 1, (0)1
+; CHECK-NEXT: stl %s1, 184(,%s11)
+; CHECK-NEXT: or %s18, 0, (0)1
+; CHECK-NEXT: lea %s0, func@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, func@hi(%s0)
+; CHECK-NEXT: lea.sl %s0, 1086324736
+; CHECK-NEXT: stl %s18, 176(,%s11)
+; CHECK-NEXT: or %s6, 0, %s0
+; CHECK-NEXT: or %s0, 0, %s18
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s0, 0, %s18
+; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, i8* null, i64 8, double 9.0, i128 10, i128 11)
+  ret i32 0
+}
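
For reviewers, here is a rough C++ model of the va_list walk that the LowerVAARG code above produces. This is an illustrative sketch only, not part of the patch: the VEVaList struct and the helper names are invented, and the real lowering emits SelectionDAG nodes rather than library code. It assumes the layout documented above, i.e. every variadic slot is 8 bytes and an f32 payload occupies the higher-addressed 4 bytes of its slot, which is why LowerVAARG adds an internal offset of 4 before loading.

    #include <cstdint>
    #include <cstring>

    // Invented model of a VE va_list: a single pointer walking the
    // caller-prepared parameter area in 8-byte slots.
    struct VEVaList {
      char *Ptr;
    };

    // f32: the payload sits 4 bytes into its 8-byte slot ("| empty| float|").
    inline float va_arg_f32(VEVaList &VL) {
      float F;
      std::memcpy(&F, VL.Ptr + 4, sizeof(F));
      VL.Ptr += 8; // advance to the next 8-byte slot
      return F;
    }

    // i64, f64, and pointers: the payload fills the whole 8-byte slot.
    inline int64_t va_arg_i64(VEVaList &VL) {
      int64_t V;
      std::memcpy(&V, VL.Ptr, sizeof(V));
      VL.Ptr += 8;
      return V;
    }

Under this model, va_start reduces to setting Ptr to %s9 plus VarArgsFrameOffset (184 in the tests above: the 176-byte register save area plus one 8-byte slot for the fixed first argument), and va_copy is a plain pointer copy, which is what the Expand actions for VACOPY and VAEND rely on.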