diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -34,11 +34,10 @@
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
 
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const override {
-    return MBB.erase(MI);
-  }
+                                MachineBasicBlock::iterator MI) const override;
 
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
                                      Register &FrameReg) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -87,6 +87,7 @@
 void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
   const LoongArchInstrInfo *TII = STI.getInstrInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -138,11 +139,14 @@
 
   // Generate new FP.
   if (hasFP(MF)) {
-    adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup);
-
-    // Emit ".cfi_def_cfa $fp, 0"
-    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
-        nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+    adjustReg(MBB, MBBI, DL, FPReg, SPReg,
+              StackSize - LoongArchFI->getVarArgsSaveSize(),
+              MachineInstr::FrameSetup);
+
+    // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()"
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfa(nullptr, RI->getDwarfRegNum(FPReg, true),
+                                    LoongArchFI->getVarArgsSaveSize()));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
@@ -153,6 +157,7 @@
                                          MachineBasicBlock &MBB) const {
   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
   Register SPReg = LoongArch::R3;
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
 
@@ -170,7 +175,8 @@
 
   // Restore the stack pointer.
   if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
     assert(hasFP(MF) && "frame pointer should not have been eliminated");
-    adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize,
+    adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22,
+              -StackSize + LoongArchFI->getVarArgsSaveSize(),
               MachineInstr::FrameDestroy);
   }
 
@@ -193,10 +199,49 @@
     SavedRegs.set(LoongArchABI::getBPReg());
 }
 
+// Do not preserve stack space within prologue for outgoing variables if the
+// function contains variable size objects.
+// Let eliminateCallFramePseudoInstr preserve stack space for it.
+bool LoongArchFrameLowering::hasReservedCallFrame(
+    const MachineFunction &MF) const {
+  return !MF.getFrameInfo().hasVarSizedObjects();
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
+MachineBasicBlock::iterator
+LoongArchFrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MI) const {
+  Register SPReg = LoongArch::R3;
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (!hasReservedCallFrame(MF)) {
+    // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
+    // ADJCALLSTACKUP must be converted to instructions manipulating the stack
+    // pointer. This is necessary when there is a variable length stack
+    // allocation (e.g. alloca), which means it's not possible to allocate
+    // space for outgoing arguments from within the function prologue.
+    int64_t Amount = MI->getOperand(0).getImm();
+
+    if (Amount != 0) {
+      // Ensure the stack remains aligned after adjustment.
+      Amount = alignSPAdjust(Amount);
+
+      if (MI->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
+        Amount = -Amount;
+
+      adjustReg(MBB, MI, DL, SPReg, SPReg, Amount, MachineInstr::NoFlags);
+    }
+  }
+
+  return MBB.erase(MI);
+}
+
 StackOffset LoongArchFrameLowering::getFrameIndexReference(
     const MachineFunction &MF, int FI, Register &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
 
   // Callee-saved registers should be referenced relative to the stack
   // pointer (positive offset), otherwise use the frame pointer (negative
@@ -213,10 +258,12 @@
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }
 
-  FrameReg = RI->getFrameRegister(MF);
   if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) {
     FrameReg = LoongArch::R3;
     Offset += StackOffset::getFixed(MFI.getStackSize());
+  } else {
+    FrameReg = RI->getFrameRegister(MF);
+    Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
   }
 
   return Offset;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -110,6 +110,7 @@
   SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -55,6 +55,10 @@
   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
 
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
+
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
@@ -137,9 +141,27 @@
     return SDValue();
   case ISD::UINT_TO_FP:
     return lowerUINT_TO_FP(Op, DAG);
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
   }
 }
 
+SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
+
+  SDLoc DL(Op);
+  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+                                 getPointerTy(MF.getDataLayout()));
+
+  // vastart just stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV));
+}
+
 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                  SelectionDAG &DAG) const {
@@ -1174,6 +1196,10 @@
   }
 
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  MVT GRLenVT = Subtarget.getGRLenVT();
+  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
+  // Used with varargs to accumulate store chains.
+  std::vector<SDValue> OutChains;
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign> ArgLocs;
@@ -1211,8 +1237,66 @@
   }
 
   if (IsVarArg) {
-    // TODO: Support vararg.
-    report_fatal_error("Not support vararg");
+    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
+    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
+
+    // Offset of the first variable argument from stack pointer, and size of
+    // the vararg save area. For now, the varargs save area is either zero or
+    // large enough to hold a0-a7.
+    int VaArgOffset, VarArgsSaveSize;
+
+    // If all registers are allocated, then all varargs must be passed on the
+    // stack and we don't need to save any argregs.
+    if (ArgRegs.size() == Idx) {
+      VaArgOffset = CCInfo.getNextStackOffset();
+      VarArgsSaveSize = 0;
+    } else {
+      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
+      VaArgOffset = -VarArgsSaveSize;
+    }
+
+    // Record the frame index of the first variable argument
+    // which is a value necessary for VASTART.
+    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
+    LoongArchFI->setVarArgsFrameIndex(FI);
+
+    // If saving an odd number of registers then create an extra stack slot to
+    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
+    // offsets to even-numbered registers remain 2*GRLen-aligned.
+    if (Idx % 2) {
+      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
+                            true);
+      VarArgsSaveSize += GRLenInBytes;
+    }
+
+    // Copy the integer registers that may have been used for passing varargs
+    // to the vararg save area.
+    for (unsigned I = Idx; I < ArgRegs.size();
+         ++I, VaArgOffset += GRLenInBytes) {
+      const Register Reg = RegInfo.createVirtualRegister(RC);
+      RegInfo.addLiveIn(ArgRegs[I], Reg);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
+      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+                                   MachinePointerInfo::getFixedStack(MF, FI));
+      cast<StoreSDNode>(Store.getNode())
+          ->getMemOperand()
+          ->setValue((Value *)nullptr);
+      OutChains.push_back(Store);
+    }
+    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
+  }
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } return Chain; diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/vararg.ll @@ -0,0 +1,356 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ +; RUN: | FileCheck --check-prefix=LA64-FPELIM %s +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ +; RUN: --frame-pointer=all < %s \ +; RUN: | FileCheck --check-prefix=LA64-WITHFP %s + +declare void @llvm.va_start(ptr) +declare void @llvm.va_end(ptr) + +declare void @notdead(ptr) + +define i64 @va1(ptr %fmt, ...) { +; LA64-FPELIM-LABEL: va1: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80 +; LA64-FPELIM-NEXT: .cfi_def_cfa_offset 80 +; LA64-FPELIM-NEXT: move $a0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $sp, 72 +; LA64-FPELIM-NEXT: st.d $a6, $sp, 64 +; LA64-FPELIM-NEXT: st.d $a5, $sp, 56 +; LA64-FPELIM-NEXT: st.d $a4, $sp, 48 +; LA64-FPELIM-NEXT: st.d $a3, $sp, 40 +; LA64-FPELIM-NEXT: st.d $a2, $sp, 32 +; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32 +; LA64-FPELIM-NEXT: st.d $a1, $sp, 8 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va1: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: .cfi_def_cfa_offset 96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: .cfi_offset 1, -72 +; LA64-WITHFP-NEXT: .cfi_offset 22, -80 +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: .cfi_def_cfa 22, 64 +; LA64-WITHFP-NEXT: move $a0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, $fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a1, $fp, -24 +; LA64-WITHFP-NEXT: st.d $a0, $fp, 8 +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 8 + %argp.next = getelementptr inbounds i64, ptr %argp.cur, i32 1 + store ptr %argp.next, ptr %va, align 8 + %1 = load i64, ptr %argp.cur, align 8 + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +define i64 @va1_va_arg(ptr %fmt, ...) 
nounwind { +; LA64-FPELIM-LABEL: va1_va_arg: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80 +; LA64-FPELIM-NEXT: move $a0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $sp, 72 +; LA64-FPELIM-NEXT: st.d $a6, $sp, 64 +; LA64-FPELIM-NEXT: st.d $a5, $sp, 56 +; LA64-FPELIM-NEXT: st.d $a4, $sp, 48 +; LA64-FPELIM-NEXT: st.d $a3, $sp, 40 +; LA64-FPELIM-NEXT: st.d $a2, $sp, 32 +; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32 +; LA64-FPELIM-NEXT: st.d $a1, $sp, 8 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va1_va_arg: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: move $a0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, $fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a1, $fp, -24 +; LA64-WITHFP-NEXT: st.d $a0, $fp, 8 +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i64 + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +;; Ensure the adjustment when restoring the stack pointer using the frame +;; pointer is correct + +define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { +; LA64-FPELIM-LABEL: va1_va_arg_alloca: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -96 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: addi.d $fp, $sp, 32 +; LA64-FPELIM-NEXT: move $s0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $fp, 56 +; LA64-FPELIM-NEXT: st.d $a6, $fp, 48 +; LA64-FPELIM-NEXT: st.d $a5, $fp, 40 +; LA64-FPELIM-NEXT: st.d $a4, $fp, 32 +; LA64-FPELIM-NEXT: st.d $a3, $fp, 24 +; LA64-FPELIM-NEXT: st.d $a2, $fp, 16 +; LA64-FPELIM-NEXT: addi.d $a0, $fp, 16 +; LA64-FPELIM-NEXT: st.d $a0, $fp, -32 +; LA64-FPELIM-NEXT: addi.d $a0, $a1, 15 +; LA64-FPELIM-NEXT: addi.w $a1, $zero, -16 +; LA64-FPELIM-NEXT: and $a0, $a0, $a1 +; LA64-FPELIM-NEXT: st.d $s0, $fp, 8 +; LA64-FPELIM-NEXT: sub.d $a0, $sp, $a0 +; LA64-FPELIM-NEXT: move $sp, $a0 +; LA64-FPELIM-NEXT: bl notdead +; LA64-FPELIM-NEXT: move $a0, $s0 +; LA64-FPELIM-NEXT: addi.d $sp, $fp, -32 +; LA64-FPELIM-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 96 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va1_va_arg_alloca: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: move $s0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, 
$fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a0, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a0, $fp, -32 +; LA64-WITHFP-NEXT: addi.d $a0, $a1, 15 +; LA64-WITHFP-NEXT: addi.w $a1, $zero, -16 +; LA64-WITHFP-NEXT: and $a0, $a0, $a1 +; LA64-WITHFP-NEXT: st.d $s0, $fp, 8 +; LA64-WITHFP-NEXT: sub.d $a0, $sp, $a0 +; LA64-WITHFP-NEXT: move $sp, $a0 +; LA64-WITHFP-NEXT: bl notdead +; LA64-WITHFP-NEXT: move $a0, $s0 +; LA64-WITHFP-NEXT: addi.d $sp, $fp, -32 +; LA64-WITHFP-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i64 + %2 = alloca i8, i64 %1 + call void @notdead(ptr %2) + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +define void @va1_caller() nounwind { +; LA64-FPELIM-LABEL: va1_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-FPELIM-NEXT: ori $a2, $zero, 2 +; LA64-FPELIM-NEXT: bl va1 +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va1_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16 +; LA64-WITHFP-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-WITHFP-NEXT: ori $a2, $zero, 2 +; LA64-WITHFP-NEXT: bl va1 +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %1 = call i64 (ptr, ...) @va1(ptr undef, double 1.0, i64 2) + ret void +} + +;; Ensure a named 2*GRLen argument is passed in a1 and a2, while the +;; vararg long double is passed in a4 and a5 (rather than a3 and a4) + +declare i64 @va_aligned_register(i64 %a, i128 %b, ...) 
+ +define void @va_aligned_register_caller() nounwind { +; LA64-FPELIM-LABEL: va_aligned_register_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: lu12i.w $a0, 335544 +; LA64-FPELIM-NEXT: ori $a0, $a0, 1311 +; LA64-FPELIM-NEXT: lu32i.d $a0, 335544 +; LA64-FPELIM-NEXT: lu52i.d $a4, $a0, -328 +; LA64-FPELIM-NEXT: lu12i.w $a0, -503317 +; LA64-FPELIM-NEXT: ori $a0, $a0, 2129 +; LA64-FPELIM-NEXT: lu32i.d $a0, 37355 +; LA64-FPELIM-NEXT: lu52i.d $a5, $a0, 1024 +; LA64-FPELIM-NEXT: ori $a0, $zero, 2 +; LA64-FPELIM-NEXT: ori $a1, $zero, 1111 +; LA64-FPELIM-NEXT: move $a2, $zero +; LA64-FPELIM-NEXT: bl va_aligned_register +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va_aligned_register_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16 +; LA64-WITHFP-NEXT: lu12i.w $a0, 335544 +; LA64-WITHFP-NEXT: ori $a0, $a0, 1311 +; LA64-WITHFP-NEXT: lu32i.d $a0, 335544 +; LA64-WITHFP-NEXT: lu52i.d $a4, $a0, -328 +; LA64-WITHFP-NEXT: lu12i.w $a0, -503317 +; LA64-WITHFP-NEXT: ori $a0, $a0, 2129 +; LA64-WITHFP-NEXT: lu32i.d $a0, 37355 +; LA64-WITHFP-NEXT: lu52i.d $a5, $a0, 1024 +; LA64-WITHFP-NEXT: ori $a0, $zero, 2 +; LA64-WITHFP-NEXT: ori $a1, $zero, 1111 +; LA64-WITHFP-NEXT: move $a2, $zero +; LA64-WITHFP-NEXT: bl va_aligned_register +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %1 = call i64 (i64, i128, ...) @va_aligned_register(i64 2, i128 1111, + fp128 0xLEB851EB851EB851F400091EB851EB851) + ret void +} + +;; Check 2*GRLen values are aligned appropriately when passed on the stack +;; in a vararg call + +declare i32 @va_aligned_stack_callee(i32, ...) 
+ +define void @va_aligned_stack_caller() nounwind { +; LA64-FPELIM-LABEL: va_aligned_stack_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -112 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: ori $a0, $zero, 17 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 48 +; LA64-FPELIM-NEXT: ori $a0, $zero, 16 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 40 +; LA64-FPELIM-NEXT: ori $a0, $zero, 15 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 32 +; LA64-FPELIM-NEXT: ori $a0, $zero, 14 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 0 +; LA64-FPELIM-NEXT: lu12i.w $a0, -503317 +; LA64-FPELIM-NEXT: ori $a0, $a0, 2129 +; LA64-FPELIM-NEXT: lu32i.d $a0, 37355 +; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: lu12i.w $a0, 335544 +; LA64-FPELIM-NEXT: ori $a0, $a0, 1311 +; LA64-FPELIM-NEXT: lu32i.d $a0, 335544 +; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, -328 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 16 +; LA64-FPELIM-NEXT: ori $a0, $zero, 1000 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 64 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 88 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 80 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 72 +; LA64-FPELIM-NEXT: ori $a1, $zero, 11 +; LA64-FPELIM-NEXT: addi.d $a2, $sp, 64 +; LA64-FPELIM-NEXT: ori $a3, $zero, 12 +; LA64-FPELIM-NEXT: ori $a4, $zero, 13 +; LA64-FPELIM-NEXT: ori $a0, $zero, 1 +; LA64-FPELIM-NEXT: move $a6, $zero +; LA64-FPELIM-NEXT: move $a7, $a0 +; LA64-FPELIM-NEXT: bl va_aligned_stack_callee +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 112 +; LA64-FPELIM-NEXT: jirl $zero, $ra, 0 +; +; LA64-WITHFP-LABEL: va_aligned_stack_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -112 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 112 +; LA64-WITHFP-NEXT: ori $a0, $zero, 17 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 48 +; LA64-WITHFP-NEXT: ori $a0, $zero, 16 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 40 +; LA64-WITHFP-NEXT: ori $a0, $zero, 15 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 32 +; LA64-WITHFP-NEXT: ori $a0, $zero, 14 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 0 +; LA64-WITHFP-NEXT: lu12i.w $a0, -503317 +; LA64-WITHFP-NEXT: ori $a0, $a0, 2129 +; LA64-WITHFP-NEXT: lu32i.d $a0, 37355 +; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 24 +; LA64-WITHFP-NEXT: lu12i.w $a0, 335544 +; LA64-WITHFP-NEXT: ori $a0, $a0, 1311 +; LA64-WITHFP-NEXT: lu32i.d $a0, 335544 +; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, -328 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 16 +; LA64-WITHFP-NEXT: ori $a0, $zero, 1000 +; LA64-WITHFP-NEXT: st.d $a0, $fp, -48 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -24 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -32 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -40 +; LA64-WITHFP-NEXT: ori $a1, $zero, 11 +; LA64-WITHFP-NEXT: addi.d $a2, $fp, -48 +; LA64-WITHFP-NEXT: ori $a3, $zero, 12 +; LA64-WITHFP-NEXT: ori $a4, $zero, 13 +; LA64-WITHFP-NEXT: ori $a0, $zero, 1 +; LA64-WITHFP-NEXT: move $a6, $zero +; LA64-WITHFP-NEXT: move $a7, $a0 +; LA64-WITHFP-NEXT: bl va_aligned_stack_callee +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 112 +; LA64-WITHFP-NEXT: jirl $zero, $ra, 0 + %1 = call i32 (i32, ...) 
@va_aligned_stack_callee(i32 1, i32 11, + i256 1000, i32 12, i32 13, i128 18446744073709551616, i32 14, + fp128 0xLEB851EB851EB851F400091EB851EB851, i64 15, + [2 x i64] [i64 16, i64 17]) + ret void +}