Index: lib/Target/RISCV/RISCVFrameLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVFrameLowering.cpp
+++ lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVFrameLowering.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "RISCVSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -91,6 +92,7 @@
   }
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.begin();
 
   unsigned FPReg = getFPReg(STI);
@@ -124,7 +126,8 @@
   std::advance(MBBI, CSI.size());
 
   // Generate new FP.
-  adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup);
+  adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize - RVFI->getVarArgsSaveSize(),
+            MachineInstr::FrameSetup);
 }
 
 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -137,6 +140,7 @@
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
   DebugLoc DL = MBBI->getDebugLoc();
   unsigned FPReg = getFPReg(STI);
   unsigned SPReg = getSPReg(STI);
@@ -153,7 +157,8 @@
   // necessary if the stack pointer was modified, meaning the stack size is
   // unknown.
   if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) {
-    adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -StackSize,
+    adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
+              -StackSize + RVFI->getVarArgsSaveSize(),
               MachineInstr::FrameDestroy);
   }
 
@@ -166,6 +171,7 @@
                                                unsigned &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
   // Callee-saved registers should be referenced relative to the stack
   // pointer (positive offset), otherwise use the frame pointer (negative
   // offset).
@@ -182,10 +188,13 @@
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }
 
-  FrameReg = RI->getFrameRegister(MF);
   if (FI >= MinCSFI && FI <= MaxCSFI) {
     FrameReg = RISCV::X2;
     Offset += MF.getFrameInfo().getStackSize();
+  } else {
+    FrameReg = RI->getFrameRegister(MF);
+    assert(hasFP(MF) && "Offset calculation incorrect if !hasFP");
+    Offset += RVFI->getVarArgsSaveSize();
   }
   return Offset;
 }
Index: lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.h
+++ lib/Target/RISCV/RISCVISelLowering.h
@@ -53,7 +53,7 @@
                         bool IsRet) const;
   void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
-                         bool IsRet) const;
+                         bool IsRet, CallLoweringInfo *CLI) const;
   // Lower incoming arguments, copy physregs into vregs
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool IsVarArg,
@@ -78,6 +78,7 @@
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 };
 }
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14,6 +14,7 @@
 
 #include "RISCVISelLowering.h"
 #include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
"RISCVTargetMachine.h" @@ -63,6 +64,11 @@ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); @@ -158,6 +164,8 @@ return lowerBlockAddress(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); } } @@ -261,6 +269,21 @@ return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); } +SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + RISCVMachineFunctionInfo *FuncInfo = MF.getInfo(); + + SDLoc DL(Op); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy(MF.getDataLayout())); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), + MachinePointerInfo(SV)); +} + MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -398,19 +421,33 @@ // Implements the RISC-V calling convention. Returns true upon failure. static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet) { + CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; assert(ValVT == XLenVT && "Unexpected ValVT"); assert(LocVT == XLenVT && "Unexpected LocVT"); - assert(IsFixed && "Vararg support not yet implemented"); // Any return value split in to more than two values can't be returned // directly. if (IsRet && ValNo > 1) return true; + // If this is a variadic argument, ensure it is assigned an even register + // if it has 8-byte alignment (RV32) or 16-byte alignment (RV64) + // An aligned register should be used regardless of + // whether the original argument is 'split' or not. The argument will not be + // passed by registers if the original type is larger than 2*XLEN, so don't + // bother aligning for that case. 
+  unsigned TwoXLenInBytes = (2 * XLen) / 8;
+  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
+      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
+    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
+    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) {
+      State.AllocateReg(ArgGPRs);
+    }
+  }
+
   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = State.getPendingArgFlags();
@@ -482,13 +519,20 @@
     MachineFunction &MF, CCState &CCInfo,
     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
   unsigned NumArgs = Ins.size();
+  FunctionType *FType = MF.getFunction()->getFunctionType();
 
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT ArgVT = Ins[i].VT;
     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+    Type *ArgTy = nullptr;
+    if (IsRet)
+      ArgTy = FType->getReturnType();
+    else if (Ins[i].isOrigArg())
+      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
+
     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
-                 ArgFlags, CCInfo, /*IsRet=*/true, IsRet)) {
+                 ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
       DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                    << EVT(ArgVT).getEVTString() << '\n');
       llvm_unreachable(nullptr);
@@ -498,15 +542,17 @@
 
 void RISCVTargetLowering::analyzeOutputArgs(
     MachineFunction &MF, CCState &CCInfo,
-    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet) const {
+    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
+    CallLoweringInfo *CLI) const {
   unsigned NumArgs = Outs.size();
 
   for (unsigned i = 0; i != NumArgs; i++) {
     MVT ArgVT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
+
     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
-                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet)) {
+                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
       DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                    << EVT(ArgVT).getEVTString() << "\n");
       llvm_unreachable(nullptr);
@@ -582,9 +628,10 @@
   MachineFunction &MF = DAG.getMachineFunction();
   MVT XLenVT = Subtarget.getXLenVT();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-
-  if (IsVarArg)
-    report_fatal_error("VarArg not supported");
+  unsigned XLen = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+  unsigned XLenInBytes = XLen / 8;
+  // Used with varargs to accumulate store chains.
+  std::vector<SDValue> OutChains;
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -621,6 +668,69 @@
     }
     InVals.push_back(ArgValue);
   }
+
+  if (IsVarArg) {
+    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
+    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+    // Offset of the first variable argument from stack pointer, and size of
+    // the vararg save area. For now, the varargs save area is either zero or
+    // large enough to hold a0-a7.
+    int VaArgOffset, VarArgsSaveSize;
+
+    // If all registers are allocated, then all varargs must be passed on the
+    // stack and we don't need to save any argregs.
+    if (ArgRegs.size() == Idx) {
+      VaArgOffset = CCInfo.getNextStackOffset();
+      VarArgsSaveSize = 0;
+    } else {
+      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+      VaArgOffset = -VarArgsSaveSize;
+    }
+
+    // Record the frame index of the first variable argument
+    // which is a value necessary to VASTART.
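+    // For example, on RV32 with three fixed arguments (a0-a2 already taken),
+    // Idx is 3: a3-a7 are saved, VarArgsSaveSize is 20 and VaArgOffset is
+    // -20; the odd Idx then adds one extra XLEN-sized pad slot below that
+    // (see va3 in test/CodeGen/RISCV/vararg.ll).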
+    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+    RVFI->setVarArgsFrameIndex(FI);
+
+    // If saving an odd number of registers, create an extra stack slot to
+    // ensure even-numbered registers are always 2*XLEN-aligned.
+    if (Idx % 2) {
+      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
+                                 true);
+      VarArgsSaveSize += XLenInBytes;
+    }
+
+    // Copy the integer registers that may have been used for passing varargs
+    // to the vararg save area.
+    for (unsigned I = Idx; I < ArgRegs.size();
+         ++I, VaArgOffset += XLenInBytes) {
+      const unsigned Reg = RegInfo.createVirtualRegister(RC);
+      RegInfo.addLiveIn(ArgRegs[I], Reg);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+      SDValue Store =
+          DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo());
+      cast<StoreSDNode>(Store.getNode())
+          ->getMemOperand()
+          ->setValue((Value *)nullptr);
+      OutChains.push_back(Store);
+    }
+    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
+  }
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens for vararg functions.
+  if (!OutChains.empty()) {
+    OutChains.push_back(Chain);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+  }
+
   return Chain;
 }
@@ -641,16 +751,12 @@
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   MVT XLenVT = Subtarget.getXLenVT();
 
-  if (IsVarArg) {
-    report_fatal_error("LowerCall with varargs not implemented");
-  }
-
   MachineFunction &MF = DAG.getMachineFunction();
 
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false);
+  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
@@ -824,7 +930,7 @@
     MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
-                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true))
+                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
       return false;
   }
   return true;
@@ -836,10 +942,6 @@
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  const SDLoc &DL, SelectionDAG &DAG) const {
-  if (IsVarArg) {
-    report_fatal_error("VarArg not supported");
-  }
-
   // Stores the assignment of the return value to a location.
   SmallVector<CCValAssign, 16> RVLocs;
 
@@ -847,7 +949,8 @@
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                  *DAG.getContext());
-  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true);
+  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
+                    nullptr);
 
   SDValue Flag;
   SmallVector<SDValue, 4> RetOps(1, Chain);
Index: lib/Target/RISCV/RISCVMachineFunctionInfo.h
===================================================================
--- /dev/null
+++ lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -0,0 +1,44 @@
+//=- RISCVMachineFunctionInfo.h - RISCV machine function info -----*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares RISCV-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private RISCV-specific information for each MachineFunction.
+class RISCVMachineFunctionInfo : public MachineFunctionInfo {
+
+  /// FrameIndex for start of varargs area
+  int VarArgsFrameIndex = 0;
+  /// Size of the save area used for varargs
+  int VarArgsSaveSize = 0;
+
+public:
+  RISCVMachineFunctionInfo() = default;
+
+  explicit RISCVMachineFunctionInfo(MachineFunction &MF) {}
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+  unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
+  void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
Index: test/CodeGen/RISCV/vararg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/vararg.ll
@@ -0,0 +1,535 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+
+declare void @notdead(i8*)
+
+; Although frontends are recommended to not generate va_arg due to the lack of
+; support for aggregate types, we test simple cases here to ensure they are
+; lowered correctly
+
+define i32 @va1(i8* %fmt, ...) nounwind {
+; RV32I-LABEL: va1:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    addi a0, s0, 8
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    lw a0, 4(s0)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %argp.cur = load i8*, i8** %va, align 4
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  store i8* %argp.next, i8** %va, align 4
+  %2 = bitcast i8* %argp.cur to i32*
+  %3 = load i32, i32* %2, align 4
+  call void @llvm.va_end(i8* %1)
+  ret i32 %3
+}
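+
+; In va1 above, a1-a7 are spilled to 4(s0)-28(s0), and 0(s0) is the extra pad
+; slot created because an odd number of argument registers is saved.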
+
+define i32 @va1_va_arg(i8* %fmt, ...) nounwind {
+; RV32I-LABEL: va1_va_arg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    addi a0, s0, 8
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    lw a0, 4(s0)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = va_arg i8** %va, i32
+  call void @llvm.va_end(i8* %1)
+  ret i32 %2
+}
+
+; Ensure the adjustment when restoring the stack pointer using the frame
+; pointer is correct
+define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
+; RV32I-LABEL: va1_va_arg_alloca:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    sw s1, 4(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    addi a0, s0, 8
+; RV32I-NEXT:    sw a0, -16(s0)
+; RV32I-NEXT:    lw s1, 4(s0)
+; RV32I-NEXT:    addi a0, s1, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    lui a1, %hi(notdead)
+; RV32I-NEXT:    addi a1, a1, %lo(notdead)
+; RV32I-NEXT:    jalr a1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw s1, 4(sp)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = va_arg i8** %va, i32
+  %3 = alloca i8, i32 %2
+  call void @notdead(i8* %3)
+  call void @llvm.va_end(i8* %1)
+  ret i32 %2
+}
+
+define void @va1_caller() nounwind {
+; RV32I-LABEL: va1_caller:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    lui a0, 261888
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    lui a0, %hi(va1)
+; RV32I-NEXT:    addi a0, a0, %lo(va1)
+; RV32I-NEXT:    addi a4, zero, 2
+; RV32I-NEXT:    mv a2, zero
+; RV32I-NEXT:    jalr a0
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+; Pass a double, as a float would be promoted by a C/C++ frontend
+  %1 = call i32 (i8*, ...) @va1(i8* undef, double 1.0, i32 2)
+  ret void
+}
+
+; Ensure that 2x xlen size+alignment varargs are accessed via an "aligned"
+; register pair (where the first register is even-numbered).
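+; For example, va2_caller below passes its lone double vararg in a2/a3 (a1 is
+; skipped), so the callee finds it 8-byte aligned in the save area at 8(s0).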
+
+define double @va2(i8 *%fmt, ...) nounwind {
+; RV32I-LABEL: va2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    addi a0, s0, 19
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    addi a0, s0, 11
+; RV32I-NEXT:    andi a1, a0, -8
+; RV32I-NEXT:    lw a0, 0(a1)
+; RV32I-NEXT:    ori a1, a1, 4
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = bitcast i8** %va to i32*
+  %argp.cur = load i32, i32* %2, align 4
+  %3 = add i32 %argp.cur, 7
+  %4 = and i32 %3, -8
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8
+  store i8* %argp.next, i8** %va, align 4
+  %5 = inttoptr i32 %4 to double*
+  %6 = load double, double* %5, align 8
+  call void @llvm.va_end(i8* %1)
+  ret double %6
+}
+
+define double @va2_va_arg(i8 *%fmt, ...) nounwind {
+; RV32I-LABEL: va2_va_arg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    addi a0, s0, 11
+; RV32I-NEXT:    andi a0, a0, -8
+; RV32I-NEXT:    ori a1, a0, 4
+; RV32I-NEXT:    sw a1, -12(s0)
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    addi a2, a1, 4
+; RV32I-NEXT:    sw a2, -12(s0)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = va_arg i8** %va, double
+  call void @llvm.va_end(i8* %1)
+  ret double %2
+}
+
+define void @va2_caller() nounwind {
+; RV32I-LABEL: va2_caller:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    lui a0, 261888
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    lui a0, %hi(va2)
+; RV32I-NEXT:    addi a0, a0, %lo(va2)
+; RV32I-NEXT:    mv a2, zero
+; RV32I-NEXT:    jalr a0
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+  %1 = call double (i8*, ...) @va2(i8* undef, double 1.000000e+00)
+  ret void
+}
+
+; Ensure a named double argument is passed in a1 and a2, while the vararg
+; double is passed in a4 and a5 (rather than a3 and a4)
+
+define double @va3(i32 %a, double %b, ...) nounwind {
+; RV32I-LABEL: va3:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 20(sp)
+; RV32I-NEXT:    sw s0, 16(sp)
+; RV32I-NEXT:    addi s0, sp, 24
+; RV32I-NEXT:    sw a7, 20(s0)
+; RV32I-NEXT:    sw a6, 16(s0)
+; RV32I-NEXT:    sw a5, 12(s0)
+; RV32I-NEXT:    sw a4, 8(s0)
+; RV32I-NEXT:    sw a3, 4(s0)
+; RV32I-NEXT:    addi a0, s0, 19
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    lui a0, %hi(__adddf3)
+; RV32I-NEXT:    addi a5, a0, %lo(__adddf3)
+; RV32I-NEXT:    addi a0, s0, 11
+; RV32I-NEXT:    andi a0, a0, -8
+; RV32I-NEXT:    lw a4, 0(a0)
+; RV32I-NEXT:    ori a0, a0, 4
+; RV32I-NEXT:    lw a3, 0(a0)
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a4
+; RV32I-NEXT:    jalr a5
+; RV32I-NEXT:    lw s0, 16(sp)
+; RV32I-NEXT:    lw ra, 20(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = bitcast i8** %va to i32*
+  %argp.cur = load i32, i32* %2, align 4
+  %3 = add i32 %argp.cur, 7
+  %4 = and i32 %3, -8
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8
+  store i8* %argp.next, i8** %va, align 4
+  %5 = inttoptr i32 %4 to double*
+  %6 = load double, double* %5, align 8
+  call void @llvm.va_end(i8* %1)
+  %7 = fadd double %b, %6
+  ret double %7
+}
+
+define double @va3_va_arg(i32 %a, double %b, ...) nounwind {
+; RV32I-LABEL: va3_va_arg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 20(sp)
+; RV32I-NEXT:    sw s0, 16(sp)
+; RV32I-NEXT:    addi s0, sp, 24
+; RV32I-NEXT:    sw a7, 20(s0)
+; RV32I-NEXT:    sw a6, 16(s0)
+; RV32I-NEXT:    sw a5, 12(s0)
+; RV32I-NEXT:    sw a4, 8(s0)
+; RV32I-NEXT:    sw a3, 4(s0)
+; RV32I-NEXT:    addi a0, s0, 11
+; RV32I-NEXT:    andi a0, a0, -8
+; RV32I-NEXT:    ori a3, a0, 4
+; RV32I-NEXT:    sw a3, -12(s0)
+; RV32I-NEXT:    lw a4, 0(a0)
+; RV32I-NEXT:    addi a0, a3, 4
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    lui a0, %hi(__adddf3)
+; RV32I-NEXT:    addi a5, a0, %lo(__adddf3)
+; RV32I-NEXT:    lw a3, 0(a3)
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a4
+; RV32I-NEXT:    jalr a5
+; RV32I-NEXT:    lw s0, 16(sp)
+; RV32I-NEXT:    lw ra, 20(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = va_arg i8** %va, double
+  call void @llvm.va_end(i8* %1)
+  %3 = fadd double %b, %2
+  ret double %3
+}
+
+define void @va3_caller() nounwind {
+; RV32I-LABEL: va3_caller:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    lui a0, 261888
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    lui a0, 262144
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    lui a0, %hi(va3)
+; RV32I-NEXT:    addi a3, a0, %lo(va3)
+; RV32I-NEXT:    addi a0, zero, 2
+; RV32I-NEXT:    mv a1, zero
+; RV32I-NEXT:    mv a4, zero
+; RV32I-NEXT:    jalr a3
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+  %1 = call double (i32, double, ...) @va3(i32 2, double 1.000000e+00, double 2.000000e+00)
+  ret void
+}
+
+declare void @llvm.va_copy(i8*, i8*)
+
+define i32 @va4_va_copy(i32 %argno, ...) nounwind {
+; RV32I-LABEL: va4_va_copy:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 28(sp)
+; RV32I-NEXT:    sw s0, 24(sp)
+; RV32I-NEXT:    sw s1, 20(sp)
+; RV32I-NEXT:    addi s0, sp, 32
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    addi a0, s0, 8
+; RV32I-NEXT:    sw a0, -16(s0)
+; RV32I-NEXT:    sw a0, -20(s0)
+; RV32I-NEXT:    lw s1, 4(s0)
+; RV32I-NEXT:    lui a1, %hi(notdead)
+; RV32I-NEXT:    addi a1, a1, %lo(notdead)
+; RV32I-NEXT:    jalr a1
+; RV32I-NEXT:    lw a0, -16(s0)
+; RV32I-NEXT:    addi a0, a0, 3
+; RV32I-NEXT:    andi a0, a0, -4
+; RV32I-NEXT:    addi a1, a0, 4
+; RV32I-NEXT:    sw a1, -16(s0)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    addi a0, a0, 7
+; RV32I-NEXT:    andi a0, a0, -4
+; RV32I-NEXT:    addi a2, a0, 4
+; RV32I-NEXT:    sw a2, -16(s0)
+; RV32I-NEXT:    lw a2, 0(a0)
+; RV32I-NEXT:    addi a0, a0, 7
+; RV32I-NEXT:    andi a0, a0, -4
+; RV32I-NEXT:    addi a3, a0, 4
+; RV32I-NEXT:    sw a3, -16(s0)
+; RV32I-NEXT:    add a1, a1, s1
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    lw s1, 20(sp)
+; RV32I-NEXT:    lw s0, 24(sp)
+; RV32I-NEXT:    lw ra, 28(sp)
+; RV32I-NEXT:    addi sp, sp, 64
+; RV32I-NEXT:    ret
+  %vargs = alloca i8*, align 4
+  %wargs = alloca i8*, align 4
+  %1 = bitcast i8** %vargs to i8*
+  %2 = bitcast i8** %wargs to i8*
+  call void @llvm.va_start(i8* %1)
+  %3 = va_arg i8** %vargs, i32
+  call void @llvm.va_copy(i8* %2, i8* %1)
+  %4 = load i8*, i8** %wargs, align 4
+  call void @notdead(i8* %4)
+  %5 = va_arg i8** %vargs, i32
+  %6 = va_arg i8** %vargs, i32
+  %7 = va_arg i8** %vargs, i32
+  call void @llvm.va_end(i8* %1)
+  call void @llvm.va_end(i8* %2)
+  %add1 = add i32 %5, %3
+  %add2 = add i32 %add1, %6
+  %add3 = add i32 %add2, %7
+  ret i32 %add3
+}
+
+; Check 2x*xlen values are aligned appropriately when passed on the stack in a vararg call
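+; In va5_aligned_stack_caller below, the variadic i64 travels in the aligned
+; register pair a6/a7, the variadic double is stored at 8(sp) (leaving a
+; 4-byte hole at 4(sp)), and the [2 x i32] only needs 4-byte alignment so it
+; follows at 20(sp).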
+
+define i32 @va5_aligned_stack_callee(i32 %a, ...) nounwind {
+; RV32I-LABEL: va5_aligned_stack_callee:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    addi a0, zero, 1
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  ret i32 1
+}
+
+define void @va5_aligned_stack_caller() nounwind {
+; The double should be 8-byte aligned on the stack, but the two-element array
+; should only be 4-byte aligned
+; RV32I-LABEL: va5_aligned_stack_caller:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 60(sp)
+; RV32I-NEXT:    sw s0, 56(sp)
+; RV32I-NEXT:    addi s0, sp, 64
+; RV32I-NEXT:    addi a0, zero, 17
+; RV32I-NEXT:    sw a0, 24(sp)
+; RV32I-NEXT:    addi a0, zero, 16
+; RV32I-NEXT:    sw a0, 20(sp)
+; RV32I-NEXT:    addi a0, zero, 15
+; RV32I-NEXT:    sw a0, 16(sp)
+; RV32I-NEXT:    lui a0, 262236
+; RV32I-NEXT:    addi a0, a0, 655
+; RV32I-NEXT:    sw a0, 12(sp)
+; RV32I-NEXT:    lui a0, 377487
+; RV32I-NEXT:    addi a0, a0, 1475
+; RV32I-NEXT:    sw a0, 8(sp)
+; RV32I-NEXT:    addi a0, zero, 14
+; RV32I-NEXT:    sw a0, 0(sp)
+; RV32I-NEXT:    lui a0, 262153
+; RV32I-NEXT:    addi a0, a0, 491
+; RV32I-NEXT:    sw a0, -20(s0)
+; RV32I-NEXT:    lui a0, 545260
+; RV32I-NEXT:    addi a0, a0, -1967
+; RV32I-NEXT:    sw a0, -24(s0)
+; RV32I-NEXT:    lui a0, 964690
+; RV32I-NEXT:    addi a0, a0, -328
+; RV32I-NEXT:    sw a0, -28(s0)
+; RV32I-NEXT:    lui a0, 335544
+; RV32I-NEXT:    addi a0, a0, 1311
+; RV32I-NEXT:    sw a0, -32(s0)
+; RV32I-NEXT:    lui a0, 688509
+; RV32I-NEXT:    addi a6, a0, -2048
+; RV32I-NEXT:    lui a0, %hi(va5_aligned_stack_callee)
+; RV32I-NEXT:    addi a5, a0, %lo(va5_aligned_stack_callee)
+; RV32I-NEXT:    addi a0, zero, 1
+; RV32I-NEXT:    addi a1, zero, 11
+; RV32I-NEXT:    addi a2, s0, -32
+; RV32I-NEXT:    addi a3, zero, 12
+; RV32I-NEXT:    addi a4, zero, 13
+; RV32I-NEXT:    addi a7, zero, 4
+; RV32I-NEXT:    jalr a5
+; RV32I-NEXT:    lw s0, 56(sp)
+; RV32I-NEXT:    lw ra, 60(sp)
+; RV32I-NEXT:    addi sp, sp, 64
+; RV32I-NEXT:    ret
+  %1 = call i32 (i32, ...) @va5_aligned_stack_callee(i32 1, i32 11,
+    fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, i64 20000000000,
+    i32 14, double 2.720000e+00, i32 15, [2 x i32] [i32 16, i32 17])
+  ret void
+}
+
+; A function with no fixed arguments is not valid C, but can be
+; specified in LLVM IR. We must ensure the vararg save area is
+; still set up correctly.
+
+define i32 @va6_no_fixed_args(...) nounwind {
+; RV32I-LABEL: va6_no_fixed_args:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw ra, 12(sp)
+; RV32I-NEXT:    sw s0, 8(sp)
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    sw a0, 0(s0)
+; RV32I-NEXT:    sw a7, 28(s0)
+; RV32I-NEXT:    sw a6, 24(s0)
+; RV32I-NEXT:    sw a5, 20(s0)
+; RV32I-NEXT:    sw a4, 16(s0)
+; RV32I-NEXT:    sw a3, 12(s0)
+; RV32I-NEXT:    sw a2, 8(s0)
+; RV32I-NEXT:    sw a1, 4(s0)
+; RV32I-NEXT:    addi a0, s0, 4
+; RV32I-NEXT:    sw a0, -12(s0)
+; RV32I-NEXT:    lw a0, 0(s0)
+; RV32I-NEXT:    lw s0, 8(sp)
+; RV32I-NEXT:    lw ra, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = va_arg i8** %va, i32
+  call void @llvm.va_end(i8* %1)
+  ret i32 %2
+}
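+
+; In va6_no_fixed_args, Idx is 0: all of a0-a7 are spilled to the 32-byte save
+; area at 0(s0)-28(s0), and no pad slot is needed since an even number of
+; registers is saved.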