Index: include/llvm/CodeGen/CallingConvLower.h =================================================================== --- include/llvm/CodeGen/CallingConvLower.h +++ include/llvm/CodeGen/CallingConvLower.h @@ -201,6 +201,7 @@ unsigned MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; + SmallVector PendingArgFlags; // ByValInfo and SmallVector ByValRegs: // @@ -508,6 +509,11 @@ return PendingLocs; } + // Get a list of argflags for pending assignments. + SmallVectorImpl &getPendingArgFlags() { + return PendingArgFlags; + } + /// Compute the remaining unused register parameters that would be used for /// the given value type. This is useful when varargs are passed in the /// registers that normal prototyped parameters would be passed in, or for Index: lib/Target/RISCV/CMakeLists.txt =================================================================== --- lib/Target/RISCV/CMakeLists.txt +++ lib/Target/RISCV/CMakeLists.txt @@ -6,7 +6,6 @@ tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM RISCVGenCallingConv.inc -gen-callingconv) tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) Index: lib/Target/RISCV/RISCVCallingConv.td =================================================================== --- lib/Target/RISCV/RISCVCallingConv.td +++ lib/Target/RISCV/RISCVCallingConv.td @@ -11,20 +11,8 @@ // //===----------------------------------------------------------------------===// -// RISCV 32-bit C return-value convention. -def RetCC_RISCV32 : CallingConv<[CCIfType<[i32], CCAssignToReg<[X10, X11]>>]>; - -// RISCV 32-bit C Calling convention. -def CC_RISCV32 : CallingConv<[ - // Promote i8/i16 args to i32 - CCIfType<[ i8, i16 ], CCPromoteToType>, - - // All arguments get passed in integer registers if there is space. 
- CCIfType<[i32], CCAssignToReg<[ X10, X11, X12, X13, X14, X15, X16, X17]>>, - - // Could be assigned to the stack in 8-byte aligned units, but unsupported - CCAssignToStack<8, 8> -]>; +// The RISC-V calling convention is handled with custom code in +// RISCVISelLowering.cpp (CC_RISCV). def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; Index: lib/Target/RISCV/RISCVISelLowering.h =================================================================== --- lib/Target/RISCV/RISCVISelLowering.h +++ lib/Target/RISCV/RISCVISelLowering.h @@ -48,12 +48,22 @@ MachineBasicBlock *BB) const override; private: + void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, + bool IsRet) const; + void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Outs, + bool IsRet, CallLoweringInfo *CLI) const; // Lower incoming arguments, copy physregs into vregs SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -328,7 +328,243 @@ } // Calling Convention Implementation. -#include "RISCVGenCallingConv.inc" +// The expectations for frontend ABI lowering vary from target to target. +// Ideally, an LLVM frontend would be able to avoid worrying about many ABI +// details, but this is a longer term goal. 
For now, we simply try to keep the +// role of the frontend as simple and well-defined as possible. The rules can +// be summarised as: +// * Never split up large scalar arguments. We handle them here. +// * If a hardfloat calling convention is being used, and the struct may be +// passed in a pair of registers (fp+fp, int+fp), and both registers are +// available, then pass as two separate arguments. If either the GPRs or FPRs +// are exhausted, then pass according to the rule below. +// * If a struct could never be passed in registers or directly in a stack +// slot (as it is larger than 2x xlen and the floating point rules don't +// apply), then pass it using a pointer with the byval attribute +// * If a struct is less than 2x xlen, then coerce to either a two-element +// word-sized array or a 2x xlen scalar (depending on alignment). +// * The frontend can determine whether a struct is returned by reference or +// not based on its size and fields. If it will be returned by reference, the +// frontend must modify the prototype so a pointer with the sret annotation is +// passed as the first argument. This is not necessary for large scalar +// returns. +// * Struct return values and varargs should be coerced to structs containing +// register-size fields in the same situations they would be for fixed +// arguments. + +static const MCPhysReg ArgGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, + RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 +}; + +// Pass a 2xlen argument that has been split in to two xlen values through +// registers or the stack as necessary. +static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, + ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, + MVT ValVT2, MVT LocVT2, + ISD::ArgFlagsTy ArgFlags2) { + unsigned XLenInBytes = XLen / 8; + if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + // At least one half can be passed via register. 
+ State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, + VA1.getLocVT(), CCValAssign::Full)); + } else { + // Both halves must be passed on the stack, with proper alignment. + unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); + State.addLoc( + CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), + State.AllocateStack(XLenInBytes, StackAlign), + VA1.getLocVT(), CCValAssign::Full)); + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, + CCValAssign::Full)); + return false; + } + + if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + // The second half can also be passed via register. + State.addLoc( + CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); + } else { + // The second half is passed via the stack, without additional alignment. + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, + CCValAssign::Full)); + } + + return false; +} + +// Implements the RISC-V calling convention. Returns true upon failure. +static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State, bool IsFixed, bool IsRet) { + unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); + assert(XLen == 32 || XLen == 64); + MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; + assert(ValVT == XLenVT && "Unexpected ValVT"); + assert(LocVT == XLenVT && "Unexpected LocVT"); + assert(IsFixed && "Vararg support not yet implemented"); + + // Any return value split in to more than two values can't be returned + // directly. 
+ if (IsRet && ValNo > 1) + return true; + + SmallVectorImpl &PendingMembers = State.getPendingLocs(); + SmallVectorImpl &PendingArgFlags = + State.getPendingArgFlags(); + + assert(PendingMembers.size() == PendingArgFlags.size() && + "PendingMembers and PendingArgFlags out of sync"); + + // Split arguments might be passed indirectly, so keep track of the pending + // values. + if (ArgFlags.isSplit() || !PendingMembers.empty()) { + LocVT = XLenVT; + LocInfo = CCValAssign::Indirect; + PendingMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + PendingArgFlags.push_back(ArgFlags); + if (!ArgFlags.isSplitEnd()) { + return false; + } + } + + // If the split argument only had two elements, it should be passed directly + // in registers or on the stack. + if (ArgFlags.isSplitEnd() && PendingMembers.size() <= 2) { + assert(PendingMembers.size() == 2 && "Unexpected PendingMembers.size()"); + // Apply the normal calling convention rules to the first half of the + // split argument. + CCValAssign VA = PendingMembers[0]; + ISD::ArgFlagsTy AF = PendingArgFlags[0]; + PendingMembers.clear(); + PendingArgFlags.clear(); + return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, + ArgFlags); + } + + // Allocate to a register if possible, or else a stack slot. + unsigned Reg = State.AllocateReg(ArgGPRs); + unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); + + // If we reach this point and PendingMembers is non-empty, we must be at the + // end of a split argument that must be passed indirectly. 
+ if (!PendingMembers.empty()) { + assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); + assert(PendingMembers.size() > 2 && "Unexpected PendingMembers.size()"); + + for (auto &It : PendingMembers) { + if (Reg) + It.convertToReg(Reg); + else + It.convertToMem(StackOffset); + State.addLoc(It); + } + PendingMembers.clear(); + PendingArgFlags.clear(); + return false; + } + + assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); + + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } else { + State.addLoc( + CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + } + return false; +} + +void RISCVTargetLowering::analyzeInputArgs( + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, bool IsRet) const { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + + if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo, true, IsRet)) { + DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'); + llvm_unreachable(nullptr); + } + } +} + +void RISCVTargetLowering::analyzeOutputArgs( + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Outs, bool IsRet, + CallLoweringInfo *CLI) const { + unsigned NumArgs = Outs.size(); + + for (unsigned i = 0; i != NumArgs; i++) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + + if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo, Outs[i].IsFixed, IsRet)) { + DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << "\n"); + llvm_unreachable(nullptr); + } + } +} + +// The caller is responsible for loading the full value if the argument is +// passed with CCValAssign::Indirect. 
+static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + EVT LocVT = VA.getLocVT(); + SDValue Val; + + unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + return Val; + } +} + +// The caller is responsible for loading the full value if the argument is +// passed with CCValAssign::Indirect. +static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + EVT LocVT = VA.getLocVT(); + EVT ValVT = VA.getValVT(); + EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); + int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val; + + ISD::LoadExtType ExtType; + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + ExtType = ISD::NON_EXTLOAD; + break; + } + Val = DAG.getExtLoad( + ExtType, DL, LocVT, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); + return Val; +} // Transform physical registers into virtual registers. 
SDValue RISCVTargetLowering::LowerFormalArguments( @@ -345,8 +581,8 @@ } MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); MVT XLenVT = Subtarget.getXLenVT(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (IsVarArg) report_fatal_error("VarArg not supported"); @@ -354,25 +590,37 @@ // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV32); - - for (auto &VA : ArgLocs) { - if (!VA.isRegLoc()) - report_fatal_error("Defined with too many args"); - - // Arguments passed in registers. - EVT RegVT = VA.getLocVT(); - if (RegVT != XLenVT) { - DEBUG(dbgs() << "LowerFormalArguments Unhandled argument type: " - << RegVT.getEVTString() << "\n"); - report_fatal_error("unhandled argument type"); + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet*/ false); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + assert(VA.getLocVT() == XLenVT && "Unhandled argument type"); + SDValue ArgValue; + if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL); + else + ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // If the original argument was split and passed by reference (e.g. i128 + // on RV32), we need to load all parts of it here (using the same + // address). 
+ InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, + MachinePointerInfo())); + unsigned ArgIndex = Ins[i].OrigArgIndex; + assert(Ins[i].PartOffset == 0); + while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + CCValAssign &PartVA = ArgLocs[i + 1]; + unsigned PartOffset = Ins[i + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, + DAG.getIntPtrConstant(PartOffset, DL)); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + ++i; + } + continue; } - const unsigned VReg = - RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - SDValue ArgIn = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); - - InVals.push_back(ArgIn); + InVals.push_back(ArgValue); } return Chain; } @@ -392,6 +640,7 @@ CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT XLenVT = Subtarget.getXLenVT(); if (IsVarArg) { report_fatal_error("LowerCall with varargs not implemented"); @@ -402,44 +651,105 @@ // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV32); + analyzeOutputArgs(MF, ArgCCInfo, Outs, false, &CLI); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = ArgCCInfo.getNextStackOffset(); - for (auto &Arg : Outs) { - if (!Arg.Flags.isByVal()) + // Create local copies for byval args + SmallVector ByValArgs; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!Flags.isByVal()) continue; - report_fatal_error("Passing arguments byval not yet implemented"); + + SDValue Arg = OutVals[i]; + unsigned Size = Flags.getByValSize(); + unsigned Align = Flags.getByValAlign(); + + int FI = MF.getFrameInfo().CreateStackObject(Size, Align, false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); + + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, + /*IsVolatile=*/false, + /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), + MachinePointerInfo()); + ByValArgs.push_back(FIPtr); } Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. SmallVector, 8> RegsToPass; + SmallVector MemOpChains; SDValue StackPtr; - for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { - CCValAssign &VA = ArgLocs[I]; - SDValue ArgValue = OutVals[I]; + for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; // Promote the value if needed. - // For now, only handle fully promoted arguments. + // For now, only handle fully promoted and indirect arguments. switch (VA.getLocInfo()) { case CCValAssign::Full: break; + case CCValAssign::Indirect: { + // Store the argument in a stack slot and pass its address. + SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); + int FI = cast(SpillSlot)->getIndex(); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + // If the original argument was split (e.g. 
i128), we need + // to store all parts of it here (and pass just one address). + unsigned ArgIndex = Outs[i].OrigArgIndex; + assert(Outs[i].PartOffset == 0); + while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[i + 1]; + unsigned PartOffset = Outs[i + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, + DAG.getIntPtrConstant(PartOffset, DL)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, PartValue, Address, + MachinePointerInfo::getFixedStack(MF, FI))); + ++i; + } + ArgValue = SpillSlot; + break; + } default: llvm_unreachable("Unknown loc info!"); } + // Use local copy if it is a byval arg. + if (Flags.isByVal()) + ArgValue = ByValArgs[j++]; + if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { assert(VA.isMemLoc() && "Argument not register or memory"); - report_fatal_error("Passing arguments via the stack not yet implemented"); + + // Work out the address of the stack slot. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); + + // Emit the store. + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + SDValue Glue; // Build a sequence of copy-to-reg nodes, chained and glued together. @@ -489,7 +799,7 @@ // Assign locations to each value returned by this call. SmallVector RVLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - RetCCInfo.AnalyzeCallResult(Ins, RetCC_RISCV32); + analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet*/ true); // Copy all of the result registers out of their specified physreg. 
for (auto &VA : RVLocs) { @@ -499,12 +809,28 @@ Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); - InVals.push_back(Chain.getValue(0)); + assert(VA.getLocInfo() == CCValAssign::Full && "Unknown loc info!"); + InVals.push_back(RetValue); } return Chain; } +bool RISCVTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags, + CCInfo, /*IsFixed*/ true, /*IsRet*/ true)) + return false; + } + return true; +} + SDValue RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -522,17 +848,21 @@ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_RISCV32); + analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet*/ true, + nullptr); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { + SDValue Val = OutVals[i]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + assert(VA.getLocInfo() == CCValAssign::Full && + "Unexpected CCValAssign::LocInfo"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); // Guarantee that all emitted copies are stuck together. 
Flag = Chain.getValue(1); Index: test/CodeGen/RISCV/byval.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/byval.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s + +%struct.Foo = type { i32, i32, i32, i16, i8 } +@foo = global %struct.Foo { i32 1, i32 2, i32 3, i16 4, i8 5 }, align 4 + +define i32 @callee(%struct.Foo* byval %f) nounwind { +; RV32I-LABEL: callee: +; RV32I: # BB#0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 +entry: + %0 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0 + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + + +define void @caller() nounwind { +; RV32I-LABEL: caller: +; RV32I: # BB#0: # %entry +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: lui a0, %hi(foo+12) +; RV32I-NEXT: addi a0, a0, %lo(foo+12) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -12(s0) +; RV32I-NEXT: lui a0, %hi(foo+8) +; RV32I-NEXT: addi a0, a0, %lo(foo+8) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -16(s0) +; RV32I-NEXT: lui a0, %hi(foo+4) +; RV32I-NEXT: addi a0, a0, %lo(foo+4) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -20(s0) +; RV32I-NEXT: lui a0, %hi(foo) +; RV32I-NEXT: addi a0, a0, %lo(foo) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -24(s0) +; RV32I-NEXT: lui a0, %hi(callee) +; RV32I-NEXT: addi a1, a0, %lo(callee) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 +entry: + 
%call = call i32 @callee(%struct.Foo* byval @foo) + ret void +} Index: test/CodeGen/RISCV/calling-conv-sext-zext.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/calling-conv-sext-zext.ll @@ -0,0 +1,497 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s + +define zeroext i8 @uint8_arg_to_uint8_ret(i8 zeroext %a) nounwind { +; RV32I-LABEL: uint8_arg_to_uint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i8 %a +} + +declare void @receive_uint8(i8 zeroext) + +define void @pass_uint8_as_uint8(i8 zeroext %a) nounwind { +; RV32I-LABEL: pass_uint8_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_uint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_uint8) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + call void @receive_uint8(i8 zeroext %a) + ret void +} + +declare zeroext i8 @return_uint8() + +define zeroext i8 @ret_callresult_uint8_as_uint8() nounwind { +; RV32I-LABEL: ret_callresult_uint8_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_uint8) +; RV32I-NEXT: addi a0, a0, %lo(return_uint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call zeroext i8 @return_uint8() + ret i8 %1 +} + +define 
signext i8 @uint8_arg_to_sint8_ret(i8 zeroext %a) nounwind { +; RV32I-LABEL: uint8_arg_to_sint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i8 %a +} + +declare void @receive_sint8(i8 signext) + +define void @pass_uint8_as_sint8(i8 zeroext %a) nounwind { +; RV32I-LABEL: pass_uint8_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_sint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_sint8) +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + + call void @receive_sint8(i8 signext %a) + ret void +} + +define signext i8 @ret_callresult_uint8_as_sint8() nounwind { +; RV32I-LABEL: ret_callresult_uint8_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_uint8) +; RV32I-NEXT: addi a0, a0, %lo(return_uint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call zeroext i8 @return_uint8() + ret i8 %1 +} + +define signext i32 @uint8_arg_to_anyint32_ret(i8 zeroext %a) nounwind { +; RV32I-LABEL: uint8_arg_to_anyint32_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; 
RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = zext i8 %a to i32 + ret i32 %1 +} + +declare void @receive_anyint32(i32 signext) + +define void @pass_uint8_as_anyint32(i8 zeroext %a) nounwind { +; RV32I-LABEL: pass_uint8_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_anyint32) +; RV32I-NEXT: addi a1, a1, %lo(receive_anyint32) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = zext i8 %a to i32 + call void @receive_anyint32(i32 signext %1) + ret void +} + +define signext i32 @ret_callresult_uint8_as_anyint32() nounwind { +; RV32I-LABEL: ret_callresult_uint8_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_uint8) +; RV32I-NEXT: addi a0, a0, %lo(return_uint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call zeroext i8 @return_uint8() + %2 = zext i8 %1 to i32 + ret i32 %2 +} + +define zeroext i8 @sint8_arg_to_uint8_ret(i8 signext %a) nounwind { +; RV32I-LABEL: sint8_arg_to_uint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i8 %a +} + +define void @pass_sint8_as_uint8(i8 signext %a) nounwind { +; RV32I-LABEL: pass_sint8_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lui 
a1, %hi(receive_uint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_uint8) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + call void @receive_uint8(i8 zeroext %a) + ret void +} + +declare signext i8 @return_sint8() + +define zeroext i8 @ret_callresult_sint8_as_uint8() nounwind { +; RV32I-LABEL: ret_callresult_sint8_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_sint8) +; RV32I-NEXT: addi a0, a0, %lo(return_sint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i8 @return_sint8() + ret i8 %1 +} + +define signext i8 @sint8_arg_to_sint8_ret(i8 signext %a) nounwind { +; RV32I-LABEL: sint8_arg_to_sint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i8 %a +} + +define void @pass_sint8_as_sint8(i8 signext %a) nounwind { +; RV32I-LABEL: pass_sint8_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_sint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_sint8) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + call void @receive_sint8(i8 signext %a) + ret void +} + +define signext i8 @ret_callresult_sint8_as_sint8() nounwind { +; RV32I-LABEL: ret_callresult_sint8_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; 
RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_sint8) +; RV32I-NEXT: addi a0, a0, %lo(return_sint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i8 @return_sint8() + ret i8 %1 +} + +define signext i32 @sint8_arg_to_anyint32_ret(i8 signext %a) nounwind { +; RV32I-LABEL: sint8_arg_to_anyint32_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = sext i8 %a to i32 + ret i32 %1 +} + +define void @pass_sint8_as_anyint32(i8 signext %a) nounwind { +; RV32I-LABEL: pass_sint8_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_anyint32) +; RV32I-NEXT: addi a1, a1, %lo(receive_anyint32) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = sext i8 %a to i32 + call void @receive_anyint32(i32 signext %1) + ret void +} + +define signext i32 @ret_callresult_sint8_as_anyint32() nounwind { +; RV32I-LABEL: ret_callresult_sint8_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_sint8) +; RV32I-NEXT: addi a0, a0, %lo(return_sint8) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i8 @return_sint8() + %2 = sext i8 %1 to i32 + ret i32 %2 +} + +define zeroext i8 @anyint32_arg_to_uint8_ret(i32 signext %a) nounwind { +; 
RV32I-LABEL: anyint32_arg_to_uint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = trunc i32 %a to i8 + ret i8 %1 +} + +define void @pass_anyint32_as_uint8(i32 signext %a) nounwind { +; RV32I-LABEL: pass_anyint32_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lui a1, %hi(receive_uint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_uint8) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = trunc i32 %a to i8 + call void @receive_uint8(i8 zeroext %1) + ret void +} + +declare signext i32 @return_anyint32() + +define zeroext i8 @ret_callresult_anyint32_as_uint8() nounwind { +; RV32I-LABEL: ret_callresult_anyint32_as_uint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_anyint32) +; RV32I-NEXT: addi a0, a0, %lo(return_anyint32) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i32 @return_anyint32() + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define signext i8 @anyint32_arg_to_sint8_ret(i32 signext %a) nounwind { +; RV32I-LABEL: anyint32_arg_to_sint8_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) 
+; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = trunc i32 %a to i8 + ret i8 %1 +} + +define void @pass_anyint32_as_sint8(i32 signext %a) nounwind { +; RV32I-LABEL: pass_anyint32_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_sint8) +; RV32I-NEXT: addi a1, a1, %lo(receive_sint8) +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = trunc i32 %a to i8 + call void @receive_sint8(i8 signext %1) + ret void +} + +define signext i8 @ret_callresult_anyint32_as_sint8() nounwind { +; RV32I-LABEL: ret_callresult_anyint32_as_sint8: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_anyint32) +; RV32I-NEXT: addi a0, a0, %lo(return_anyint32) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i32 @return_anyint32() + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define signext i32 @anyint32_arg_to_anyint32_ret(i32 signext %a) nounwind { +; RV32I-LABEL: anyint32_arg_to_anyint32_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i32 %a +} + +define void @pass_anyint32_as_anyint32(i32 signext %a) nounwind { +; RV32I-LABEL: pass_anyint32_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; 
RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, %hi(receive_anyint32) +; RV32I-NEXT: addi a1, a1, %lo(receive_anyint32) +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + call void @receive_anyint32(i32 signext %a) + ret void +} + +define signext i32 @ret_callresult_anyint32_as_anyint32() nounwind { +; RV32I-LABEL: ret_callresult_anyint32_as_anyint32: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(return_anyint32) +; RV32I-NEXT: addi a0, a0, %lo(return_anyint32) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call signext i32 @return_anyint32() + ret i32 %1 +} + Index: test/CodeGen/RISCV/calling-conv.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/calling-conv.ll @@ -0,0 +1,719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Check that on RV32, i64 and double are passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_scalars(i32 %a, i64 %b, i32 %c, i32 %d, double %e) nounwind { +; RV32I-LABEL: callee_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: addi s1, a4, 0 +; RV32I-NEXT: addi s2, a3, 0 +; RV32I-NEXT: addi s3, a1, 0 +; RV32I-NEXT: addi s4, a0, 0 +; RV32I-NEXT: lui a0, %hi(__fixdfsi) +; RV32I-NEXT: addi a2, a0, %lo(__fixdfsi) +; RV32I-NEXT: addi a0, a5, 0 +; RV32I-NEXT: addi a1, a6, 0 +; RV32I-NEXT: jalr ra, a2, 0 +; RV32I-NEXT: add a1, s4, s3 +; RV32I-NEXT: add a1, a1, s2 +; RV32I-NEXT: add a1, a1, s1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %b_trunc = trunc i64 %b to i32 + %e_fptosi = fptosi double %e to i32 + %1 = add i32 %a, %b_trunc + %2 = add i32 %1, %c + %3 = add i32 %2, %d + %4 = add i32 %3, %e_fptosi + ret i32 %4 +} + +define i32 @caller_scalars() nounwind { +; RV32I-LABEL: caller_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, 262464 +; RV32I-NEXT: addi a6, a0, 0 +; RV32I-NEXT: lui a0, %hi(callee_scalars) +; RV32I-NEXT: addi a7, a0, %lo(callee_scalars) +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: addi a3, zero, 3 +; RV32I-NEXT: addi a4, zero, 4 +; RV32I-NEXT: addi a2, zero, 0 +; RV32I-NEXT: addi a5, zero, 0 +; RV32I-NEXT: jalr ra, a7, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_scalars(i32 1, i64 2, i32 3, i32 4, double 5.000000e+00) + ret 
i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { +; RV32I-LABEL: callee_large_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: lw a3, 12(a0) +; RV32I-NEXT: xor a2, a3, a2 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 4(a0) +; RV32I-NEXT: xor a3, a4, a3 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a4, 8(a0) +; RV32I-NEXT: xor a3, a4, a3 +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: xor a0, a0, zero +; RV32I-NEXT: sltiu a0, a0, 1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() nounwind { +; RV32I-LABEL: caller_large_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: sw zero, -40(s0) +; RV32I-NEXT: sw zero, -44(s0) +; RV32I-NEXT: sw zero, -48(s0) +; RV32I-NEXT: sw zero, -12(s0) +; RV32I-NEXT: sw zero, -16(s0) +; RV32I-NEXT: sw zero, -20(s0) +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: sw a0, -24(s0) +; RV32I-NEXT: lui a0, 524272 +; RV32I-NEXT: addi a0, a0, 0 +; RV32I-NEXT: sw a0, -36(s0) +; RV32I-NEXT: lui a0, %hi(callee_large_scalars) +; RV32I-NEXT: addi a2, a0, %lo(callee_large_scalars) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: addi a1, s0, -48 +; RV32I-NEXT: jalr ra, a2, 0 +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_large_scalars(i128 1, fp128 
0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) nounwind { +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in a register +; RV32I-LABEL: callee_large_scalars_exhausted_regs: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a0, 4(s0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: lw a2, 12(a7) +; RV32I-NEXT: xor a1, a2, a1 +; RV32I-NEXT: lw a2, 4(a0) +; RV32I-NEXT: lw a3, 4(a7) +; RV32I-NEXT: xor a2, a3, a2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: lw a3, 8(a7) +; RV32I-NEXT: xor a2, a3, a2 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a3, 0(a7) +; RV32I-NEXT: xor a0, a3, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: xor a0, a0, zero +; RV32I-NEXT: sltiu a0, a0, 1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() nounwind { +; RV32I-LABEL: caller_large_scalars_exhausted_regs: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) +; RV32I-NEXT: sw s0, 56(sp) +; RV32I-NEXT: addi s0, sp, 64 +; RV32I-NEXT: addi a0, s0, -48 +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: addi a0, zero, 9 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw zero, -40(s0) +; RV32I-NEXT: sw zero, -44(s0) +; RV32I-NEXT: sw zero, -48(s0) +; RV32I-NEXT: sw zero, -12(s0) +; RV32I-NEXT: sw zero, -16(s0) +; RV32I-NEXT: sw zero, -20(s0) +; RV32I-NEXT: addi a0, zero, 8 +; RV32I-NEXT: sw a0, -24(s0) +; 
RV32I-NEXT: lui a0, 524272 +; RV32I-NEXT: addi a0, a0, 0 +; RV32I-NEXT: sw a0, -36(s0) +; RV32I-NEXT: lui a0, %hi(callee_large_scalars_exhausted_regs) +; RV32I-NEXT: addi t0, a0, %lo(callee_large_scalars_exhausted_regs) +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: addi a2, zero, 3 +; RV32I-NEXT: addi a3, zero, 4 +; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: addi a5, zero, 6 +; RV32I-NEXT: addi a6, zero, 7 +; RV32I-NEXT: addi a7, s0, -24 +; RV32I-NEXT: jalr ra, t0, 0 +; RV32I-NEXT: lw s0, 56(sp) +; RV32I-NEXT: lw ra, 60(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) nounwind { +; RV32I-LABEL: caller_mixed_scalar_libcalls: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: addi a2, a1, 0 +; RV32I-NEXT: addi a1, a0, 0 +; RV32I-NEXT: lui a0, %hi(__floatditf) +; RV32I-NEXT: addi a3, a0, %lo(__floatditf) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: jalr ra, a3, 0 +; RV32I-NEXT: lw a0, -24(s0) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) nounwind { +; RV32I-LABEL: callee_many_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw t0, 0(s0) +; RV32I-NEXT: xor a4, 
a4, t0 +; RV32I-NEXT: xor a3, a3, a7 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: xor a3, a3, zero +; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: sltiu a1, a3, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, a0, a5 +; RV32I-NEXT: add a0, a0, a6 +; RV32I-NEXT: lw a1, 4(s0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret i32 %8 +} + +define i32 @caller_many_scalars() nounwind { +; RV32I-LABEL: caller_many_scalars: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: addi a0, zero, 8 +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: lui a0, %hi(callee_many_scalars) +; RV32I-NEXT: addi t0, a0, %lo(callee_many_scalars) +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: addi a2, zero, 3 +; RV32I-NEXT: addi a3, zero, 4 +; RV32I-NEXT: addi a5, zero, 5 +; RV32I-NEXT: addi a6, zero, 6 +; RV32I-NEXT: addi a7, zero, 7 +; RV32I-NEXT: addi a4, zero, 0 +; RV32I-NEXT: jalr ra, t0, 0 +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) nounwind { +; RV32I-LABEL: callee_small_coerced_struct: +; RV32I: # BB#0: +; 
RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sltiu a0, a0, 1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] %a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() nounwind { +; RV32I-LABEL: caller_small_coerced_struct: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(callee_small_coerced_struct) +; RV32I-NEXT: addi a2, a0, %lo(callee_small_coerced_struct) +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: jalr ra, a2, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval align 4 %a) nounwind { +; RV32I-LABEL: callee_large_struct: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() nounwind { +; RV32I-LABEL: 
caller_large_struct: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: sw a0, -24(s0) +; RV32I-NEXT: sw a0, -40(s0) +; RV32I-NEXT: addi a0, zero, 2 +; RV32I-NEXT: sw a0, -20(s0) +; RV32I-NEXT: sw a0, -36(s0) +; RV32I-NEXT: addi a0, zero, 3 +; RV32I-NEXT: sw a0, -16(s0) +; RV32I-NEXT: sw a0, -32(s0) +; RV32I-NEXT: addi a0, zero, 4 +; RV32I-NEXT: sw a0, -12(s0) +; RV32I-NEXT: sw a0, -28(s0) +; RV32I-NEXT: lui a0, %hi(callee_large_struct) +; RV32I-NEXT: addi a1, a0, %lo(callee_large_struct) +; RV32I-NEXT: addi a0, s0, -40 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: jalr zero, ra, 0 + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval align 4 %ls) + ret i32 %2 +} + +; Check 2*xlen values are aligned appropriately when passed on the stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) nounwind { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; RV32I-LABEL: callee_aligned_stack: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a0, 
0(a2) +; RV32I-NEXT: add a0, a0, a7 +; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 8(s0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 16(s0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 20(s0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 %3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() nounwind { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; RV32I-LABEL: caller_aligned_stack: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) +; RV32I-NEXT: sw s0, 56(sp) +; RV32I-NEXT: addi s0, sp, 64 +; RV32I-NEXT: addi a0, zero, 18 +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: addi a0, zero, 17 +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: addi a0, zero, 16 +; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: lui a0, 262236 +; RV32I-NEXT: addi a0, a0, 655 +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: lui a0, 377487 +; RV32I-NEXT: addi a0, a0, 1475 +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: addi a0, zero, 15 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: lui a0, 262153 +; RV32I-NEXT: addi a0, a0, 491 +; RV32I-NEXT: sw a0, -20(s0) +; RV32I-NEXT: lui a0, 545260 +; RV32I-NEXT: addi a0, a0, -1967 +; RV32I-NEXT: sw a0, -24(s0) +; RV32I-NEXT: lui a0, 964690 +; RV32I-NEXT: addi a0, a0, -328 +; RV32I-NEXT: sw a0, -28(s0) +; RV32I-NEXT: lui a0, 335544 +; RV32I-NEXT: addi a0, a0, 1311 +; RV32I-NEXT: sw a0, -32(s0) +; RV32I-NEXT: lui a0, 688509 +; RV32I-NEXT: addi a5, a0, -2048 +; RV32I-NEXT: lui a0, %hi(callee_aligned_stack) +; RV32I-NEXT: addi t0, a0, %lo(callee_aligned_stack) +; RV32I-NEXT: addi 
a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 11 +; RV32I-NEXT: addi a2, s0, -32 +; RV32I-NEXT: addi a3, zero, 12 +; RV32I-NEXT: addi a4, zero, 13 +; RV32I-NEXT: addi a6, zero, 4 +; RV32I-NEXT: addi a7, zero, 14 +; RV32I-NEXT: jalr ra, t0, 0 +; RV32I-NEXT: lw s0, 56(sp) +; RV32I-NEXT: lw ra, 60(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +; Check return of 2x xlen scalars + +define i64 @callee_small_scalar_ret() nounwind { +; RV32I-LABEL: callee_small_scalar_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, 466866 +; RV32I-NEXT: addi a0, a0, 1677 +; RV32I-NEXT: addi a1, zero, 287 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret i64 1234567898765 +} + +define i32 @caller_small_scalar_ret() nounwind { +; RV32I-LABEL: caller_small_scalar_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(callee_small_scalar_ret) +; RV32I-NEXT: addi a0, a0, %lo(callee_small_scalar_ret) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: lui a2, 56 +; RV32I-NEXT: addi a2, a2, 580 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: lui a2, 200614 +; RV32I-NEXT: addi a2, a2, 647 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: xor a0, a0, zero +; RV32I-NEXT: sltiu a0, a0, 1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i64 @callee_small_scalar_ret() + %2 = icmp eq i64 987654321234567, %1 + %3 = zext i1 %2 to i32 + ret i32 %3 +} + +; Check return of 
2x xlen structs + +define %struct.small @callee_small_struct_ret() nounwind { +; RV32I-LABEL: callee_small_struct_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 0 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() nounwind { +; RV32I-LABEL: caller_small_struct_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a0, %hi(callee_small_struct_ret) +; RV32I-NEXT: addi a0, a0, %lo(callee_small_struct_ret) +; RV32I-NEXT: jalr ra, a0, 0 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen scalars + +define fp128 @callee_large_scalar_ret() nounwind { +; RV32I-LABEL: callee_large_scalar_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lui a1, 524272 +; RV32I-NEXT: addi a1, a1, 0 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw zero, 8(a0) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw zero, 0(a0) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() nounwind { +; RV32I-LABEL: caller_large_scalar_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 
28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: lui a0, %hi(callee_large_scalar_ret) +; RV32I-NEXT: addi a1, a0, %lo(callee_large_scalar_ret) +; RV32I-NEXT: addi a0, s0, -32 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias sret %agg.result) nounwind { +; RV32I-LABEL: callee_large_struct_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: addi a1, zero, 1 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: addi a1, zero, 3 +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: addi a1, zero, 4 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() nounwind { +; RV32I-LABEL: caller_large_struct_ret: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: lui a0, %hi(callee_large_struct_ret) +; RV32I-NEXT: addi a1, a0, %lo(callee_large_struct_ret) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw a0, 
-12(s0) +; RV32I-NEXT: lw a1, -24(s0) +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + %6 = add i32 %3, %5 + ret i32 %6 +} Index: test/CodeGen/RISCV/calls.ll =================================================================== --- test/CodeGen/RISCV/calls.ll +++ test/CodeGen/RISCV/calls.ll @@ -52,7 +52,7 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: jalr zero, ra, 0 - %1 = call i32 @defined_function(i32 %a) nounwind + %1 = call i32 @defined_function(i32 %a) ret i32 %1 } @@ -115,3 +115,83 @@ %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b) ret i32 %a } + +declare i32 @external_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind + +define i32 @test_call_external_many_args(i32 %a) nounwind { +; RV32I-LABEL: test_call_external_many_args: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: addi s1, a0, 0 +; RV32I-NEXT: sw s1, 4(sp) +; RV32I-NEXT: sw s1, 0(sp) +; RV32I-NEXT: lui a0, %hi(external_many_args) +; RV32I-NEXT: addi t0, a0, %lo(external_many_args) +; RV32I-NEXT: addi a0, s1, 0 +; RV32I-NEXT: addi a1, s1, 0 +; RV32I-NEXT: addi a2, s1, 0 +; RV32I-NEXT: addi a3, s1, 0 +; RV32I-NEXT: addi a4, s1, 0 +; RV32I-NEXT: addi a5, s1, 0 +; RV32I-NEXT: addi a6, s1, 0 +; RV32I-NEXT: addi a7, s1, 0 +; RV32I-NEXT: jalr ra, t0, 0 +; RV32I-NEXT: addi a0, s1, 0 +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 
= call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, + i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) + ret i32 %a +} + +define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 %j) nounwind { +; RV32I-LABEL: defined_many_args: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: lw a0, 4(s0) +; RV32I-NEXT: addi a0, a0, 1 +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: jalr zero, ra, 0 + %added = add i32 %j, 1 + ret i32 %added +} + +define i32 @test_call_defined_many_args(i32 %a) nounwind { +; RV32I-LABEL: test_call_defined_many_args: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: addi s0, sp, 32 +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: lui a1, %hi(defined_many_args) +; RV32I-NEXT: addi t0, a1, %lo(defined_many_args) +; RV32I-NEXT: addi a1, a0, 0 +; RV32I-NEXT: addi a2, a0, 0 +; RV32I-NEXT: addi a3, a0, 0 +; RV32I-NEXT: addi a4, a0, 0 +; RV32I-NEXT: addi a5, a0, 0 +; RV32I-NEXT: addi a6, a0, 0 +; RV32I-NEXT: addi a7, a0, 0 +; RV32I-NEXT: jalr ra, t0, 0 +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = call i32 @defined_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, + i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) + ret i32 %1 +} Index: test/CodeGen/RISCV/fp128.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/fp128.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s + +@x = local_unnamed_addr global fp128 0xL00000000000000007FFF000000000000, align 16 +@y = local_unnamed_addr global fp128 
0xL00000000000000007FFF000000000000, align 16 + +; Besides anything else, these tests help verify that libcall ABI lowering +; works correctly + +define i32 @test_load_and_cmp() nounwind { +; RV32I-LABEL: test_load_and_cmp: +; RV32I: # BB#0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: lui a0, %hi(y+12) +; RV32I-NEXT: addi a0, a0, %lo(y+12) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -28(s0) +; RV32I-NEXT: lui a0, %hi(y+8) +; RV32I-NEXT: addi a0, a0, %lo(y+8) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -32(s0) +; RV32I-NEXT: lui a0, %hi(y+4) +; RV32I-NEXT: addi a0, a0, %lo(y+4) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -36(s0) +; RV32I-NEXT: lui a0, %hi(y) +; RV32I-NEXT: addi a0, a0, %lo(y) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -40(s0) +; RV32I-NEXT: lui a0, %hi(x+12) +; RV32I-NEXT: addi a0, a0, %lo(x+12) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -12(s0) +; RV32I-NEXT: lui a0, %hi(x+8) +; RV32I-NEXT: addi a0, a0, %lo(x+8) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -16(s0) +; RV32I-NEXT: lui a0, %hi(x+4) +; RV32I-NEXT: addi a0, a0, %lo(x+4) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -20(s0) +; RV32I-NEXT: lui a0, %hi(x) +; RV32I-NEXT: addi a0, a0, %lo(x) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -24(s0) +; RV32I-NEXT: lui a0, %hi(__netf2) +; RV32I-NEXT: addi a2, a0, %lo(__netf2) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: addi a1, s0, -40 +; RV32I-NEXT: jalr ra, a2, 0 +; RV32I-NEXT: xor a0, a0, zero +; RV32I-NEXT: sltu a0, zero, a0 +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = load fp128, fp128* @x, align 16 + %2 = load fp128, fp128* @y, align 16 + %cmp = fcmp une fp128 %1, %2 + %3 = zext i1 %cmp to i32 + ret i32 %3 +} + +define i32 @test_add_and_fptosi() nounwind { +; RV32I-LABEL: test_add_and_fptosi: +; RV32I: # BB#0: +; 
RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) +; RV32I-NEXT: sw s0, 72(sp) +; RV32I-NEXT: addi s0, sp, 80 +; RV32I-NEXT: lui a0, %hi(y+12) +; RV32I-NEXT: addi a0, a0, %lo(y+12) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -44(s0) +; RV32I-NEXT: lui a0, %hi(y+8) +; RV32I-NEXT: addi a0, a0, %lo(y+8) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -48(s0) +; RV32I-NEXT: lui a0, %hi(y+4) +; RV32I-NEXT: addi a0, a0, %lo(y+4) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -52(s0) +; RV32I-NEXT: lui a0, %hi(y) +; RV32I-NEXT: addi a0, a0, %lo(y) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -56(s0) +; RV32I-NEXT: lui a0, %hi(x+12) +; RV32I-NEXT: addi a0, a0, %lo(x+12) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -28(s0) +; RV32I-NEXT: lui a0, %hi(x+8) +; RV32I-NEXT: addi a0, a0, %lo(x+8) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -32(s0) +; RV32I-NEXT: lui a0, %hi(x+4) +; RV32I-NEXT: addi a0, a0, %lo(x+4) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -36(s0) +; RV32I-NEXT: lui a0, %hi(x) +; RV32I-NEXT: addi a0, a0, %lo(x) +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: sw a0, -40(s0) +; RV32I-NEXT: lui a0, %hi(__addtf3) +; RV32I-NEXT: addi a3, a0, %lo(__addtf3) +; RV32I-NEXT: addi a0, s0, -24 +; RV32I-NEXT: addi a1, s0, -40 +; RV32I-NEXT: addi a2, s0, -56 +; RV32I-NEXT: jalr ra, a3, 0 +; RV32I-NEXT: lw a0, -12(s0) +; RV32I-NEXT: sw a0, -60(s0) +; RV32I-NEXT: lw a0, -16(s0) +; RV32I-NEXT: sw a0, -64(s0) +; RV32I-NEXT: lw a0, -20(s0) +; RV32I-NEXT: sw a0, -68(s0) +; RV32I-NEXT: lw a0, -24(s0) +; RV32I-NEXT: sw a0, -72(s0) +; RV32I-NEXT: lui a0, %hi(__fixtfsi) +; RV32I-NEXT: addi a1, a0, %lo(__fixtfsi) +; RV32I-NEXT: addi a0, s0, -72 +; RV32I-NEXT: jalr ra, a1, 0 +; RV32I-NEXT: lw s0, 72(sp) +; RV32I-NEXT: lw ra, 76(sp) +; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: jalr zero, ra, 0 + %1 = load fp128, fp128* @x, align 16 + %2 = load fp128, fp128* @y, align 16 + %3 = fadd fp128 %1, %2 + %4 = fptosi fp128 %3 to i32 + ret i32 %4 +}