diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -87,7 +87,8 @@ } bool setABI(const std::string &Name) override { - if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d" || + Name == "ilp32e") { ABI = Name; return true; } diff --git a/clang/test/Driver/riscv-abi.c b/clang/test/Driver/riscv-abi.c --- a/clang/test/Driver/riscv-abi.c +++ b/clang/test/Driver/riscv-abi.c @@ -23,6 +23,13 @@ // CHECK-ILP32D: "-target-abi" "ilp32d" +// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o -mabi=ilp32e 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ILP32E %s +// RUN: %clang -target riscv32-unknown-elf -x assembler %s -### -o %t.o \ +// RUN: -mabi=ilp32e 2>&1 | FileCheck -check-prefix=CHECK-ILP32E %s + +// CHECK-ILP32E: "-target-abi" "ilp32e" + // RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=lp64 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-RV32-LP64 %s diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -13,8 +13,10 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). +def CSR_ILP32E : CalleeSavedRegs<(add X1, X3, X4, X8, X9)>; + def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add CSR_ILP32E, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -27,8 +29,13 @@ // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; -// Interrupt handler needs to save/restore all registers that are used, +// Interrupt handler needs to save/restore all physical registers that are used, // both Caller and Callee saved registers. 
+// +// The only physical register that isn't saved is x2 (SP), which is used by the +// processor when the interrupt happens. + +// All 32-bit GP registers, excluding x0 (zero) and x2 (sp). def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 9), (sequence "X%u", 10, 11), diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -20,11 +20,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/Align(16), - /*LocalAreaOffset=*/0), - STI(STI) {} + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -23,13 +23,49 @@ using namespace llvm; +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + /*StackAlignment=*/STI.getTargetABI() == + RISCVABI::ABI_ILP32E + ? Align(4) + : Align(16), + /*LocalAreaOffset=*/0), + STI(STI) {} + bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MF.getTarget().Options.DisableFramePointerElim(MF) || - RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || - MFI.isFrameAddressTaken(); + + // If eliminating the frame pointer is disabled, then we have a frame pointer. 
+ if (MF.getTarget().Options.DisableFramePointerElim(MF)) + return true; + + // If we take the address of the frame, then we need to store that address + // somewhere. + if (MFI.isFrameAddressTaken()) + return true; + + // We need to save the frame pointer to realign the stack. + if (RegInfo->needsStackRealignment(MF)) + return true; + + // We need to save the frame pointer to allocate a variable-sized object. + if (MFI.hasVarSizedObjects()) + return true; + + // The ILP32E calling convention aligns the stack at 4-byte boundaries, rather + // than the default of 16-byte boundaries. This can cause issues if we need to + // save or spill registers requiring 8-byte alignment (double-precision FP + // registers), as we need to realign the stack to save and restore them. We + // only find out about spills after register allocation, at which point FP may + // have been allocated. Thus, if we are on ILP32E and we have any physical + // registers that require more-than-4-byte alignment (only the D extension, at + // the moment), we need to preemptively ensure we have the frame pointer. + if (STI.getTargetABI() == RISCVABI::ABI_ILP32E && STI.hasStdExtD()) + return true; + + return false; } bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -54,6 +54,7 @@ default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -1389,19 +1390,35 @@ // register-size fields in the same situations they would be for fixed // arguments. -static const MCPhysReg ArgGPRs[] = { +// The GPRs used for passing arguments in the ILP32* and LP64 ABIs, except +// the ILP32E ABI. 
+static const MCPhysReg ArgGPRs_NonE[] = { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +// The GPRs used for passing arguments in the ILP32E ABI. +static const MCPhysReg ArgGPRs_ILP32E[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, + RISCV::X14, RISCV::X15 +}; +// The FPRs used for passing arguments in the ILP32F and LP64F ABIs. static const MCPhysReg ArgFPR32s[] = { RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; +// The FPRs used for passing arguments in the ILP32D and LP64D ABIs. static const MCPhysReg ArgFPR64s[] = { RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D }; +static ArrayRef<MCPhysReg> getCallingConvArgGPRs(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return makeArrayRef(ArgGPRs_ILP32E); + else + return makeArrayRef(ArgGPRs_NonE); +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, @@ -1409,6 +1426,9 @@ MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; + const RISCVSubtarget &STI = State.getMachineFunction().getSubtarget<RISCVSubtarget>(); + ArrayRef<MCPhysReg> ArgGPRs = getCallingConvArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, @@ -1465,6 +1485,7 @@ default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: break; case RISCVABI::ABI_ILP32F: @@ -1494,6 +1515,8 @@ LocInfo = CCValAssign::BCvt; } + ArrayRef<MCPhysReg> ArgGPRs = getCallingConvArgGPRs(ABI); + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -1504,10 +1527,10 @@ unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { - unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); + unsigned RegIdx = State.getFirstUnallocated( ArgGPRs); // Skip 'odd' register if necessary. - if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) - State.AllocateReg(ArgGPRs); + if (RegIdx != ArgGPRs.size() && RegIdx % 2 == 1) + State.AllocateReg( ArgGPRs); } SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); @@ -1526,7 +1549,7 @@ // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. - Register Reg = State.AllocateReg(ArgGPRs); + Register Reg = State.AllocateReg( ArgGPRs); LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, 8); @@ -1534,7 +1557,7 @@ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); return false; } - if (!State.AllocateReg(ArgGPRs)) + if (!State.AllocateReg( ArgGPRs)) State.AllocateStack(4, 4); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -1574,7 +1597,7 @@ else if (ValVT == MVT::f64 && !UseGPRForF64) Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s); else - Reg = State.AllocateReg(ArgGPRs); + Reg = State.AllocateReg( ArgGPRs); unsigned StackOffset = Reg ?
0 : State.AllocateStack(XLen / 8, XLen / 8); // If we reach this point and PendingLocs is non-empty, we must be at the @@ -1891,6 +1914,7 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); unsigned XLenInBytes = Subtarget.getXLen() / 8; + RISCVABI::ABI ABI = Subtarget.getTargetABI(); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -1938,7 +1962,7 @@ } if (IsVarArg) { - ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); + ArrayRef<MCPhysReg> ArgRegs = getCallingConvArgGPRs(ABI); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -2338,9 +2362,9 @@ Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); + assert(VA.getLocReg() == RISCV::X10 && "Unexpected reg assignment"); SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + DAG.getCopyFromReg(Chain, DL, RISCV::X11, MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -53,6 +53,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_SaveList; @@ -167,6 +169,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_RegMask; diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -1,7 +1,9 @@ ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=ILP32-LP64 +; RUN: | FileCheck %s -check-prefix=ILP32-ILP32E-LP64 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32-ILP32E-LP64 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=ILP32-LP64 +; RUN: | FileCheck %s -check-prefix=ILP32-ILP32E-LP64 ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32F-LP64F ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ @@ -13,82 +15,82 @@ @var = global [32 x float] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns ; something appropriate. 
define void @callee() nounwind { -; ILP32-LP64-LABEL: callee: -; ILP32-LP64: # %bb.0: -; ILP32-LP64-NEXT: lui a0, %hi(var) -; ILP32-LP64-NEXT: flw ft0, %lo(var)(a0) -; ILP32-LP64-NEXT: addi a1, a0, %lo(var) -; ILP32-LP64-NEXT: flw ft1, 4(a1) -; ILP32-LP64-NEXT: flw ft2, 8(a1) -; ILP32-LP64-NEXT: flw ft3, 12(a1) -; ILP32-LP64-NEXT: flw ft4, 16(a1) -; ILP32-LP64-NEXT: flw ft5, 20(a1) -; ILP32-LP64-NEXT: flw ft6, 24(a1) -; ILP32-LP64-NEXT: flw ft7, 28(a1) -; ILP32-LP64-NEXT: flw fa0, 32(a1) -; ILP32-LP64-NEXT: flw fa1, 36(a1) -; ILP32-LP64-NEXT: flw fa2, 40(a1) -; ILP32-LP64-NEXT: flw fa3, 44(a1) -; ILP32-LP64-NEXT: flw fa4, 48(a1) -; ILP32-LP64-NEXT: flw fa5, 52(a1) -; ILP32-LP64-NEXT: flw fa6, 56(a1) -; ILP32-LP64-NEXT: flw fa7, 60(a1) -; ILP32-LP64-NEXT: flw ft8, 64(a1) -; ILP32-LP64-NEXT: flw ft9, 68(a1) -; ILP32-LP64-NEXT: flw ft10, 72(a1) -; ILP32-LP64-NEXT: flw ft11, 76(a1) -; ILP32-LP64-NEXT: flw fs0, 80(a1) -; ILP32-LP64-NEXT: flw fs1, 84(a1) -; ILP32-LP64-NEXT: flw fs2, 88(a1) -; ILP32-LP64-NEXT: flw fs3, 92(a1) -; ILP32-LP64-NEXT: flw fs4, 96(a1) -; ILP32-LP64-NEXT: flw fs5, 100(a1) -; ILP32-LP64-NEXT: flw fs6, 104(a1) -; ILP32-LP64-NEXT: flw fs7, 108(a1) -; ILP32-LP64-NEXT: flw fs8, 124(a1) -; ILP32-LP64-NEXT: flw fs9, 120(a1) -; ILP32-LP64-NEXT: flw fs10, 116(a1) -; ILP32-LP64-NEXT: flw fs11, 112(a1) -; ILP32-LP64-NEXT: fsw fs8, 124(a1) -; ILP32-LP64-NEXT: fsw fs9, 120(a1) -; ILP32-LP64-NEXT: fsw fs10, 116(a1) -; ILP32-LP64-NEXT: fsw fs11, 112(a1) -; ILP32-LP64-NEXT: fsw fs7, 108(a1) -; ILP32-LP64-NEXT: fsw fs6, 104(a1) -; ILP32-LP64-NEXT: fsw fs5, 100(a1) -; ILP32-LP64-NEXT: fsw fs4, 96(a1) -; ILP32-LP64-NEXT: fsw fs3, 92(a1) -; ILP32-LP64-NEXT: fsw fs2, 88(a1) -; ILP32-LP64-NEXT: fsw fs1, 84(a1) -; ILP32-LP64-NEXT: fsw fs0, 80(a1) -; ILP32-LP64-NEXT: fsw ft11, 76(a1) -; ILP32-LP64-NEXT: fsw ft10, 72(a1) -; ILP32-LP64-NEXT: fsw ft9, 68(a1) -; ILP32-LP64-NEXT: fsw ft8, 64(a1) -; ILP32-LP64-NEXT: fsw fa7, 60(a1) -; ILP32-LP64-NEXT: fsw fa6, 56(a1) -; 
ILP32-LP64-NEXT: fsw fa5, 52(a1) -; ILP32-LP64-NEXT: fsw fa4, 48(a1) -; ILP32-LP64-NEXT: fsw fa3, 44(a1) -; ILP32-LP64-NEXT: fsw fa2, 40(a1) -; ILP32-LP64-NEXT: fsw fa1, 36(a1) -; ILP32-LP64-NEXT: fsw fa0, 32(a1) -; ILP32-LP64-NEXT: fsw ft7, 28(a1) -; ILP32-LP64-NEXT: fsw ft6, 24(a1) -; ILP32-LP64-NEXT: fsw ft5, 20(a1) -; ILP32-LP64-NEXT: fsw ft4, 16(a1) -; ILP32-LP64-NEXT: fsw ft3, 12(a1) -; ILP32-LP64-NEXT: fsw ft2, 8(a1) -; ILP32-LP64-NEXT: fsw ft1, 4(a1) -; ILP32-LP64-NEXT: fsw ft0, %lo(var)(a0) -; ILP32-LP64-NEXT: ret +; ILP32-ILP32E-LP64-LABEL: callee: +; ILP32-ILP32E-LP64: # %bb.0: +; ILP32-ILP32E-LP64-NEXT: lui a0, %hi(var) +; ILP32-ILP32E-LP64-NEXT: flw ft0, %lo(var)(a0) +; ILP32-ILP32E-LP64-NEXT: addi a1, a0, %lo(var) +; ILP32-ILP32E-LP64-NEXT: flw ft1, 4(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft2, 8(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft3, 12(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft4, 16(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft5, 20(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft6, 24(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft7, 28(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa0, 32(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa1, 36(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa2, 40(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa3, 44(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa4, 48(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa5, 52(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa6, 56(a1) +; ILP32-ILP32E-LP64-NEXT: flw fa7, 60(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft8, 64(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft9, 68(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft10, 72(a1) +; ILP32-ILP32E-LP64-NEXT: flw ft11, 76(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs0, 80(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs1, 84(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs2, 88(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs3, 92(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs4, 96(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs5, 100(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs6, 104(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs7, 108(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs8, 124(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs9, 120(a1) +; 
ILP32-ILP32E-LP64-NEXT: flw fs10, 116(a1) +; ILP32-ILP32E-LP64-NEXT: flw fs11, 112(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs8, 124(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs9, 120(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs10, 116(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs11, 112(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs7, 108(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs6, 104(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs5, 100(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs4, 96(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs3, 92(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs2, 88(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs1, 84(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fs0, 80(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft11, 76(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft10, 72(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft9, 68(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft8, 64(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa7, 60(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa6, 56(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa5, 52(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa4, 48(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa3, 44(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa2, 40(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa1, 36(a1) +; ILP32-ILP32E-LP64-NEXT: fsw fa0, 32(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft7, 28(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft6, 24(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft5, 20(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft4, 16(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft3, 12(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft2, 8(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft1, 4(a1) +; ILP32-ILP32E-LP64-NEXT: fsw ft0, %lo(var)(a0) +; ILP32-ILP32E-LP64-NEXT: ret ; ; ILP32F-LP64F-LABEL: callee: ; ILP32F-LP64F: # %bb.0: @@ -140,15 +142,15 @@ ; fs0-fs11 are preserved across calls. 
define void @caller() nounwind { -; ILP32-LP64-LABEL: caller: -; ILP32-LP64-NOT: ft{{[1-9][0-9]*}} -; ILP32-LP64-NOT: fs{{[0-9]+}} -; ILP32-LP64-NOT: fa{{[0-9]+}} -; ILP32-LP64: call callee -; ILP32-LP64-NOT: ft{{[1-9][0-9]*}} -; ILP32-LP64-NOT: fs{{[0-9]+}} -; ILP32-LP64-NOT: fa{{[0-9]+}} -; ILP32-LP64: ret +; ILP32-ILP32E-LP64-LABEL: caller: +; ILP32-ILP32E-LP64-NOT: ft{{[1-9][0-9]*}} +; ILP32-ILP32E-LP64-NOT: fs{{[0-9]+}} +; ILP32-ILP32E-LP64-NOT: fa{{[0-9]+}} +; ILP32-ILP32E-LP64: call callee +; ILP32-ILP32E-LP64-NOT: ft{{[1-9][0-9]*}} +; ILP32-ILP32E-LP64-NOT: fs{{[0-9]+}} +; ILP32-ILP32E-LP64-NOT: fa{{[0-9]+}} +; ILP32-ILP32E-LP64: ret ; ; ILP32F-LP64F-LABEL: caller: ; ILP32F-LP64F: flw fs8, 80(s1) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -1,90 +1,28 @@ -; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=ILP32-LP64 -; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=ILP32-LP64 +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32-LP64-NO-D +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32-LP64-NO-D ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32D-LP64D ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32D-LP64D +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E + @var = global [32 x double] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. 
-; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns -; something appropriate. +; something appropriate. The checks ensure the function is saving the right +; registers to the stack before loading from @var. define void @callee() nounwind { -; ILP32-LP64-LABEL: callee: -; ILP32-LP64: # %bb.0: -; ILP32-LP64-NEXT: lui a0, %hi(var) -; ILP32-LP64-NEXT: fld ft0, %lo(var)(a0) -; ILP32-LP64-NEXT: addi a1, a0, %lo(var) -; ILP32-LP64-NEXT: fld ft1, 8(a1) -; ILP32-LP64-NEXT: fld ft2, 16(a1) -; ILP32-LP64-NEXT: fld ft3, 24(a1) -; ILP32-LP64-NEXT: fld ft4, 32(a1) -; ILP32-LP64-NEXT: fld ft5, 40(a1) -; ILP32-LP64-NEXT: fld ft6, 48(a1) -; ILP32-LP64-NEXT: fld ft7, 56(a1) -; ILP32-LP64-NEXT: fld fa0, 64(a1) -; ILP32-LP64-NEXT: fld fa1, 72(a1) -; ILP32-LP64-NEXT: fld fa2, 80(a1) -; ILP32-LP64-NEXT: fld fa3, 88(a1) -; ILP32-LP64-NEXT: fld fa4, 96(a1) -; ILP32-LP64-NEXT: fld fa5, 104(a1) -; ILP32-LP64-NEXT: fld fa6, 112(a1) -; ILP32-LP64-NEXT: fld fa7, 120(a1) -; ILP32-LP64-NEXT: fld ft8, 128(a1) -; ILP32-LP64-NEXT: fld ft9, 136(a1) -; ILP32-LP64-NEXT: fld ft10, 144(a1) -; ILP32-LP64-NEXT: fld ft11, 152(a1) -; ILP32-LP64-NEXT: fld fs0, 160(a1) -; ILP32-LP64-NEXT: fld fs1, 168(a1) -; ILP32-LP64-NEXT: fld fs2, 176(a1) -; ILP32-LP64-NEXT: fld fs3, 184(a1) -; ILP32-LP64-NEXT: fld fs4, 192(a1) -; ILP32-LP64-NEXT: fld fs5, 200(a1) -; ILP32-LP64-NEXT: fld fs6, 208(a1) -; ILP32-LP64-NEXT: fld fs7, 216(a1) -; ILP32-LP64-NEXT: fld fs8, 248(a1) -; ILP32-LP64-NEXT: fld fs9, 240(a1) -; ILP32-LP64-NEXT: fld fs10, 232(a1) -; ILP32-LP64-NEXT: fld fs11, 224(a1) -; ILP32-LP64-NEXT: fsd fs8, 248(a1) -; ILP32-LP64-NEXT: fsd fs9, 240(a1) -; ILP32-LP64-NEXT: fsd fs10, 232(a1) -; ILP32-LP64-NEXT: fsd fs11, 224(a1) -; ILP32-LP64-NEXT: fsd fs7, 216(a1) 
-; ILP32-LP64-NEXT: fsd fs6, 208(a1) -; ILP32-LP64-NEXT: fsd fs5, 200(a1) -; ILP32-LP64-NEXT: fsd fs4, 192(a1) -; ILP32-LP64-NEXT: fsd fs3, 184(a1) -; ILP32-LP64-NEXT: fsd fs2, 176(a1) -; ILP32-LP64-NEXT: fsd fs1, 168(a1) -; ILP32-LP64-NEXT: fsd fs0, 160(a1) -; ILP32-LP64-NEXT: fsd ft11, 152(a1) -; ILP32-LP64-NEXT: fsd ft10, 144(a1) -; ILP32-LP64-NEXT: fsd ft9, 136(a1) -; ILP32-LP64-NEXT: fsd ft8, 128(a1) -; ILP32-LP64-NEXT: fsd fa7, 120(a1) -; ILP32-LP64-NEXT: fsd fa6, 112(a1) -; ILP32-LP64-NEXT: fsd fa5, 104(a1) -; ILP32-LP64-NEXT: fsd fa4, 96(a1) -; ILP32-LP64-NEXT: fsd fa3, 88(a1) -; ILP32-LP64-NEXT: fsd fa2, 80(a1) -; ILP32-LP64-NEXT: fsd fa1, 72(a1) -; ILP32-LP64-NEXT: fsd fa0, 64(a1) -; ILP32-LP64-NEXT: fsd ft7, 56(a1) -; ILP32-LP64-NEXT: fsd ft6, 48(a1) -; ILP32-LP64-NEXT: fsd ft5, 40(a1) -; ILP32-LP64-NEXT: fsd ft4, 32(a1) -; ILP32-LP64-NEXT: fsd ft3, 24(a1) -; ILP32-LP64-NEXT: fsd ft2, 16(a1) -; ILP32-LP64-NEXT: fsd ft1, 8(a1) -; ILP32-LP64-NEXT: fsd ft0, %lo(var)(a0) -; ILP32-LP64-NEXT: ret +; ILP32-LP64-NO-D-LABEL: callee: +; ILP32-LP64-NO-D: # %bb.0: +; ILP32-LP64-NO-D-NEXT: lui a0, %hi(var) ; ; ILP32D-LP64D-LABEL: callee: ; ILP32D-LP64D: # %bb.0: @@ -102,8 +40,14 @@ ; ILP32D-LP64D-NEXT: fsd fs10, 8(sp) ; ILP32D-LP64D-NEXT: fsd fs11, 0(sp) ; ILP32D-LP64D-NEXT: lui a0, %hi(var) -; ILP32D-LP64D-NEXT: fld ft0, %lo(var)(a0) -; ILP32D-LP64D-NEXT: addi a1, a0, %lo(var) +; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0 +; ILP32E-NEXT: addi sp, sp, -8 +; ILP32E-NEXT: sw ra, 4(sp) +; ILP32E-NEXT: sw s0, 0(sp) +; ILP32E-NEXT: addi s0, sp, 8 +; ILP32E-NEXT: lui a0, %hi(var) %val = load [32 x double], [32 x double]* @var store volatile [32 x double] %val, [32 x double]* @var ret void @@ -117,43 +61,62 @@ ; fs0-fs11 are preserved across calls. 
define void @caller() nounwind { -; ILP32-LP64-LABEL: caller: -; ILP32-LP64-NOT: ft{{[1-9][0-9]*}} -; ILP32-LP64-NOT: fs{{[0-9]+}} -; ILP32-LP64-NOT: fa{{[0-9]+}} -; ILP32-LP64: call callee -; ILP32-LP64-NOT: ft{{[1-9][0-9]*}} -; ILP32-LP64-NOT: fs{{[0-9]+}} -; ILP32-LP64-NOT: fa{{[0-9]+}} -; ILP32-LP64: ret +; In this case, only `ft0` is used to save all of %val to the stack. +; +; ILP32-LP64-NO-D-LABEL: caller: +; ILP32-LP64-NO-D: # %bb.0: +; ILP32-LP64-NO-D-NOT: ft{{[1-9][0-9]*}} +; ILP32-LP64-NO-D-NOT: fs{{[0-9]+}} +; ILP32-LP64-NO-D-NOT: fa{{[0-9]+}} +; ILP32-LP64-NO-D: call callee +; ILP32-LP64-NO-D-NOT: ft{{[1-9][0-9]*}} +; ILP32-LP64-NO-D-NOT: fs{{[0-9]+}} +; ILP32-LP64-NO-D-NOT: fa{{[0-9]+}} +; ILP32-LP64-NO-D: ret +; +; In this case, the floating-point saved registers are keeping parts of %val +; across the call so they don't have to be copied to the stack, so it looks odd +; but is actually reasonable. ; ; ILP32F-LP64D-LABEL: caller: -; ILP32D-LP64D: fld fs8, 160(s1) -; ILP32D-LP64D-NEXT: fld fs9, 168(s1) -; ILP32D-LP64D-NEXT: fld fs10, 176(s1) -; ILP32D-LP64D-NEXT: fld fs11, 184(s1) -; ILP32D-LP64D-NEXT: fld fs0, 192(s1) -; ILP32D-LP64D-NEXT: fld fs1, 200(s1) -; ILP32D-LP64D-NEXT: fld fs2, 208(s1) -; ILP32D-LP64D-NEXT: fld fs3, 216(s1) -; ILP32D-LP64D-NEXT: fld fs4, 224(s1) -; ILP32D-LP64D-NEXT: fld fs5, 232(s1) -; ILP32D-LP64D-NEXT: fld fs6, 240(s1) -; ILP32D-LP64D-NEXT: fld fs7, 248(s1) -; ILP32D-LP64D-NEXT: call callee -; ILP32D-LP64D-NEXT: fsd fs7, 248(s1) -; ILP32D-LP64D-NEXT: fsd fs6, 240(s1) -; ILP32D-LP64D-NEXT: fsd fs5, 232(s1) -; ILP32D-LP64D-NEXT: fsd fs4, 224(s1) -; ILP32D-LP64D-NEXT: fsd fs3, 216(s1) -; ILP32D-LP64D-NEXT: fsd fs2, 208(s1) -; ILP32D-LP64D-NEXT: fsd fs1, 200(s1) -; ILP32D-LP64D-NEXT: fsd fs0, 192(s1) -; ILP32D-LP64D-NEXT: fsd fs11, 184(s1) -; ILP32D-LP64D-NEXT: fsd fs10, 176(s1) -; ILP32D-LP64D-NEXT: fsd fs9, 168(s1) -; ILP32D-LP64D-NEXT: fsd fs8, 160(s1) -; ILP32D-LP64D-NEXT: fld ft0, {{[0-9]+}}(sp) +; ILP32D-LP64D: fld fs8, 
160(s1) +; ILP32D-LP64D-NEXT: fld fs9, 168(s1) +; ILP32D-LP64D-NEXT: fld fs10, 176(s1) +; ILP32D-LP64D-NEXT: fld fs11, 184(s1) +; ILP32D-LP64D-NEXT: fld fs0, 192(s1) +; ILP32D-LP64D-NEXT: fld fs1, 200(s1) +; ILP32D-LP64D-NEXT: fld fs2, 208(s1) +; ILP32D-LP64D-NEXT: fld fs3, 216(s1) +; ILP32D-LP64D-NEXT: fld fs4, 224(s1) +; ILP32D-LP64D-NEXT: fld fs5, 232(s1) +; ILP32D-LP64D-NEXT: fld fs6, 240(s1) +; ILP32D-LP64D-NEXT: fld fs7, 248(s1) +; ILP32D-LP64D-NEXT: call callee +; ILP32D-LP64D-NEXT: fsd fs7, 248(s1) +; ILP32D-LP64D-NEXT: fsd fs6, 240(s1) +; ILP32D-LP64D-NEXT: fsd fs5, 232(s1) +; ILP32D-LP64D-NEXT: fsd fs4, 224(s1) +; ILP32D-LP64D-NEXT: fsd fs3, 216(s1) +; ILP32D-LP64D-NEXT: fsd fs2, 208(s1) +; ILP32D-LP64D-NEXT: fsd fs1, 200(s1) +; ILP32D-LP64D-NEXT: fsd fs0, 192(s1) +; ILP32D-LP64D-NEXT: fsd fs11, 184(s1) +; ILP32D-LP64D-NEXT: fsd fs10, 176(s1) +; ILP32D-LP64D-NEXT: fsd fs9, 168(s1) +; ILP32D-LP64D-NEXT: fsd fs8, 160(s1) +; +; In this case, only `ft0` is used to save all of %val to the stack. 
+; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NOT: ft{{[1-9][0-9]*}} +; ILP32E-NOT: fs{{[0-9]+}} +; ILP32E-NOT: fa{{[0-9]+}} +; ILP32E: call callee +; ILP32E-NOT: ft{{[1-9][0-9]*}} +; ILP32E-NOT: fs{{[0-9]+}} +; ILP32E-NOT: fa{{[0-9]+}} +; ILP32E: ret %val = load [32 x double], [32 x double]* @var call void @callee() store volatile [32 x double] %val, [32 x double]* @var diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -1,5 +1,7 @@ ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \ @@ -46,6 +48,17 @@ ; RV32I-NEXT: sw a1, 24(sp) ; RV32I-NEXT: addi a2, a0, %lo(var) ; +; RV32I-ILP32E-LABEL: callee: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -32 +; RV32I-ILP32E-NEXT: sw ra, 28(sp) +; RV32I-ILP32E-NEXT: sw s0, 24(sp) +; RV32I-ILP32E-NEXT: sw s1, 20(sp) +; RV32I-ILP32E-NEXT: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 16(sp) +; RV32I-ILP32E-NEXT: addi a2, a0, %lo(var) +; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -80 @@ -151,6 +164,17 @@ ; RV32I-NEXT: sw s2, 84(s0) ; RV32I-NEXT: lw a0, 8(sp) ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 120(sp) +; RV32I-ILP32E-NEXT: addi s0, a0, %lo(var) +; RV32I-ILP32E: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw s1, 124(s0) +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: sw s1, 
124(s0) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) +; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: addi s0, sp, 144 ; RV32I-WITH-FP-NEXT: lui a0, %hi(var) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+f -verify-machineinstrs < %s | FileCheck -check-prefix=ILP32E-F %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+f,+d -verify-machineinstrs < %s | FileCheck -check-prefix=ILP32E-FD %s + +declare void @callee() + +@global_double = external global double, align 8 + +define void @test_double() nounwind { +; ILP32E-F-LABEL: test_double: +; ILP32E-F: # %bb.0: +; ILP32E-F-NEXT: addi sp, sp, -32 +; ILP32E-F-NEXT: sw ra, 28(sp) +; ILP32E-F-NEXT: sw s0, 24(sp) +; ILP32E-F-NEXT: sw s1, 20(sp) +; ILP32E-F-NEXT: addi s0, sp, 32 +; ILP32E-F-NEXT: andi sp, sp, -8 +; ILP32E-F-NEXT: lui a0, %hi(global_double) +; ILP32E-F-NEXT: addi s1, a0, %lo(global_double) +; ILP32E-F-NEXT: lw a0, %lo(global_double)(a0) +; ILP32E-F-NEXT: lw a1, 4(s1) +; ILP32E-F-NEXT: sw a0, 8(sp) +; ILP32E-F-NEXT: sw a1, 12(sp) +; ILP32E-F-NEXT: call callee +; ILP32E-F-NEXT: lw a0, 12(sp) +; ILP32E-F-NEXT: lw a1, 8(sp) +; ILP32E-F-NEXT: sw a0, 4(s1) +; ILP32E-F-NEXT: lui a0, %hi(global_double) +; ILP32E-F-NEXT: sw a1, %lo(global_double)(a0) +; ILP32E-F-NEXT: addi sp, s0, -32 +; ILP32E-F-NEXT: lw s1, 20(sp) +; ILP32E-F-NEXT: lw s0, 24(sp) +; ILP32E-F-NEXT: lw ra, 28(sp) +; ILP32E-F-NEXT: addi sp, sp, 32 +; ILP32E-F-NEXT: ret +; +; ILP32E-FD-LABEL: test_double: +; ILP32E-FD: # %bb.0: +; ILP32E-FD-NEXT: addi sp, sp, -32 +; ILP32E-FD-NEXT: sw ra, 28(sp) +; ILP32E-FD-NEXT: sw s0, 24(sp) +; ILP32E-FD-NEXT: sw s1, 20(sp) +; ILP32E-FD-NEXT: addi s0, sp, 32 +; ILP32E-FD-NEXT: andi sp, sp, -8 +; ILP32E-FD-NEXT: lui s1, %hi(global_double) +; ILP32E-FD-NEXT: fld
ft0, %lo(global_double)(s1) +; ILP32E-FD-NEXT: fsd ft0, 8(sp) +; ILP32E-FD-NEXT: call callee +; ILP32E-FD-NEXT: fld ft0, 8(sp) +; ILP32E-FD-NEXT: fsd ft0, %lo(global_double)(s1) +; ILP32E-FD-NEXT: addi sp, s0, -32 +; ILP32E-FD-NEXT: lw s1, 20(sp) +; ILP32E-FD-NEXT: lw s0, 24(sp) +; ILP32E-FD-NEXT: lw ra, 28(sp) +; ILP32E-FD-NEXT: addi sp, sp, 32 +; ILP32E-FD-NEXT: ret + %local_double = alloca double, align 8 + %1 = load double, double* @global_double, align 8 + store double %1, double* %local_double, align 8 + call void @callee() + %2 = load double, double* %local_double, align 8 + store double %2, double* @global_double, align 8 + ret void +} + +@global_float = external global float, align 4 + +define void @test_float() nounwind { +; ILP32E-F-LABEL: test_float: +; ILP32E-F: # %bb.0: +; ILP32E-F-NEXT: addi sp, sp, -24 +; ILP32E-F-NEXT: sw ra, 20(sp) +; ILP32E-F-NEXT: sw s0, 16(sp) +; ILP32E-F-NEXT: sw s1, 12(sp) +; ILP32E-F-NEXT: addi s0, sp, 24 +; ILP32E-F-NEXT: andi sp, sp, -8 +; ILP32E-F-NEXT: lui s1, %hi(global_float) +; ILP32E-F-NEXT: flw ft0, %lo(global_float)(s1) +; ILP32E-F-NEXT: fsw ft0, 8(sp) +; ILP32E-F-NEXT: call callee +; ILP32E-F-NEXT: flw ft0, 8(sp) +; ILP32E-F-NEXT: fsw ft0, %lo(global_float)(s1) +; ILP32E-F-NEXT: addi sp, s0, -24 +; ILP32E-F-NEXT: lw s1, 12(sp) +; ILP32E-F-NEXT: lw s0, 16(sp) +; ILP32E-F-NEXT: lw ra, 20(sp) +; ILP32E-F-NEXT: addi sp, sp, 24 +; ILP32E-F-NEXT: ret +; +; ILP32E-FD-LABEL: test_float: +; ILP32E-FD: # %bb.0: +; ILP32E-FD-NEXT: addi sp, sp, -24 +; ILP32E-FD-NEXT: sw ra, 20(sp) +; ILP32E-FD-NEXT: sw s0, 16(sp) +; ILP32E-FD-NEXT: sw s1, 12(sp) +; ILP32E-FD-NEXT: addi s0, sp, 24 +; ILP32E-FD-NEXT: andi sp, sp, -8 +; ILP32E-FD-NEXT: lui s1, %hi(global_float) +; ILP32E-FD-NEXT: flw ft0, %lo(global_float)(s1) +; ILP32E-FD-NEXT: fsw ft0, 8(sp) +; ILP32E-FD-NEXT: call callee +; ILP32E-FD-NEXT: flw ft0, 8(sp) +; ILP32E-FD-NEXT: fsw ft0, %lo(global_float)(s1) +; ILP32E-FD-NEXT: addi sp, s0, -24 +; ILP32E-FD-NEXT: lw s1, 12(sp) +; 
ILP32E-FD-NEXT: lw s0, 16(sp) +; ILP32E-FD-NEXT: lw ra, 20(sp) +; ILP32E-FD-NEXT: addi sp, sp, 24 +; ILP32E-FD-NEXT: ret + %local_float = alloca float, align 8 + %1 = load float, float* @global_float, align 4 + store float %1, float* %local_float, align 8 + call void @callee() + %2 = load float, float* %local_float, align 8 + store float %2, float* @global_float, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,1995 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+d \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32D-ILP32E %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Any tests that would have identical output for some combination of the ilp32* +; ABIs belong in calling-conv-*-common.ll. This file contains tests that will +; have different output across those ABIs, i.e. where some arguments would be +; passed according to the floating point ABI, or where the stack is aligned to +; a different boundary.
+ +define i32 @callee_float_in_regs(i32 %a, float %b) nounwind { +; ILP32E-FPELIM-LABEL: callee_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: call __fixsfsi +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: call __fixsfsi +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_float_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: fmv.w.x ft0, a1 +; RV32D-ILP32E-NEXT: fcvt.w.s a1, ft0, rtz +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() nounwind { +; ILP32E-FPELIM-LABEL: caller_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: lui a1, 262144 +; ILP32E-FPELIM-NEXT: call callee_float_in_regs +; 
ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: lui a1, 262144 +; ILP32E-WITHFP-NEXT: call callee_float_in_regs +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_float_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: lui a1, 262144 +; RV32D-ILP32E-NEXT: call callee_float_in_regs +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) nounwind { +; ILP32E-FPELIM-LABEL: callee_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 16 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 8(s0) +; ILP32E-FPELIM-NEXT: lw a1, 0(s0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: addi sp, s0, -16 +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) +; ILP32E-WITHFP-NEXT: addi 
s0, sp, 16 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_float_on_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -16 +; RV32D-ILP32E-NEXT: sw ra, 12(sp) +; RV32D-ILP32E-NEXT: sw s0, 8(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 16 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a0, 0(s0) +; RV32D-ILP32E-NEXT: lw a1, 8(s0) +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -16 +; RV32D-ILP32E-NEXT: lw s0, 8(sp) +; RV32D-ILP32E-NEXT: lw ra, 12(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 16 +; RV32D-ILP32E-NEXT: ret + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i32 @caller_float_on_stack() nounwind { +; ILP32E-FPELIM-LABEL: caller_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lui a0, 264704 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: addi a1, zero, 4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a2, zero, 2 +; ILP32E-FPELIM-NEXT: addi a4, zero, 3 +; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: mv a3, zero +; ILP32E-FPELIM-NEXT: mv a5, zero +; ILP32E-FPELIM-NEXT: call callee_float_on_stack +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_on_stack: +; 
ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lui a0, 264704 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: addi a1, zero, 4 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a2, zero, 2 +; ILP32E-WITHFP-NEXT: addi a4, zero, 3 +; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: mv a1, zero +; ILP32E-WITHFP-NEXT: mv a3, zero +; ILP32E-WITHFP-NEXT: mv a5, zero +; ILP32E-WITHFP-NEXT: call callee_float_on_stack +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_float_on_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) +; RV32D-ILP32E-NEXT: sw s0, 24(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lui a0, 264704 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: sw zero, 4(sp) +; RV32D-ILP32E-NEXT: addi a1, zero, 4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a2, zero, 2 +; RV32D-ILP32E-NEXT: addi a4, zero, 3 +; RV32D-ILP32E-NEXT: sw a1, 0(sp) +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: mv a3, zero +; RV32D-ILP32E-NEXT: mv a5, zero +; RV32D-ILP32E-NEXT: call callee_float_on_stack +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) +; RV32D-ILP32E-NEXT: lw ra, 28(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 
260096 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a0, 260096 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_tiny_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lui a0, 260096 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: call callee_tiny_scalar_ret +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_tiny_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: call callee_tiny_scalar_ret +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; 
RV32D-ILP32E-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. + +define i32 @callee_double_in_regs(i32 %a, double %b) nounwind { +; ILP32E-FPELIM-LABEL: callee_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: mv a1, a2 +; ILP32E-FPELIM-NEXT: call __fixdfsi +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: mv a1, a2 +; ILP32E-WITHFP-NEXT: call __fixdfsi +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_double_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -24 +; RV32D-ILP32E-NEXT: sw ra, 20(sp) +; RV32D-ILP32E-NEXT: sw s0, 16(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 24 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw a1, 8(sp) +; RV32D-ILP32E-NEXT: sw a2, 12(sp) +; RV32D-ILP32E-NEXT: fld ft0, 8(sp) +; RV32D-ILP32E-NEXT: fcvt.w.d a1, ft0, rtz +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -24 +; RV32D-ILP32E-NEXT: lw s0, 16(sp) +; RV32D-ILP32E-NEXT: lw ra, 20(sp) 
+; RV32D-ILP32E-NEXT: addi sp, sp, 24 +; RV32D-ILP32E-NEXT: ret + %b_fptosi = fptosi double %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_regs() nounwind { +; ILP32E-FPELIM-LABEL: caller_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: lui a2, 262144 +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: call callee_double_in_regs +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: mv a1, zero +; ILP32E-WITHFP-NEXT: call callee_double_in_regs +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_double_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: lui a2, 262144 +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: call callee_double_in_regs +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_double_in_regs(i32 1, double 2.0) + ret i32 %1 +} + +; Check 2x*xlen values are aligned appropriately when passed on the stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 
%j, [2 x i32] %k) nounwind { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: callee_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 16 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-NEXT: lw a1, 28(s0) +; ILP32E-FPELIM-NEXT: lw a2, 4(s0) +; ILP32E-FPELIM-NEXT: lw a3, 8(s0) +; ILP32E-FPELIM-NEXT: lw a4, 16(s0) +; ILP32E-FPELIM-NEXT: lw a5, 24(s0) +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a0, a0, a3 +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -16 +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-NEXT: lw a1, 28(s0) +; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a4, 16(s0) +; ILP32E-WITHFP-NEXT: lw a5, 24(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a0, a0, a3 +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_aligned_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -16 +; RV32D-ILP32E-NEXT: sw ra, 12(sp) +; 
RV32D-ILP32E-NEXT: sw s0, 8(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 16 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a0, 0(a2) +; RV32D-ILP32E-NEXT: lw a1, 28(s0) +; RV32D-ILP32E-NEXT: lw a2, 4(s0) +; RV32D-ILP32E-NEXT: lw a3, 8(s0) +; RV32D-ILP32E-NEXT: lw a4, 16(s0) +; RV32D-ILP32E-NEXT: lw a5, 24(s0) +; RV32D-ILP32E-NEXT: add a0, a0, a2 +; RV32D-ILP32E-NEXT: add a0, a0, a3 +; RV32D-ILP32E-NEXT: add a0, a0, a4 +; RV32D-ILP32E-NEXT: add a0, a0, a5 +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -16 +; RV32D-ILP32E-NEXT: lw s0, 8(sp) +; RV32D-ILP32E-NEXT: lw ra, 12(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 16 +; RV32D-ILP32E-NEXT: ret + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 %3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() nounwind { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: caller_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -96 +; ILP32E-FPELIM-NEXT: sw ra, 92(sp) +; ILP32E-FPELIM-NEXT: sw s0, 88(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 96 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, zero, 18 +; ILP32E-FPELIM-NEXT: sw a0, 32(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 17 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 16 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 15 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 14 +; ILP32E-FPELIM-NEXT: sw 
a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 76(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 72(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 68(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: addi a2, sp, 64 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 11 +; ILP32E-FPELIM-NEXT: addi a3, zero, 12 +; ILP32E-FPELIM-NEXT: addi a4, zero, 13 +; ILP32E-FPELIM-NEXT: sw a6, 64(sp) +; ILP32E-FPELIM-NEXT: call callee_aligned_stack +; ILP32E-FPELIM-NEXT: addi sp, s0, -96 +; ILP32E-FPELIM-NEXT: lw s0, 88(sp) +; ILP32E-FPELIM-NEXT: lw ra, 92(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 96 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -96 +; ILP32E-WITHFP-NEXT: sw ra, 92(sp) +; ILP32E-WITHFP-NEXT: sw s0, 88(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 96 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, zero, 18 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 17 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 16 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 15 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 4 +; 
ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 76(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 72(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 68(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: addi a2, sp, 64 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 11 +; ILP32E-WITHFP-NEXT: addi a3, zero, 12 +; ILP32E-WITHFP-NEXT: addi a4, zero, 13 +; ILP32E-WITHFP-NEXT: sw a6, 64(sp) +; ILP32E-WITHFP-NEXT: call callee_aligned_stack +; ILP32E-WITHFP-NEXT: addi sp, s0, -96 +; ILP32E-WITHFP-NEXT: lw s0, 88(sp) +; ILP32E-WITHFP-NEXT: lw ra, 92(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 96 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_aligned_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -96 +; RV32D-ILP32E-NEXT: sw ra, 92(sp) +; RV32D-ILP32E-NEXT: sw s0, 88(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 96 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: lui a0, 262236 +; RV32D-ILP32E-NEXT: addi a0, a0, 655 +; RV32D-ILP32E-NEXT: sw a0, 20(sp) +; RV32D-ILP32E-NEXT: lui a0, 377487 +; RV32D-ILP32E-NEXT: addi a0, a0, 1475 +; RV32D-ILP32E-NEXT: sw a0, 16(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 18 +; RV32D-ILP32E-NEXT: sw a0, 32(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 17 +; RV32D-ILP32E-NEXT: sw a0, 28(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 16 +; RV32D-ILP32E-NEXT: sw a0, 24(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 15 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 14 +; RV32D-ILP32E-NEXT: sw a0, 4(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 4 +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: lui a0, 262153 +; 
RV32D-ILP32E-NEXT: addi a0, a0, 491 +; RV32D-ILP32E-NEXT: sw a0, 76(sp) +; RV32D-ILP32E-NEXT: lui a0, 545260 +; RV32D-ILP32E-NEXT: addi a0, a0, -1967 +; RV32D-ILP32E-NEXT: sw a0, 72(sp) +; RV32D-ILP32E-NEXT: lui a0, 964690 +; RV32D-ILP32E-NEXT: addi a0, a0, -328 +; RV32D-ILP32E-NEXT: sw a0, 68(sp) +; RV32D-ILP32E-NEXT: lui a0, 335544 +; RV32D-ILP32E-NEXT: addi a6, a0, 1311 +; RV32D-ILP32E-NEXT: lui a0, 688509 +; RV32D-ILP32E-NEXT: addi a5, a0, -2048 +; RV32D-ILP32E-NEXT: addi a2, sp, 64 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 11 +; RV32D-ILP32E-NEXT: addi a3, zero, 12 +; RV32D-ILP32E-NEXT: addi a4, zero, 13 +; RV32D-ILP32E-NEXT: sw a6, 64(sp) +; RV32D-ILP32E-NEXT: call callee_aligned_stack +; RV32D-ILP32E-NEXT: addi sp, s0, -96 +; RV32D-ILP32E-NEXT: lw s0, 88(sp) +; RV32D-ILP32E-NEXT: lw ra, 92(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 96 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +define double @callee_small_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 261888 +; ILP32E-FPELIM-NEXT: mv a0, zero +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a1, 261888 +; ILP32E-WITHFP-NEXT: mv a0, zero +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; 
RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lui a1, 261888 +; RV32D-ILP32E-NEXT: mv a0, zero +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + ret double 1.0 +} + +define i64 @caller_small_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) +; RV32D-ILP32E-NEXT: sw s0, 24(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: call callee_small_scalar_ret +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: sw a1, 12(sp) +; RV32D-ILP32E-NEXT: fld ft0, 8(sp) +; RV32D-ILP32E-NEXT: fsd ft0, 16(sp) +; RV32D-ILP32E-NEXT: lw a0, 16(sp) +; RV32D-ILP32E-NEXT: lw a1, 20(sp) +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) +; RV32D-ILP32E-NEXT: lw ra, 28(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = call double @callee_small_scalar_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +; Check that on RV32, i64 is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind { +; ILP32E-FPELIM-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_i64_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %b_trunc = trunc i64 %b to i32 + %1 = add i32 %a, %b_trunc + ret i32 %1 +} + +define i32 @caller_i64_in_regs() nounwind { +; ILP32E-FPELIM-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: mv a2, zero +; ILP32E-FPELIM-NEXT: call callee_i64_in_regs +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call callee_i64_in_regs +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi 
sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_i64_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: mv a2, zero +; RV32D-ILP32E-NEXT: call callee_i64_in_regs +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_i64_in_regs(i32 1, i64 2) + ret i32 %1 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) nounwind { +; ILP32E-FPELIM-LABEL: callee_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 16 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a6, 16(s0) +; ILP32E-FPELIM-NEXT: lw a7, 0(s0) +; ILP32E-FPELIM-NEXT: lw t0, 8(s0) +; ILP32E-FPELIM-NEXT: lw t1, 12(s0) +; ILP32E-FPELIM-NEXT: andi t2, a0, 255 +; ILP32E-FPELIM-NEXT: lui a0, 16 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1 +; ILP32E-FPELIM-NEXT: and a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, t2, a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-NEXT: addi sp, s0, -16 +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi 
sp, sp, -16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a6, 16(s0) +; ILP32E-WITHFP-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-NEXT: lw t0, 8(s0) +; ILP32E-WITHFP-NEXT: lw t1, 12(s0) +; ILP32E-WITHFP-NEXT: andi t2, a0, 255 +; ILP32E-WITHFP-NEXT: lui a0, 16 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1 +; ILP32E-WITHFP-NEXT: and a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, t2, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_many_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -16 +; RV32D-ILP32E-NEXT: sw ra, 12(sp) +; RV32D-ILP32E-NEXT: sw s0, 8(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 16 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a6, 16(s0) +; RV32D-ILP32E-NEXT: lw a7, 0(s0) +; RV32D-ILP32E-NEXT: lw t0, 8(s0) +; RV32D-ILP32E-NEXT: lw t1, 12(s0) +; RV32D-ILP32E-NEXT: andi t2, a0, 255 +; RV32D-ILP32E-NEXT: lui a0, 16 +; RV32D-ILP32E-NEXT: addi a0, a0, -1 +; RV32D-ILP32E-NEXT: and a0, a1, a0 +; RV32D-ILP32E-NEXT: add a0, t2, a0 +; RV32D-ILP32E-NEXT: add a0, a0, a2 +; RV32D-ILP32E-NEXT: xor a1, a4, t1 +; RV32D-ILP32E-NEXT: xor a2, a3, t0 +; RV32D-ILP32E-NEXT: or a1, a2, a1 +; RV32D-ILP32E-NEXT: seqz a1, a1 +; RV32D-ILP32E-NEXT: add a0, a1, a0 +; RV32D-ILP32E-NEXT: add a0, a0, a5 +; RV32D-ILP32E-NEXT: add a0, a0, a7 +; RV32D-ILP32E-NEXT: add a0, a0, a6 +; RV32D-ILP32E-NEXT: addi sp, s0, -16 +; RV32D-ILP32E-NEXT: lw s0, 
8(sp) +; RV32D-ILP32E-NEXT: lw ra, 12(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 16 +; RV32D-ILP32E-NEXT: ret + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret i32 %8 +} + +define i32 @caller_many_scalars() nounwind { +; ILP32E-FPELIM-LABEL: caller_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-NEXT: sw ra, 36(sp) +; ILP32E-FPELIM-NEXT: sw s0, 32(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 40 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: addi a0, zero, 8 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 7 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a4, zero, 6 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: addi a5, zero, 5 +; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: mv a4, zero +; ILP32E-FPELIM-NEXT: call callee_many_scalars +; ILP32E-FPELIM-NEXT: addi sp, s0, -40 +; ILP32E-FPELIM-NEXT: lw s0, 32(sp) +; ILP32E-FPELIM-NEXT: lw ra, 36(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 40 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 36(sp) +; ILP32E-WITHFP-NEXT: sw s0, 32(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 8 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 12(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 7 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a4, zero, 6 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; 
ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: addi a5, zero, 5 +; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: mv a4, zero +; ILP32E-WITHFP-NEXT: call callee_many_scalars +; ILP32E-WITHFP-NEXT: addi sp, s0, -40 +; ILP32E-WITHFP-NEXT: lw s0, 32(sp) +; ILP32E-WITHFP-NEXT: lw ra, 36(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_many_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -40 +; RV32D-ILP32E-NEXT: sw ra, 36(sp) +; RV32D-ILP32E-NEXT: sw s0, 32(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 40 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: addi a0, zero, 8 +; RV32D-ILP32E-NEXT: sw a0, 16(sp) +; RV32D-ILP32E-NEXT: sw zero, 12(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 7 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a4, zero, 6 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: addi a5, zero, 5 +; RV32D-ILP32E-NEXT: sw a4, 0(sp) +; RV32D-ILP32E-NEXT: mv a4, zero +; RV32D-ILP32E-NEXT: call callee_many_scalars +; RV32D-ILP32E-NEXT: addi sp, s0, -40 +; RV32D-ILP32E-NEXT: lw s0, 32(sp) +; RV32D-ILP32E-NEXT: lw ra, 36(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 40 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { +; ILP32E-FPELIM-LABEL: callee_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 0(a1) +; ILP32E-FPELIM-NEXT: lw a7, 0(a0) +; ILP32E-FPELIM-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a2, 12(a0) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; 
ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lw a6, 0(a1) +; ILP32E-WITHFP-NEXT: lw a7, 0(a0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a2, 12(a0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lw a6, 0(a1) +; RV32D-ILP32E-NEXT: lw a7, 0(a0) +; RV32D-ILP32E-NEXT: lw a4, 4(a1) +; RV32D-ILP32E-NEXT: lw a5, 12(a1) +; RV32D-ILP32E-NEXT: lw a2, 12(a0) +; RV32D-ILP32E-NEXT: lw a3, 4(a0) +; RV32D-ILP32E-NEXT: lw a1, 8(a1) +; RV32D-ILP32E-NEXT: lw a0, 8(a0) +; RV32D-ILP32E-NEXT: xor a2, a2, a5 +; RV32D-ILP32E-NEXT: xor a3, a3, a4 +; RV32D-ILP32E-NEXT: or a2, a3, a2 +; RV32D-ILP32E-NEXT: xor a0, a0, a1 +; RV32D-ILP32E-NEXT: xor a1, a7, a6 +; RV32D-ILP32E-NEXT: or a0, a1, a0 +; 
RV32D-ILP32E-NEXT: or a0, a0, a2 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() nounwind { +; ILP32E-FPELIM-LABEL: caller_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: addi a2, zero, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 40 +; ILP32E-FPELIM-NEXT: addi a1, sp, 16 +; ILP32E-FPELIM-NEXT: sw a2, 40(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: addi a2, zero, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 40 +; 
ILP32E-WITHFP-NEXT: addi a1, sp, 16 +; ILP32E-WITHFP-NEXT: sw a2, 40(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -64 +; RV32D-ILP32E-NEXT: sw ra, 60(sp) +; RV32D-ILP32E-NEXT: sw s0, 56(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 64 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: lui a0, 524272 +; RV32D-ILP32E-NEXT: sw a0, 28(sp) +; RV32D-ILP32E-NEXT: sw zero, 24(sp) +; RV32D-ILP32E-NEXT: sw zero, 20(sp) +; RV32D-ILP32E-NEXT: sw zero, 16(sp) +; RV32D-ILP32E-NEXT: sw zero, 52(sp) +; RV32D-ILP32E-NEXT: sw zero, 48(sp) +; RV32D-ILP32E-NEXT: sw zero, 44(sp) +; RV32D-ILP32E-NEXT: addi a2, zero, 1 +; RV32D-ILP32E-NEXT: addi a0, sp, 40 +; RV32D-ILP32E-NEXT: addi a1, sp, 16 +; RV32D-ILP32E-NEXT: sw a2, 40(sp) +; RV32D-ILP32E-NEXT: call callee_large_scalars +; RV32D-ILP32E-NEXT: addi sp, s0, -64 +; RV32D-ILP32E-NEXT: lw s0, 56(sp) +; RV32D-ILP32E-NEXT: lw ra, 60(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 64 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in a register + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) nounwind { +; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a6, 0(a0) +; ILP32E-FPELIM-NEXT: lw a7, 0(a1) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT:
lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a2, 12(a1) +; ILP32E-FPELIM-NEXT: lw a3, 4(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a6, 0(a0) +; ILP32E-WITHFP-NEXT: lw a7, 0(a1) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a2, 12(a1) +; ILP32E-WITHFP-NEXT: lw a3, 4(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalars_exhausted_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lw a0, 12(s0) +; RV32D-ILP32E-NEXT: lw a1, 4(s0) +; RV32D-ILP32E-NEXT: lw a6, 0(a0) +; RV32D-ILP32E-NEXT: lw a7, 0(a1) +; RV32D-ILP32E-NEXT: lw a4, 4(a0) +; RV32D-ILP32E-NEXT: lw a5, 12(a0) +; RV32D-ILP32E-NEXT: lw a2, 
12(a1) +; RV32D-ILP32E-NEXT: lw a3, 4(a1) +; RV32D-ILP32E-NEXT: lw a0, 8(a0) +; RV32D-ILP32E-NEXT: lw a1, 8(a1) +; RV32D-ILP32E-NEXT: xor a2, a2, a5 +; RV32D-ILP32E-NEXT: xor a3, a3, a4 +; RV32D-ILP32E-NEXT: or a2, a3, a2 +; RV32D-ILP32E-NEXT: xor a0, a1, a0 +; RV32D-ILP32E-NEXT: xor a1, a7, a6 +; RV32D-ILP32E-NEXT: or a0, a1, a0 +; RV32D-ILP32E-NEXT: or a0, a0, a2 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() nounwind { +; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -80 +; ILP32E-FPELIM-NEXT: sw ra, 76(sp) +; ILP32E-FPELIM-NEXT: sw s0, 72(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 80 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 32 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 9 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 56 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 7 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: sw zero, 40(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 68(sp) +; ILP32E-FPELIM-NEXT: sw zero, 64(sp) +; ILP32E-FPELIM-NEXT: sw zero, 60(sp) +; ILP32E-FPELIM-NEXT: addi a6, zero, 8 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: addi a4, zero, 5 +; ILP32E-FPELIM-NEXT: addi a5, zero, 6 +; ILP32E-FPELIM-NEXT: sw a6, 56(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs +; 
ILP32E-FPELIM-NEXT: addi sp, s0, -80 +; ILP32E-FPELIM-NEXT: lw s0, 72(sp) +; ILP32E-FPELIM-NEXT: lw ra, 76(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 80 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -80 +; ILP32E-WITHFP-NEXT: sw ra, 76(sp) +; ILP32E-WITHFP-NEXT: sw s0, 72(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 80 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 32 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 9 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 56 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 7 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: sw zero, 40(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 68(sp) +; ILP32E-WITHFP-NEXT: sw zero, 64(sp) +; ILP32E-WITHFP-NEXT: sw zero, 60(sp) +; ILP32E-WITHFP-NEXT: addi a6, zero, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: addi a4, zero, 5 +; ILP32E-WITHFP-NEXT: addi a5, zero, 6 +; ILP32E-WITHFP-NEXT: sw a6, 56(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs +; ILP32E-WITHFP-NEXT: addi sp, s0, -80 +; ILP32E-WITHFP-NEXT: lw s0, 72(sp) +; ILP32E-WITHFP-NEXT: lw ra, 76(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 80 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalars_exhausted_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -80 +; RV32D-ILP32E-NEXT: sw ra, 76(sp) +; RV32D-ILP32E-NEXT: sw s0, 72(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 80 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: addi a0, sp, 32 +; RV32D-ILP32E-NEXT: sw a0, 12(sp) +; 
RV32D-ILP32E-NEXT: addi a0, zero, 9 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a0, sp, 56 +; RV32D-ILP32E-NEXT: sw a0, 4(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 7 +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: lui a0, 524272 +; RV32D-ILP32E-NEXT: sw a0, 44(sp) +; RV32D-ILP32E-NEXT: sw zero, 40(sp) +; RV32D-ILP32E-NEXT: sw zero, 36(sp) +; RV32D-ILP32E-NEXT: sw zero, 32(sp) +; RV32D-ILP32E-NEXT: sw zero, 68(sp) +; RV32D-ILP32E-NEXT: sw zero, 64(sp) +; RV32D-ILP32E-NEXT: sw zero, 60(sp) +; RV32D-ILP32E-NEXT: addi a6, zero, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: addi a4, zero, 5 +; RV32D-ILP32E-NEXT: addi a5, zero, 6 +; RV32D-ILP32E-NEXT: sw a6, 56(sp) +; RV32D-ILP32E-NEXT: call callee_large_scalars_exhausted_regs +; RV32D-ILP32E-NEXT: addi sp, s0, -80 +; RV32D-ILP32E-NEXT: lw s0, 72(sp) +; RV32D-ILP32E-NEXT: lw ra, 76(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 80 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) nounwind { +; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a2, a1 +; ILP32E-FPELIM-NEXT: mv a1, a0 +; ILP32E-FPELIM-NEXT: addi a0, sp, 8 +; ILP32E-FPELIM-NEXT: call __floatditf +; ILP32E-FPELIM-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 
+; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a2, a1 +; ILP32E-WITHFP-NEXT: mv a1, a0 +; ILP32E-WITHFP-NEXT: addi a0, sp, 8 +; ILP32E-WITHFP-NEXT: call __floatditf +; ILP32E-WITHFP-NEXT: lw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_mixed_scalar_libcalls: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) +; RV32D-ILP32E-NEXT: sw s0, 24(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: mv a2, a1 +; RV32D-ILP32E-NEXT: mv a1, a0 +; RV32D-ILP32E-NEXT: addi a0, sp, 8 +; RV32D-ILP32E-NEXT: call __floatditf +; RV32D-ILP32E-NEXT: lw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) +; RV32D-ILP32E-NEXT: lw ra, 28(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) nounwind { +; ILP32E-FPELIM-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 
+; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_coerced_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: xor a0, a0, a1 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] %a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() nounwind { +; ILP32E-FPELIM-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_coerced_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi 
a1, zero, 2 +; RV32D-ILP32E-NEXT: call callee_small_coerced_struct +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval align 4 %a) nounwind { +; ILP32E-FPELIM-LABEL: callee_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lw a1, 0(a0) +; RV32D-ILP32E-NEXT: lw a0, 12(a0) +; RV32D-ILP32E-NEXT: add a0, a1, a0 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() nounwind { +; ILP32E-FPELIM-LABEL: caller_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: 
addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: sw a1, 28(sp) +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: sw a2, 32(sp) +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: sw a3, 36(sp) +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 8 +; ILP32E-FPELIM-NEXT: call callee_large_struct +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) +; ILP32E-FPELIM-NEXT: lw ra, 44(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: sw a1, 28(sp) +; ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: sw a2, 32(sp) +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: sw a3, 36(sp) +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw a1, 12(sp) +; ILP32E-WITHFP-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-NEXT: sw a3, 20(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 8 +; ILP32E-WITHFP-NEXT: call callee_large_struct +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -48 +; 
RV32D-ILP32E-NEXT: sw ra, 44(sp) +; RV32D-ILP32E-NEXT: sw s0, 40(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 48 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: sw a0, 24(sp) +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: sw a1, 28(sp) +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: sw a2, 32(sp) +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: sw a3, 36(sp) +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: sw a1, 12(sp) +; RV32D-ILP32E-NEXT: sw a2, 16(sp) +; RV32D-ILP32E-NEXT: sw a3, 20(sp) +; RV32D-ILP32E-NEXT: addi a0, sp, 8 +; RV32D-ILP32E-NEXT: call callee_large_struct +; RV32D-ILP32E-NEXT: addi sp, s0, -48 +; RV32D-ILP32E-NEXT: lw s0, 40(sp) +; RV32D-ILP32E-NEXT: lw ra, 44(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 48 +; RV32D-ILP32E-NEXT: ret + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval align 4 %ls) + ret i32 %2 +} + +; Check return of 2x xlen structs + +define %struct.small @callee_small_struct_ret() nounwind { +; ILP32E-FPELIM-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: mv a1, zero +; 
ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() nounwind { +; ILP32E-FPELIM-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) +; ILP32E-FPELIM-NEXT: call callee_small_struct_ret +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: call callee_small_struct_ret +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: call callee_small_struct_ret +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = 
extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen scalars + +define fp128 @callee_large_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 524272 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a1, 524272 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: lui a1, 524272 +; RV32D-ILP32E-NEXT: sw a1, 12(a0) +; RV32D-ILP32E-NEXT: sw zero, 8(a0) +; RV32D-ILP32E-NEXT: sw zero, 4(a0) +; RV32D-ILP32E-NEXT: sw zero, 0(a0) +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() nounwind { +; ILP32E-FPELIM-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; 
ILP32E-FPELIM-NEXT: call callee_large_scalar_ret +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) +; ILP32E-FPELIM-NEXT: lw ra, 44(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -48 +; RV32D-ILP32E-NEXT: sw ra, 44(sp) +; RV32D-ILP32E-NEXT: sw s0, 40(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 48 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: addi a0, sp, 16 +; RV32D-ILP32E-NEXT: call callee_large_scalar_ret +; RV32D-ILP32E-NEXT: addi sp, s0, -48 +; RV32D-ILP32E-NEXT: lw s0, 40(sp) +; RV32D-ILP32E-NEXT: lw ra, 44(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 48 +; RV32D-ILP32E-NEXT: ret + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias sret %agg.result) nounwind { +; ILP32E-FPELIM-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi a1, zero, 1 +; ILP32E-FPELIM-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 3 +; ILP32E-FPELIM-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 4 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: 
addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a1, zero, 1 +; ILP32E-WITHFP-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 3 +; ILP32E-WITHFP-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 4 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) +; RV32D-ILP32E-NEXT: sw s0, 0(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: addi a1, zero, 1 +; RV32D-ILP32E-NEXT: sw a1, 0(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: sw a1, 4(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 3 +; RV32D-ILP32E-NEXT: sw a1, 8(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 4 +; RV32D-ILP32E-NEXT: sw a1, 12(a0) +; RV32D-ILP32E-NEXT: lw s0, 0(sp) +; RV32D-ILP32E-NEXT: lw ra, 4(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() nounwind { +; ILP32E-FPELIM-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; 
ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: addi a0, sp, 8 +; ILP32E-FPELIM-NEXT: call callee_large_struct_ret +; ILP32E-FPELIM-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-NEXT: lw a1, 20(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: addi a0, sp, 8 +; ILP32E-WITHFP-NEXT: call callee_large_struct_ret +; ILP32E-WITHFP-NEXT: lw a0, 8(sp) +; ILP32E-WITHFP-NEXT: lw a1, 20(sp) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) +; RV32D-ILP32E-NEXT: sw s0, 24(sp) +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: addi a0, sp, 8 +; RV32D-ILP32E-NEXT: call callee_large_struct_ret +; RV32D-ILP32E-NEXT: lw a0, 8(sp) +; RV32D-ILP32E-NEXT: lw a1, 20(sp) +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) +; RV32D-ILP32E-NEXT: lw ra, 28(sp) +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + 
%6 = add i32 %3, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32IF +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E ; Exercises the ILP32 calling convention code in the case that f32 is a legal ; type. As well as testing that lowering is correct, these tests also aim to @@ -12,6 +14,20 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: lw a0, 4(sp) ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -16 +; RV32IF-ILP32E-NEXT: sw ra, 12(sp) +; RV32IF-ILP32E-NEXT: sw s0, 8(sp) +; RV32IF-ILP32E-NEXT: addi s0, sp, 16 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: lw a0, 12(s0) +; RV32IF-ILP32E-NEXT: addi sp, s0, -16 +; RV32IF-ILP32E-NEXT: lw s0, 8(sp) +; RV32IF-ILP32E-NEXT: lw ra, 12(sp) +; RV32IF-ILP32E-NEXT: addi sp, sp, 16 +; RV32IF-ILP32E-NEXT: ret ret float %f } @@ -23,6 +39,23 @@ ; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -16 +; RV32IF-ILP32E-NEXT: sw ra, 12(sp) +; RV32IF-ILP32E-NEXT: sw s0, 8(sp) +; RV32IF-ILP32E-NEXT: addi s0, sp, 16 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: flw ft0, 12(s0) +; RV32IF-ILP32E-NEXT: flw ft1, 8(s0) +; RV32IF-ILP32E-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-ILP32E-NEXT: fmv.x.w a0, ft0 +; RV32IF-ILP32E-NEXT: addi sp, s0, -16 +; RV32IF-ILP32E-NEXT: lw s0, 8(sp) +; RV32IF-ILP32E-NEXT: lw ra, 12(sp) +; 
RV32IF-ILP32E-NEXT: addi sp, sp, 16 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %e, %f ret float %1 } @@ -47,6 +80,32 @@ ; RV32IF-NEXT: lw ra, 12(sp) ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -32 +; RV32IF-ILP32E-NEXT: sw ra, 28(sp) +; RV32IF-ILP32E-NEXT: sw s0, 24(sp) +; RV32IF-ILP32E-NEXT: addi s0, sp, 32 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw a0, 12(sp) +; RV32IF-ILP32E-NEXT: lui a0, 264704 +; RV32IF-ILP32E-NEXT: sw a0, 8(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: addi a1, zero, 4 +; RV32IF-ILP32E-NEXT: addi a0, zero, 1 +; RV32IF-ILP32E-NEXT: addi a2, zero, 2 +; RV32IF-ILP32E-NEXT: addi a4, zero, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: mv a1, zero +; RV32IF-ILP32E-NEXT: mv a3, zero +; RV32IF-ILP32E-NEXT: mv a5, zero +; RV32IF-ILP32E-NEXT: call onstack_f32_noop +; RV32IF-ILP32E-NEXT: addi sp, s0, -32 +; RV32IF-ILP32E-NEXT: lw s0, 24(sp) +; RV32IF-ILP32E-NEXT: lw ra, 28(sp) +; RV32IF-ILP32E-NEXT: addi sp, sp, 32 +; RV32IF-ILP32E-NEXT: ret %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a) ret float %1 } @@ -74,6 +133,35 @@ ; RV32IF-NEXT: lw ra, 12(sp) ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -32 +; RV32IF-ILP32E-NEXT: sw ra, 28(sp) +; RV32IF-ILP32E-NEXT: sw s0, 24(sp) +; RV32IF-ILP32E-NEXT: addi s0, sp, 32 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: fmv.w.x ft0, a1 +; RV32IF-ILP32E-NEXT: fmv.w.x ft1, a0 +; RV32IF-ILP32E-NEXT: fadd.s ft2, ft1, ft0 +; RV32IF-ILP32E-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: addi a0, zero, 4 +; RV32IF-ILP32E-NEXT: sw a0, 0(sp) +; RV32IF-ILP32E-NEXT: fsw ft0, 12(sp) +; RV32IF-ILP32E-NEXT: addi a0, zero, 1 +; RV32IF-ILP32E-NEXT: addi 
a2, zero, 2 +; RV32IF-ILP32E-NEXT: addi a4, zero, 3 +; RV32IF-ILP32E-NEXT: fsw ft2, 8(sp) +; RV32IF-ILP32E-NEXT: mv a1, zero +; RV32IF-ILP32E-NEXT: mv a3, zero +; RV32IF-ILP32E-NEXT: mv a5, zero +; RV32IF-ILP32E-NEXT: call onstack_f32_noop +; RV32IF-ILP32E-NEXT: addi sp, s0, -32 +; RV32IF-ILP32E-NEXT: lw s0, 24(sp) +; RV32IF-ILP32E-NEXT: lw ra, 28(sp) +; RV32IF-ILP32E-NEXT: addi sp, sp, 32 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %a, %b %2 = fsub float %b, %a %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2) diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I @@ -34,6 +36,33 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) +; RV32I-ILP32E-NEXT: sw s0, 120(sp) +; RV32I-ILP32E-NEXT: sw s1, 116(sp) +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv s1, sp +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; 
RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: addi a1, s1, 64 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw s1, 116(sp) +; RV32I-ILP32E-NEXT: lw s0, 120(sp) +; RV32I-ILP32E-NEXT: lw ra, 124(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I @@ -22,6 +24,21 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) +; RV32I-ILP32E-NEXT: sw s0, 56(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: andi sp, sp, -32 +; RV32I-ILP32E-NEXT: addi a0, sp, 32 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw s0, 56(sp) +; RV32I-ILP32E-NEXT: lw ra, 60(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -52,6 +69,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi 
sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -82,6 +109,21 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) +; RV32I-ILP32E-NEXT: sw s0, 120(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: addi a0, sp, 64 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw s0, 120(sp) +; RV32I-ILP32E-NEXT: lw ra, 124(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 @@ -112,6 +154,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -142,6 +194,21 @@ ; RV32I-NEXT: addi sp, sp, 256 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -256 +; RV32I-ILP32E-NEXT: sw ra, 252(sp) +; RV32I-ILP32E-NEXT: sw s0, 248(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 256 +; RV32I-ILP32E-NEXT: andi sp, sp, -128 +; RV32I-ILP32E-NEXT: addi a0, sp, 128 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -256 +; RV32I-ILP32E-NEXT: lw s0, 248(sp) +; RV32I-ILP32E-NEXT: lw ra, 252(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 256 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -256 @@ -172,6 +239,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: 
caller_no_realign128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -202,6 +279,21 @@ ; RV32I-NEXT: addi sp, sp, 512 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -512 +; RV32I-ILP32E-NEXT: sw ra, 508(sp) +; RV32I-ILP32E-NEXT: sw s0, 504(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 512 +; RV32I-ILP32E-NEXT: andi sp, sp, -256 +; RV32I-ILP32E-NEXT: addi a0, sp, 256 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -512 +; RV32I-ILP32E-NEXT: lw s0, 504(sp) +; RV32I-ILP32E-NEXT: lw ra, 508(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 512 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -512 @@ -232,6 +324,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -262,6 +364,21 @@ ; RV32I-NEXT: addi sp, sp, 1536 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -1536 +; RV32I-ILP32E-NEXT: sw ra, 1532(sp) +; RV32I-ILP32E-NEXT: sw s0, 1528(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 1536 +; RV32I-ILP32E-NEXT: andi sp, sp, -512 +; RV32I-ILP32E-NEXT: addi a0, sp, 1024 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -1536 +; RV32I-ILP32E-NEXT: lw s0, 1528(sp) +; 
RV32I-ILP32E-NEXT: lw ra, 1532(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 1536 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -1536 @@ -292,6 +409,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -329,6 +456,28 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: addi sp, sp, -1028 +; RV32I-ILP32E-NEXT: andi sp, sp, -1024 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: mv a0, a0 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -1024 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: addi sp, sp, 1028 +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -366,6 +515,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign1024: ; 
RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -406,6 +565,31 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, 4 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: andi sp, sp, -2048 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: mv a0, a0 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, 4 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -446,6 +630,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -486,6 +680,31 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: lui a0, 3 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: srli a0, sp, 12 +; RV32I-ILP32E-NEXT: slli sp, 
a0, 12 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: mv a0, a0 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lui a0, 3 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: lui a0, 3 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -526,6 +745,16 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll --- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll +++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \ @@ -33,8 +35,3 @@ ; CHECK-IMP-NEXT: ret ret void } - -; RUN: not llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s - -; CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- 
a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -11,6 +11,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \ @@ -97,6 +101,44 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; 
ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -206,6 +248,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -345,6 +420,62 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg_alloca: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) +; RV32I-ILP32E-NEXT: sw s0, 8(sp) +; RV32I-ILP32E-NEXT: sw s1, 4(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; 
RV32I-ILP32E-NEXT: mv s1, a1 +; RV32I-ILP32E-NEXT: sw a5, 20(s0) +; RV32I-ILP32E-NEXT: sw a4, 16(s0) +; RV32I-ILP32E-NEXT: sw a3, 12(s0) +; RV32I-ILP32E-NEXT: sw a2, 8(s0) +; RV32I-ILP32E-NEXT: sw a1, 4(s0) +; RV32I-ILP32E-NEXT: addi a0, s0, 8 +; RV32I-ILP32E-NEXT: sw a0, -16(s0) +; RV32I-ILP32E-NEXT: addi a0, a1, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: call notdead +; RV32I-ILP32E-NEXT: mv a0, s1 +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw s1, 4(sp) +; RV32I-ILP32E-NEXT: lw s0, 8(sp) +; RV32I-ILP32E-NEXT: lw ra, 12(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg_alloca: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) +; ILP32E-WITHFP-NEXT: sw s1, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a1, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: sub a0, sp, a0 +; ILP32E-WITHFP-NEXT: mv sp, a0 +; ILP32E-WITHFP-NEXT: call notdead +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw s1, 4(sp) +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -467,6 +598,33 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_caller: +; 
RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: addi a4, zero, 2 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: call va1 +; RV32I-ILP32E-NEXT: lw ra, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: addi a4, zero, 2 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call va1 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -566,6 +724,47 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a1, a0, -8 +; RV32I-ILP32E-NEXT: addi a0, sp, 23 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a1) +; RV32I-ILP32E-NEXT: ori a1, a1, 4 +; RV32I-ILP32E-NEXT: lw a1, 0(a1) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 
12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, s0, 19 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: ori a1, a1, 4 +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -708,6 +907,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: ori a1, a0, 4 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a0) +; RV32I-ILP32E-NEXT: addi a2, a1, 4 +; RV32I-ILP32E-NEXT: sw a2, 0(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a1) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: ori a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: addi a2, a1, 4 +; 
ILP32E-WITHFP-NEXT: sw a2, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -792,6 +1034,31 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: call va2 +; RV32I-ILP32E-NEXT: lw ra, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call va2 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -895,6 +1162,51 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: addi a3, sp, 23 +; RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a0) +; RV32I-ILP32E-NEXT: ori a0, a0, 4 +; RV32I-ILP32E-NEXT: lw a4, 0(a0) +; RV32I-ILP32E-NEXT: add 
a0, a1, a3 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a4 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, s0, 19 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: ori a0, a0, 4 +; ILP32E-WITHFP-NEXT: lw a4, 0(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a4 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1044,6 +1356,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: ori a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a0) +; RV32I-ILP32E-NEXT: addi a4, a3, 4 +; RV32I-ILP32E-NEXT: sw a4, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a3) +; RV32I-ILP32E-NEXT: add a0, a1, a0 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a3 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; 
RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: ori a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: addi a4, a3, 4 +; ILP32E-WITHFP-NEXT: sw a4, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a3) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1136,6 +1495,37 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 2 +; RV32I-ILP32E-NEXT: addi a1, zero, 1111 +; RV32I-ILP32E-NEXT: lui a5, 262144 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: mv a4, zero +; RV32I-ILP32E-NEXT: call va3 +; RV32I-ILP32E-NEXT: lw ra, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 2 +; ILP32E-WITHFP-NEXT: addi a1, zero, 1111 +; ILP32E-WITHFP-NEXT: lui a5, 262144 +; 
ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: mv a4, zero +; ILP32E-WITHFP-NEXT: call va3 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -1297,6 +1687,87 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va4_va_copy: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) +; RV32I-ILP32E-NEXT: sw s0, 8(sp) +; RV32I-ILP32E-NEXT: mv s0, a1 +; RV32I-ILP32E-NEXT: sw a5, 36(sp) +; RV32I-ILP32E-NEXT: sw a4, 32(sp) +; RV32I-ILP32E-NEXT: sw a3, 28(sp) +; RV32I-ILP32E-NEXT: sw a2, 24(sp) +; RV32I-ILP32E-NEXT: sw a1, 20(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 24 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: call notdead +; RV32I-ILP32E-NEXT: lw a0, 4(sp) +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a1, a0, 4 +; RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a2, a0, 4 +; RV32I-ILP32E-NEXT: sw a2, 4(sp) +; RV32I-ILP32E-NEXT: lw a2, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 4(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a0) +; RV32I-ILP32E-NEXT: add a1, a1, s0 +; RV32I-ILP32E-NEXT: add a1, a1, a2 +; RV32I-ILP32E-NEXT: add a0, a1, a0 +; RV32I-ILP32E-NEXT: lw s0, 8(sp) +; RV32I-ILP32E-NEXT: lw ra, 12(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va4_va_copy: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -44 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) +; 
ILP32E-WITHFP-NEXT: sw s0, 12(sp) +; ILP32E-WITHFP-NEXT: sw s1, 8(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: sw a0, -20(s0) +; ILP32E-WITHFP-NEXT: call notdead +; ILP32E-WITHFP-NEXT: lw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -16(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a2, a0, 4 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -16(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a1, a1, s1 +; ILP32E-WITHFP-NEXT: add a1, a1, a2 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 8(sp) +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 44 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -1542,6 +2013,106 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va5_aligned_stack_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -96 +; RV32I-ILP32E-NEXT: sw ra, 92(sp) +; RV32I-ILP32E-NEXT: sw s0, 88(sp) +; RV32I-ILP32E-NEXT: addi s0, sp, 96 +; RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: addi a0, zero, 17 +; RV32I-ILP32E-NEXT: sw a0, 32(sp) +; RV32I-ILP32E-NEXT: addi 
a0, zero, 16 +; RV32I-ILP32E-NEXT: sw a0, 28(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 15 +; RV32I-ILP32E-NEXT: sw a0, 24(sp) +; RV32I-ILP32E-NEXT: lui a0, 262236 +; RV32I-ILP32E-NEXT: addi a0, a0, 655 +; RV32I-ILP32E-NEXT: sw a0, 20(sp) +; RV32I-ILP32E-NEXT: lui a0, 377487 +; RV32I-ILP32E-NEXT: addi a0, a0, 1475 +; RV32I-ILP32E-NEXT: sw a0, 16(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 14 +; RV32I-ILP32E-NEXT: sw a0, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 4 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: lui a0, 688509 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lui a0, 262153 +; RV32I-ILP32E-NEXT: addi a0, a0, 491 +; RV32I-ILP32E-NEXT: sw a0, 76(sp) +; RV32I-ILP32E-NEXT: lui a0, 545260 +; RV32I-ILP32E-NEXT: addi a0, a0, -1967 +; RV32I-ILP32E-NEXT: sw a0, 72(sp) +; RV32I-ILP32E-NEXT: lui a0, 964690 +; RV32I-ILP32E-NEXT: addi a0, a0, -328 +; RV32I-ILP32E-NEXT: sw a0, 68(sp) +; RV32I-ILP32E-NEXT: lui a0, 335544 +; RV32I-ILP32E-NEXT: addi a5, a0, 1311 +; RV32I-ILP32E-NEXT: addi a2, sp, 64 +; RV32I-ILP32E-NEXT: addi a0, zero, 1 +; RV32I-ILP32E-NEXT: addi a1, zero, 11 +; RV32I-ILP32E-NEXT: addi a3, zero, 12 +; RV32I-ILP32E-NEXT: addi a4, zero, 13 +; RV32I-ILP32E-NEXT: sw a5, 64(sp) +; RV32I-ILP32E-NEXT: call va5_aligned_stack_callee +; RV32I-ILP32E-NEXT: addi sp, s0, -96 +; RV32I-ILP32E-NEXT: lw s0, 88(sp) +; RV32I-ILP32E-NEXT: lw ra, 92(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 96 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -96 +; ILP32E-WITHFP-NEXT: sw ra, 92(sp) +; ILP32E-WITHFP-NEXT: sw s0, 88(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 96 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, zero, 17 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 16 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 15 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; 
ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 14 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 4 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a0, a0, -2048 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 76(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 72(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 68(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a5, a0, 1311 +; ILP32E-WITHFP-NEXT: addi a2, sp, 64 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 11 +; ILP32E-WITHFP-NEXT: addi a3, zero, 12 +; ILP32E-WITHFP-NEXT: addi a4, zero, 13 +; ILP32E-WITHFP-NEXT: sw a5, 64(sp) +; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee +; ILP32E-WITHFP-NEXT: addi sp, s0, -96 +; ILP32E-WITHFP-NEXT: lw s0, 88(sp) +; ILP32E-WITHFP-NEXT: lw ra, 92(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 96 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 @@ -1706,6 +2277,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va6_no_fixed_args: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; 
RV32I-ILP32E-NEXT: addi a1, sp, 8 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va6_no_fixed_args: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -1879,6 +2483,75 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add sp, sp, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va_large_stack: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: lui a0, 24414 +; RV32I-ILP32E-NEXT: addi a0, a0, 288 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 100000032 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: lui a6, 24414 +; RV32I-ILP32E-NEXT: addi a6, a6, 284 +; RV32I-ILP32E-NEXT: add a6, sp, a6 +; RV32I-ILP32E-NEXT: sw a5, 0(a6) +; RV32I-ILP32E-NEXT: lui a5, 24414 +; RV32I-ILP32E-NEXT: addi a5, a5, 280 +; RV32I-ILP32E-NEXT: add a5, sp, a5 +; RV32I-ILP32E-NEXT: sw a4, 0(a5) +; RV32I-ILP32E-NEXT: lui a4, 24414 +; RV32I-ILP32E-NEXT: addi a4, a4, 276 +; RV32I-ILP32E-NEXT: add a4, sp, a4 +; RV32I-ILP32E-NEXT: sw a3, 0(a4) +; RV32I-ILP32E-NEXT: lui a3, 24414 +; RV32I-ILP32E-NEXT: addi a3, a3, 272 +; RV32I-ILP32E-NEXT: add a3, sp, a3 +; RV32I-ILP32E-NEXT: sw a2, 0(a3) +; RV32I-ILP32E-NEXT: lui a2, 24414 +; RV32I-ILP32E-NEXT: addi a2, a2, 268 +; 
RV32I-ILP32E-NEXT: add a2, sp, a2 +; RV32I-ILP32E-NEXT: sw a1, 0(a2) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 272 +; RV32I-ILP32E-NEXT: add a1, sp, a1 +; RV32I-ILP32E-NEXT: mv a1, a1 +; RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 288 +; RV32I-ILP32E-NEXT: add sp, sp, a1 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va_large_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -2044 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044 +; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) +; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 2020 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 24414 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1748 +; ILP32E-WITHFP-NEXT: sub sp, sp, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: lui a2, 1024162 +; ILP32E-WITHFP-NEXT: addi a2, a2, -272 +; ILP32E-WITHFP-NEXT: add a2, s0, a2 +; ILP32E-WITHFP-NEXT: sw a1, 0(a2) +; ILP32E-WITHFP-NEXT: lui a1, 24414 +; ILP32E-WITHFP-NEXT: addi a1, a1, -1748 +; ILP32E-WITHFP-NEXT: add sp, sp, a1 +; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) +; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) +; ILP32E-WITHFP-NEXT: addi sp, sp, 2044 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414