Index: lib/Target/RISCV/RISCVCallingConv.td =================================================================== --- lib/Target/RISCV/RISCVCallingConv.td +++ lib/Target/RISCV/RISCVCallingConv.td @@ -15,6 +15,8 @@ def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; +def CSR_E : CalleeSavedRegs<(add X1, X3, X4, X8, X9)>; + // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; @@ -27,6 +29,11 @@ (sequence "X%u", 18, 27), (sequence "X%u", 28, 31))>; +def CSR_E_Interrupt : CalleeSavedRegs<(add X1, + (sequence "X%u", 3, 9), + (sequence "X%u", 10, 11), + (sequence "X%u", 12, 15))>; + // Same as CSR_Interrupt, but including all 32-bit FP registers. def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add X1, (sequence "X%u", 3, 9), Index: lib/Target/RISCV/RISCVFrameLowering.h =================================================================== --- lib/Target/RISCV/RISCVFrameLowering.h +++ lib/Target/RISCV/RISCVFrameLowering.h @@ -20,12 +20,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/16, - /*LocalAreaOffset=*/0), - STI(STI) {} - + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; Index: lib/Target/RISCV/RISCVFrameLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVFrameLowering.cpp +++ lib/Target/RISCV/RISCVFrameLowering.cpp @@ -21,6 +21,15 @@ using namespace llvm; +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering( + StackGrowsDown, + /*StackAlignment=*/(STI.getTargetABI() == RISCVABI::ABI::ABI_ILP32E) + ? 
4 + : 16, + /*LocalAreaOffset=*/0), + STI(STI) {} + bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -45,7 +45,8 @@ RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); - if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64) + if (ABI != RISCVABI::ABI_ILP32 && ABI != RISCVABI::ABI_LP64 && + ABI != RISCVABI::ABI_ILP32E) report_fatal_error("Don't know how to lower this ABI"); MVT XLenVT = Subtarget.getXLenVT(); @@ -894,6 +895,10 @@ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +static const MCPhysReg ArgEGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15 +}; // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. @@ -901,8 +906,16 @@ ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { + bool IsILP32E = State.getMachineFunction() + .getSubtarget<RISCVSubtarget>() + .getTargetABI() == RISCVABI::ABI_ILP32E; unsigned XLenInBytes = XLen / 8; - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + unsigned Reg; + if (!IsILP32E) + Reg = State.AllocateReg(ArgGPRs); + else + Reg = State.AllocateReg(ArgEGPRs); + if (Reg) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); @@ -919,7 +932,12 @@ return false; } - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + if (!IsILP32E) + Reg = State.AllocateReg(ArgGPRs); + else + Reg = State.AllocateReg(ArgEGPRs); + + if (Reg) { // The second half can also be passed via register. 
State.addLoc( CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); @@ -937,6 +955,9 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { + bool IsILP32E = State.getMachineFunction() + .getSubtarget<RISCVSubtarget>() + .getTargetABI() == RISCVABI::ABI_ILP32E; unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; @@ -964,10 +985,17 @@ unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { - unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); - // Skip 'odd' register if necessary. - if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) - State.AllocateReg(ArgGPRs); + if (!IsILP32E) { + unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); + // Skip 'odd' register if necessary. + if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) + State.AllocateReg(ArgGPRs); + } else { + unsigned RegIdx = State.getFirstUnallocated(ArgEGPRs); + // Skip 'odd' register if necessary. + if (RegIdx != array_lengthof(ArgEGPRs) && RegIdx % 2 == 1) + State.AllocateReg(ArgEGPRs); + } } SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); @@ -985,7 +1013,11 @@ // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. 
- unsigned Reg = State.AllocateReg(ArgGPRs); + unsigned Reg; + if (!IsILP32E) + Reg = State.AllocateReg(ArgGPRs); + else + Reg = State.AllocateReg(ArgEGPRs); LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, 8); @@ -993,8 +1025,14 @@ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); return false; } - if (!State.AllocateReg(ArgGPRs)) - State.AllocateStack(4, 4); + if (!IsILP32E) { + if (!State.AllocateReg(ArgGPRs)) + State.AllocateStack(4, 4); + } else { + if (!State.AllocateReg(ArgEGPRs)) + State.AllocateStack(4, 4); + } + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -1027,7 +1065,12 @@ } // Allocate to a register if possible, or else a stack slot. - unsigned Reg = State.AllocateReg(ArgGPRs); + // ILP32E allocates from the shorter ArgEGPRs list (fewer argument GPRs). + unsigned Reg; + if (!IsILP32E) + Reg = State.AllocateReg(ArgGPRs); + else + Reg = State.AllocateReg(ArgEGPRs); unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); // If we reach this point and PendingLocs is non-empty, we must be at the Index: lib/Target/RISCV/RISCVRegisterInfo.cpp =================================================================== --- lib/Target/RISCV/RISCVRegisterInfo.cpp +++ lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -33,12 +33,18 @@ const MCPhysReg * RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (MF->getFunction().hasFnAttribute("interrupt")) { + // For rv32i and ilp32e + if (MF->getSubtarget<RISCVSubtarget>().getTargetABI() == RISCVABI::ABI_ILP32E) + return CSR_E_Interrupt_SaveList; if (MF->getSubtarget<RISCVSubtarget>().hasStdExtD()) return CSR_XLEN_F64_Interrupt_SaveList; if (MF->getSubtarget<RISCVSubtarget>().hasStdExtF()) return CSR_XLEN_F32_Interrupt_SaveList; return CSR_Interrupt_SaveList; } + if (MF->getSubtarget<RISCVSubtarget>().getTargetABI() == RISCVABI::ABI_ILP32E) + return CSR_E_SaveList; + return CSR_SaveList; } @@ -119,11 +125,15 @@ RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF, CallingConv::ID /*CC*/) const { if 
(MF.getFunction().hasFnAttribute("interrupt")) { + if (MF.getSubtarget<RISCVSubtarget>().getTargetABI() == RISCVABI::ABI_ILP32E) + return CSR_E_Interrupt_RegMask; if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD()) return CSR_XLEN_F64_Interrupt_RegMask; if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) return CSR_XLEN_F32_Interrupt_RegMask; return CSR_Interrupt_RegMask; } + if (MF.getSubtarget<RISCVSubtarget>().getTargetABI() == RISCVABI::ABI_ILP32E) + return CSR_E_RegMask; return CSR_RegMask; } Index: test/CodeGen/RISCV/callee-saved-gprs-ilp32e.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/callee-saved-gprs-ilp32e.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-WITH-FP + +; RV64I-ILP32E: 32-bit ABIs are not supported for 64-bit targets (ignoring target-abi) +; this is verified by target-abi-invalid.ll + +@var = global [32 x i32] zeroinitializer + +define void @foo() { +; RV32I-LABEL: foo: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw s0, 28(sp) +; RV32I-NEXT: sw s1, 24(sp) +; RV32I-NEXT: lui a0, %hi(var) +; RV32I-NEXT: addi a1, a0, %lo(var) +; +; RV32I-WITH-FP-LABEL: foo: +; RV32I-WITH-FP: # %bb.0: +; RV32I-WITH-FP-NEXT: addi sp, sp, -40 +; RV32I-WITH-FP-NEXT: sw ra, 36(sp) +; RV32I-WITH-FP-NEXT: sw s0, 32(sp) +; RV32I-WITH-FP-NEXT: sw s1, 28(sp) +; RV32I-WITH-FP-NEXT: addi s0, sp, 40 +; RV32I-WITH-FP-NEXT: lui a0, %hi(var) +; RV32I-WITH-FP-NEXT: addi a1, a0, %lo(var) + %val = load [32 x i32], [32 x i32]* @var + store volatile [32 x i32] %val, [32 x i32]* @var + ret void +} Index: test/CodeGen/RISCV/calling-conv-ilp32e.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=RV32I-WITHFP %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Any tests that would have identical output for some combination of the ilp32* +; ABIs belong in calling-conv-*-common.ll. This file contains tests that will +; have different output across those ABIs. i.e. where some arguments would be +; passed according to the floating point ABI. + +define i32 @callee_float_in_regs(i32 %a, float %b) nounwind { +; RV32I-FPELIM-LABEL: callee_float_in_regs: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -8 +; RV32I-FPELIM-NEXT: sw ra, 4(sp) +; RV32I-FPELIM-NEXT: sw s0, 0(sp) +; RV32I-FPELIM-NEXT: mv s0, a0 +; RV32I-FPELIM-NEXT: mv a0, a1 +; RV32I-FPELIM-NEXT: call __fixsfsi +; RV32I-FPELIM-NEXT: add a0, s0, a0 +; RV32I-FPELIM-NEXT: lw s0, 0(sp) +; RV32I-FPELIM-NEXT: lw ra, 4(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 8 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: callee_float_in_regs: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -12 +; RV32I-WITHFP-NEXT: sw ra, 8(sp) +; RV32I-WITHFP-NEXT: sw s0, 4(sp) +; RV32I-WITHFP-NEXT: sw s1, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 12 +; RV32I-WITHFP-NEXT: mv s1, a0 +; RV32I-WITHFP-NEXT: mv a0, a1 +; RV32I-WITHFP-NEXT: call __fixsfsi +; RV32I-WITHFP-NEXT: add a0, s1, a0 +; RV32I-WITHFP-NEXT: lw s1, 0(sp) +; RV32I-WITHFP-NEXT: lw s0, 4(sp) +; RV32I-WITHFP-NEXT: lw ra, 8(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 12 +; RV32I-WITHFP-NEXT: ret + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() nounwind { +; RV32I-FPELIM-LABEL: caller_float_in_regs: +; RV32I-FPELIM: # %bb.0: +; 
RV32I-FPELIM-NEXT: addi sp, sp, -4 +; RV32I-FPELIM-NEXT: sw ra, 0(sp) +; RV32I-FPELIM-NEXT: addi a0, zero, 1 +; RV32I-FPELIM-NEXT: lui a1, 262144 +; RV32I-FPELIM-NEXT: call callee_float_in_regs +; RV32I-FPELIM-NEXT: lw ra, 0(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 4 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: caller_float_in_regs: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -8 +; RV32I-WITHFP-NEXT: sw ra, 4(sp) +; RV32I-WITHFP-NEXT: sw s0, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 8 +; RV32I-WITHFP-NEXT: addi a0, zero, 1 +; RV32I-WITHFP-NEXT: lui a1, 262144 +; RV32I-WITHFP-NEXT: call callee_float_in_regs +; RV32I-WITHFP-NEXT: lw s0, 0(sp) +; RV32I-WITHFP-NEXT: lw ra, 4(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 8 +; RV32I-WITHFP-NEXT: ret + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) nounwind { +; RV32I-FPELIM-LABEL: callee_float_on_stack: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -8 +; RV32I-FPELIM-NEXT: sw ra, 4(sp) +; RV32I-FPELIM-NEXT: sw s0, 0(sp) +; RV32I-FPELIM-NEXT: addi s0, sp, 8 +; RV32I-FPELIM-NEXT: lw a0, 8(s0) +; RV32I-FPELIM-NEXT: lw a1, 0(s0) +; RV32I-FPELIM-NEXT: add a0, a1, a0 +; RV32I-FPELIM-NEXT: addi sp, s0, -8 +; RV32I-FPELIM-NEXT: lw s0, 0(sp) +; RV32I-FPELIM-NEXT: lw ra, 4(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 8 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: callee_float_on_stack: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -8 +; RV32I-WITHFP-NEXT: sw ra, 4(sp) +; RV32I-WITHFP-NEXT: sw s0, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 8 +; RV32I-WITHFP-NEXT: lw a0, 8(s0) +; RV32I-WITHFP-NEXT: lw a1, 0(s0) +; RV32I-WITHFP-NEXT: add a0, a1, a0 +; RV32I-WITHFP-NEXT: addi sp, s0, -8 +; RV32I-WITHFP-NEXT: lw s0, 0(sp) +; RV32I-WITHFP-NEXT: lw ra, 4(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 8 +; RV32I-WITHFP-NEXT: ret + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 
+ ret i32 %3 +} + +define i32 @caller_float_on_stack() nounwind { +; RV32I-FPELIM-LABEL: caller_float_on_stack: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -24 +; RV32I-FPELIM-NEXT: sw ra, 20(sp) +; RV32I-FPELIM-NEXT: sw s0, 16(sp) +; RV32I-FPELIM-NEXT: addi s0, sp, 24 +; RV32I-FPELIM-NEXT: lui a0, 264704 +; RV32I-FPELIM-NEXT: sw a0, 8(sp) +; RV32I-FPELIM-NEXT: addi a0, zero, 4 +; RV32I-FPELIM-NEXT: sw a0, 0(sp) +; RV32I-FPELIM-NEXT: sw zero, 4(sp) +; RV32I-FPELIM-NEXT: addi a0, zero, 1 +; RV32I-FPELIM-NEXT: mv a1, zero +; RV32I-FPELIM-NEXT: addi a2, zero, 2 +; RV32I-FPELIM-NEXT: mv a3, zero +; RV32I-FPELIM-NEXT: addi a4, zero, 3 +; RV32I-FPELIM-NEXT: mv a5, zero +; RV32I-FPELIM-NEXT: call callee_float_on_stack +; RV32I-FPELIM-NEXT: addi sp, s0, -24 +; RV32I-FPELIM-NEXT: lw s0, 16(sp) +; RV32I-FPELIM-NEXT: lw ra, 20(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 24 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: caller_float_on_stack: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -24 +; RV32I-WITHFP-NEXT: sw ra, 20(sp) +; RV32I-WITHFP-NEXT: sw s0, 16(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 24 +; RV32I-WITHFP-NEXT: lui a0, 264704 +; RV32I-WITHFP-NEXT: sw a0, 8(sp) +; RV32I-WITHFP-NEXT: addi a0, zero, 4 +; RV32I-WITHFP-NEXT: sw a0, 0(sp) +; RV32I-WITHFP-NEXT: sw zero, 4(sp) +; RV32I-WITHFP-NEXT: addi a0, zero, 1 +; RV32I-WITHFP-NEXT: mv a1, zero +; RV32I-WITHFP-NEXT: addi a2, zero, 2 +; RV32I-WITHFP-NEXT: mv a3, zero +; RV32I-WITHFP-NEXT: addi a4, zero, 3 +; RV32I-WITHFP-NEXT: mv a5, zero +; RV32I-WITHFP-NEXT: call callee_float_on_stack +; RV32I-WITHFP-NEXT: addi sp, s0, -24 +; RV32I-WITHFP-NEXT: lw s0, 16(sp) +; RV32I-WITHFP-NEXT: lw ra, 20(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 24 +; RV32I-WITHFP-NEXT: ret + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() nounwind { +; RV32I-FPELIM-LABEL: callee_tiny_scalar_ret: +; RV32I-FPELIM: # %bb.0: +; 
RV32I-FPELIM-NEXT: lui a0, 260096 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: callee_tiny_scalar_ret: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -8 +; RV32I-WITHFP-NEXT: sw ra, 4(sp) +; RV32I-WITHFP-NEXT: sw s0, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 8 +; RV32I-WITHFP-NEXT: lui a0, 260096 +; RV32I-WITHFP-NEXT: lw s0, 0(sp) +; RV32I-WITHFP-NEXT: lw ra, 4(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 8 +; RV32I-WITHFP-NEXT: ret + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() nounwind { +; RV32I-FPELIM-LABEL: caller_tiny_scalar_ret: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -4 +; RV32I-FPELIM-NEXT: sw ra, 0(sp) +; RV32I-FPELIM-NEXT: call callee_tiny_scalar_ret +; RV32I-FPELIM-NEXT: lw ra, 0(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 4 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: caller_tiny_scalar_ret: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -8 +; RV32I-WITHFP-NEXT: sw ra, 4(sp) +; RV32I-WITHFP-NEXT: sw s0, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 8 +; RV32I-WITHFP-NEXT: call callee_tiny_scalar_ret +; RV32I-WITHFP-NEXT: lw s0, 0(sp) +; RV32I-WITHFP-NEXT: lw ra, 4(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 8 +; RV32I-WITHFP-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +define i32 @callee_double_on_stack(i64 %a, i64 %b, i64 %c, i32 %d, double %e) nounwind { +; RV32I-FPELIM-LABEL: callee_double_on_stack: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -8 +; RV32I-FPELIM-NEXT: sw ra, 4(sp) +; RV32I-FPELIM-NEXT: sw s0, 0(sp) +; RV32I-FPELIM-NEXT: addi s0, sp, 8 +; RV32I-FPELIM-NEXT: lw a0, 8(s0) +; RV32I-FPELIM-NEXT: add a0, a4, a0 +; RV32I-FPELIM-NEXT: addi sp, s0, -8 +; RV32I-FPELIM-NEXT: lw s0, 0(sp) +; RV32I-FPELIM-NEXT: lw ra, 4(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 8 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: callee_double_on_stack: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -8 +; RV32I-WITHFP-NEXT: 
sw ra, 4(sp) +; RV32I-WITHFP-NEXT: sw s0, 0(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 8 +; RV32I-WITHFP-NEXT: lw a0, 8(s0) +; RV32I-WITHFP-NEXT: add a0, a4, a0 +; RV32I-WITHFP-NEXT: addi sp, s0, -8 +; RV32I-WITHFP-NEXT: lw s0, 0(sp) +; RV32I-WITHFP-NEXT: lw ra, 4(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 8 +; RV32I-WITHFP-NEXT: ret + %1 = trunc i64 %c to i32 + %2 = bitcast double %e to i64 + %3 = trunc i64 %2 to i32 + %4 = add i32 %1, %3 + ret i32 %4 +} + +define i32 @caller_double_on_stack() nounwind { +; RV32I-FPELIM-LABEL: caller_double_on_stack: +; RV32I-FPELIM: # %bb.0: +; RV32I-FPELIM-NEXT: addi sp, sp, -24 +; RV32I-FPELIM-NEXT: sw ra, 20(sp) +; RV32I-FPELIM-NEXT: sw s0, 16(sp) +; RV32I-FPELIM-NEXT: addi s0, sp, 24 +; RV32I-FPELIM-NEXT: lui a0, 262464 +; RV32I-FPELIM-NEXT: sw a0, 12(sp) +; RV32I-FPELIM-NEXT: addi a0, zero, 4 +; RV32I-FPELIM-NEXT: sw a0, 0(sp) +; RV32I-FPELIM-NEXT: sw zero, 8(sp) +; RV32I-FPELIM-NEXT: addi a0, zero, 1 +; RV32I-FPELIM-NEXT: mv a1, zero +; RV32I-FPELIM-NEXT: addi a2, zero, 2 +; RV32I-FPELIM-NEXT: mv a3, zero +; RV32I-FPELIM-NEXT: addi a4, zero, 3 +; RV32I-FPELIM-NEXT: mv a5, zero +; RV32I-FPELIM-NEXT: call callee_double_on_stack +; RV32I-FPELIM-NEXT: addi sp, s0, -24 +; RV32I-FPELIM-NEXT: lw s0, 16(sp) +; RV32I-FPELIM-NEXT: lw ra, 20(sp) +; RV32I-FPELIM-NEXT: addi sp, sp, 24 +; RV32I-FPELIM-NEXT: ret +; +; RV32I-WITHFP-LABEL: caller_double_on_stack: +; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -24 +; RV32I-WITHFP-NEXT: sw ra, 20(sp) +; RV32I-WITHFP-NEXT: sw s0, 16(sp) +; RV32I-WITHFP-NEXT: addi s0, sp, 24 +; RV32I-WITHFP-NEXT: lui a0, 262464 +; RV32I-WITHFP-NEXT: sw a0, 12(sp) +; RV32I-WITHFP-NEXT: addi a0, zero, 4 +; RV32I-WITHFP-NEXT: sw a0, 0(sp) +; RV32I-WITHFP-NEXT: sw zero, 8(sp) +; RV32I-WITHFP-NEXT: addi a0, zero, 1 +; RV32I-WITHFP-NEXT: mv a1, zero +; RV32I-WITHFP-NEXT: addi a2, zero, 2 +; RV32I-WITHFP-NEXT: mv a3, zero +; RV32I-WITHFP-NEXT: addi a4, zero, 3 +; RV32I-WITHFP-NEXT: mv a5, zero +; 
RV32I-WITHFP-NEXT: call callee_double_on_stack +; RV32I-WITHFP-NEXT: addi sp, s0, -24 +; RV32I-WITHFP-NEXT: lw s0, 16(sp) +; RV32I-WITHFP-NEXT: lw ra, 20(sp) +; RV32I-WITHFP-NEXT: addi sp, sp, 24 +; RV32I-WITHFP-NEXT: ret + %1 = call i32 @callee_double_on_stack(i64 1, i64 2, i64 3, i32 4, double 5.0) + ret i32 %1 +}