diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2748,6 +2748,11 @@ let Documentation = [PreserveAllDocs]; } +def RISCVVectorCC: DeclOrTypeAttr { + let Spellings = [Clang<"riscv_vector_cc">]; + let Documentation = [RISCVVectorCCDocs]; +} + def Target : InheritableAttr { let Spellings = [GCC<"target">]; let Args = [StringArgument<"featuresStr">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5196,6 +5196,16 @@ }]; } +def RISCVVectorCCDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +The ``riscv_vector_cc`` attribute can be applied to a function. It preserves 15 +registers namely, v1-v7 and v24-v31 as callee-saved. Callers thus don't need +to save these registers before function calls, and callees only need to save +them only if they use them. + }]; +} + def PreferredNameDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -288,6 +288,7 @@ CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) + CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3273,6 +3273,7 @@ case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: + case CC_RISCVVectorCall: // FIXME: we should be mangling all of the above. return ""; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3355,6 +3355,7 @@ case CC_SwiftAsync: return "swiftasynccall"; case CC_PreserveMost: return "preserve_most"; case CC_PreserveAll: return "preserve_all"; + case CC_RISCVVectorCall: return "riscv_vector_cc"; } llvm_unreachable("Invalid calling convention."); @@ -3823,6 +3824,7 @@ case attr::IntelOclBicc: case attr::PreserveMost: case attr::PreserveAll: + case attr::RISCVVectorCC: return true; } llvm_unreachable("invalid attr kind"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1035,6 +1035,9 @@ case CC_PreserveAll: OS << " __attribute__((preserve_all))"; break; + case CC_RISCVVectorCall: + OS << "__attribute__((riscv_vector_cc))"; + break; } } @@ -1855,6 +1858,9 @@ case attr::PreserveAll: OS << "preserve_all"; break; + case attr::RISCVVectorCC: + OS << "riscv_vector_cc"; + break; case attr::NoDeref: OS << "noderef"; break; diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -106,6 +106,8 @@ bool hasBFloat16Type() const override { return true; } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; + bool useFP16ConversionIntrinsics() const override { return false; } diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -346,3 +346,14 @@ bool Is64Bit = getTriple().isArch64Bit(); llvm::RISCV::fillValidTuneCPUArchList(Values, Is64Bit); } + +TargetInfo::CallingConvCheckResult +RISCVTargetInfo::checkCallingConvention(CallingConv CC) const { + switch (CC) { + default: + return CCCR_Warning; + case CC_C: + case CC_RISCVVectorCall: + return CCCR_OK; + } +} diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -71,6 +71,7 @@ case CC_PreserveAll: return llvm::CallingConv::PreserveAll; case CC_Swift: return llvm::CallingConv::Swift; case CC_SwiftAsync: return llvm::CallingConv::SwiftTail; + case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall; } } @@ -251,6 +252,9 @@ if (D->hasAttr()) return CC_PreserveAll; + if (D->hasAttr()) + return CC_RISCVVectorCall; + return CC_C; } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5194,6 +5194,9 @@ case ParsedAttr::AT_PreserveAll: D->addAttr(::new (S.Context) PreserveAllAttr(S.Context, AL)); return; + case ParsedAttr::AT_RISCVVectorCC: + D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL)); + return; default: llvm_unreachable("unexpected attribute kind"); } @@ -5393,6 +5396,9 @@ case ParsedAttr::AT_PreserveAll: CC = CC_PreserveAll; break; + case ParsedAttr::AT_RISCVVectorCC: + CC = CC_RISCVVectorCall; + break; default: llvm_unreachable("unexpected attribute kind"); } @@ -9248,6 +9254,7 @@ case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: case ParsedAttr::AT_AMDGPUKernelCall: + case ParsedAttr::AT_RISCVVectorCC: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -135,7 +135,8 @@ case ParsedAttr::AT_Pcs: \ case ParsedAttr::AT_IntelOclBicc: \ case ParsedAttr::AT_PreserveMost: \ - case ParsedAttr::AT_PreserveAll + case ParsedAttr::AT_PreserveAll: \ + case ParsedAttr::AT_RISCVVectorCC // Function type attributes. #define FUNCTION_TYPE_ATTRS_CASELIST \ @@ -7768,6 +7769,8 @@ return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_PreserveAll: return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_RISCVVectorCC: + return createSimpleAttr(Ctx, Attr); } llvm_unreachable("unexpected attribute kind!"); } diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c @@ -0,0 +1,27 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s + +#include + +// CHECK-LLVM: call riscv_vector_cc @bar +vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input); +vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t ret; + vint32m1_t val; + val = __riscv_vle32_v_i32m1(base, vl); + ret = bar(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} + +// CHECK-LLVM: call @baz +vint32m1_t baz(vint32m1_t input); +vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t ret; + vint32m1_t val; + val = __riscv_vle32_v_i32m1(base, vl); + ret = baz(input); + __riscv_vse32_v_i32m1(base, val, vl); + return ret; +} diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp @@ -0,0 +1,19 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 %s -triple riscv64 -target-feature +v -verify + +__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}} + +__attribute__((riscv_vector_cc)) void func(); +__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}} + +void test_no_attribute(int); // expected-note {{previous declaration is here}} +void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}} + +class test_cc { + __attribute__((riscv_vector_cc)) void member_func(); +}; + +void test_lambda() { + __attribute__((riscv_vector_cc)) auto lambda = []() { // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'auto'}} + }; +} diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -175,6 +175,7 @@ kw_amdgpu_kernel, kw_amdgpu_gfx, kw_tailcc, + kw_riscv_vector_cc, // Attributes: kw_attributes, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -245,6 +245,9 @@ /// placement. Preserves active lane values for input VGPRs. AMDGPU_CS_ChainPreserve = 105, + /// Calling convention used for RISC-V V-extension. + RISCV_VectorCall = 106, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -632,6 +632,7 @@ KEYWORD(amdgpu_kernel); KEYWORD(amdgpu_gfx); KEYWORD(tailcc); + KEYWORD(riscv_vector_cc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1999,6 +1999,7 @@ /// ::= 'amdgpu_cs_chain_preserve' /// ::= 'amdgpu_kernel' /// ::= 'tailcc' +/// ::= 'riscv_vector_cc' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned &CC) { @@ -2067,6 +2068,7 @@ break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; case lltok::kw_tailcc: CC = CallingConv::Tail; break; + case lltok::kw_riscv_vector_cc:CC = CallingConv::RISCV_VectorCall; break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -350,6 +350,7 @@ break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; + case CallingConv::RISCV_VectorCall: Out << "riscv_vector_cc"; break; } } diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -24,6 +24,19 @@ : CalleeSavedRegs<(add CSR_ILP32_LP64, F8_D, F9_D, (sequence "F%u_D", 18, 27))>; +defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31), + V2M2, V4M2, V6M2, V24M2, V26M2, V28M2, V30M2, + V4M4, V24M4, V28M4, V24M8); + +def CSR_ILP32_LP64_V + : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>; + +def CSR_ILP32F_LP64F_V + : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>; + +def CSR_ILP32D_LP64D_V + : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>; + // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -373,6 +373,21 @@ return NonLibcallCSI; } +static SmallVector +getRVVCalleeSavedInfo(const MachineFunction &MF, + const std::vector &CSI) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + SmallVector RVVCSI; + + for (auto &CS : CSI) { + int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) + RVVCSI.push_back(CS); + } + + return RVVCSI; +} + void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -572,6 +587,10 @@ // directives. for (const auto &Entry : CSI) { int FrameIdx = Entry.getFrameIdx(); + if (FrameIdx >=0 && + MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) + continue; + int64_t Offset; // Offsets for objects with fixed locations (IE: those saved by libcall) are // simply calculated from the frame index. @@ -722,7 +741,7 @@ const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo()); - // Skip to before the restores of callee-saved registers + // Skip to before the restores of scalar callee-saved registers // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. auto LastFrameDestroy = MBBI; @@ -1017,15 +1036,24 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); // Create a buffer of RVV objects to allocate. SmallVector ObjectsToAllocate; - for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { - unsigned StackID = MFI.getStackID(I); - if (StackID != TargetStackID::ScalableVector) - continue; - if (MFI.isDeadObjectIndex(I)) - continue; + auto pushRVVObjects = [&](int FIBegin, int FIEnd) { + for (int I = FIBegin, E = FIEnd; I != E; ++I) { + unsigned StackID = MFI.getStackID(I); + if (StackID != TargetStackID::ScalableVector) + continue; + if (MFI.isDeadObjectIndex(I)) + continue; - ObjectsToAllocate.push_back(I); - } + ObjectsToAllocate.push_back(I); + } + }; + // First push RVV Callee Saved object, then push RVV stack object + std::vector &CSI = MF.getFrameInfo().getCalleeSavedInfo(); + const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI); + if (RVVCSI.size()) + pushRVVObjects(RVVCSI[0].getFrameIdx(), + RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1); + pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size()); // The minimum alignment is 16 bytes. Align RVVStackAlign(16); @@ -1358,13 +1386,19 @@ // Manually spill values not spilled by libcall & Push/Pop. const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); - for (auto &CS : UnmanagedCSI) { - // Insert the spill to the stack frame. - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI, Register()); - } + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); + + auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + for (auto &CS: CSInfo) { + // Insert the spill to the stack frame. + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), + RC, TRI, Register()); + } + }; + storeRegToStackSlot(UnmanagedCSI); + storeRegToStackSlot(RVVCSI); return true; } @@ -1387,14 +1421,46 @@ // first in the epilogue. It increases the opportunity to avoid the // load-to-use data hazard between loading RA and return by RA. // loadRegFromStackSlot can insert multiple instructions. + // + // + // We first change the restore order for scalar and vector + // callee-saved registers as the layout shown below: + // + // Epilog restore order (original): + // ---------------------------- + // RVV objects + // ---------------------------- + // Callee-saved regs(scalar) + // Callee-saved regs(vector) + // ---------------------------- + // + // Epilog restore order (after): + // ---------------------------- + // RVV objects + // ---------------------------- + // Callee-saved regs(vector) + // Callee-saved regs(scalar) + // ---------------------------- + // + // So that it is able to put all vector registers which need + // to be restored together. The return address will be restored + // first in the scalar regs. It increases the opportunity to avoid the + // load-to-use data hazard between loading RA and return by RA. + // loadRegFromStackSlot can insert multiple instructions. const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); - for (auto &CS : UnmanagedCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); - } + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); + + auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + for (auto &CS: CSInfo) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, + Register()); + assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + } + }; + loadRegFromStackSlot(RVVCSI); + loadRegFromStackSlot(UnmanagedCSI); RISCVMachineFunctionInfo *RVFI = MF->getInfo(); if (RVFI->isPushable(*MF)) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15485,6 +15485,7 @@ report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: + case CallingConv::RISCV_VectorCall: break; case CallingConv::GHC: if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -66,17 +66,26 @@ return CSR_Interrupt_SaveList; } + bool HasVectorCSR = + MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall; + switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (HasVectorCSR) + return CSR_ILP32_LP64_V_SaveList; return CSR_ILP32_LP64_SaveList; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (HasVectorCSR) + return CSR_ILP32F_LP64F_V_SaveList; return CSR_ILP32F_LP64F_SaveList; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (HasVectorCSR) + return CSR_ILP32D_LP64D_V_SaveList; return CSR_ILP32D_LP64D_SaveList; } } @@ -640,12 +649,18 @@ llvm_unreachable("Unrecognized ABI"); case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32_LP64_V_RegMask; return CSR_ILP32_LP64_RegMask; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32F_LP64F_V_RegMask; return CSR_ILP32F_LP64F_RegMask; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32D_LP64D_V_RegMask; return CSR_ILP32D_LP64D_RegMask; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @test_vector_std( %va) nounwind { +; SPILL-O2-LABEL: test_vector_std: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret %va +} + +define riscv_vector_cc @test_vector_callee( %va) nounwind { +; SPILL-O2-LABEL: test_vector_callee: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret %va +}