diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2748,6 +2748,11 @@ let Documentation = [PreserveAllDocs]; } +def RISCVVectorCC: DeclOrTypeAttr { + let Spellings = [Clang<"riscv_vector_cc">]; + let Documentation = [RISCVVectorCCDocs]; +} + def Target : InheritableAttr { let Spellings = [GCC<"target">]; let Args = [StringArgument<"featuresStr">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5193,6 +5193,16 @@ }]; } +def RISCVVectorCCDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +The ``riscv_vector_cc`` attribute can be applied to a function. It preserves 15 +registers namely, v1-v7 and v24-v31 as callee-saved. Callers thus don't need +to save these registers before function calls, and callees only need to save +them only if they use them. + }]; +} + def PreferredNameDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -288,6 +288,7 @@ CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) + CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3264,6 +3264,7 @@ case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: + case CC_RISCVVectorCall: // FIXME: we should be mangling all of the above. return ""; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3350,6 +3350,7 @@ case CC_SwiftAsync: return "swiftasynccall"; case CC_PreserveMost: return "preserve_most"; case CC_PreserveAll: return "preserve_all"; + case CC_RISCVVectorCall: return "riscv_vector_cc"; } llvm_unreachable("Invalid calling convention."); @@ -3818,6 +3819,7 @@ case attr::IntelOclBicc: case attr::PreserveMost: case attr::PreserveAll: + case attr::RISCVVectorCC: return true; } llvm_unreachable("invalid attr kind"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1035,6 +1035,9 @@ case CC_PreserveAll: OS << " __attribute__((preserve_all))"; break; + case CC_RISCVVectorCall: + OS << "__attribute__((riscv_vector_cc))"; + break; } } @@ -1855,6 +1858,9 @@ case attr::PreserveAll: OS << "preserve_all"; break; + case attr::RISCVVectorCC: + OS << "riscv_vector_cc"; + break; case attr::NoDeref: OS << "noderef"; break; diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -101,6 +101,8 @@ bool hasBitIntType() const override { return true; } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; + bool useFP16ConversionIntrinsics() const override { return false; } diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -346,3 +346,14 @@ bool Is64Bit = getTriple().isArch64Bit(); llvm::RISCV::fillValidTuneCPUArchList(Values, Is64Bit); } + +TargetInfo::CallingConvCheckResult +RISCVTargetInfo::checkCallingConvention(CallingConv CC) const { + switch (CC) { + default: + return CCCR_Warning; + case CC_C: + case CC_RISCVVectorCall: + return CCCR_OK; + } +} diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -71,6 +71,7 @@ case CC_PreserveAll: return llvm::CallingConv::PreserveAll; case CC_Swift: return llvm::CallingConv::Swift; case CC_SwiftAsync: return llvm::CallingConv::SwiftTail; + case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall; } } @@ -252,6 +253,9 @@ if (D->hasAttr()) return CC_PreserveAll; + if (D->hasAttr()) + return CC_RISCVVectorCall; + return CC_C; } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5194,6 +5194,9 @@ case ParsedAttr::AT_PreserveAll: D->addAttr(::new (S.Context) PreserveAllAttr(S.Context, AL)); return; + case ParsedAttr::AT_RISCVVectorCC: + D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL)); + return; default: llvm_unreachable("unexpected attribute kind"); } @@ -5393,6 +5396,9 @@ case ParsedAttr::AT_PreserveAll: CC = CC_PreserveAll; break; + case ParsedAttr::AT_RISCVVectorCC: + CC = CC_RISCVVectorCall; + break; default: llvm_unreachable("unexpected attribute kind"); } @@ -9229,6 +9235,7 @@ case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: case ParsedAttr::AT_AMDGPUKernelCall: + case ParsedAttr::AT_RISCVVectorCC: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -135,7 +135,8 @@ case ParsedAttr::AT_Pcs: \ case ParsedAttr::AT_IntelOclBicc: \ case ParsedAttr::AT_PreserveMost: \ - case ParsedAttr::AT_PreserveAll + case ParsedAttr::AT_PreserveAll: \ + case ParsedAttr::AT_RISCVVectorCC // Function type attributes. #define FUNCTION_TYPE_ATTRS_CASELIST \ @@ -7768,6 +7769,8 @@ return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_PreserveAll: return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_RISCVVectorCC: + return createSimpleAttr(Ctx, Attr); } llvm_unreachable("unexpected attribute kind!"); } diff --git a/clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c b/clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c @@ -0,0 +1,30 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include + +vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input); + +// CHECK: call riscv_vector_cc @bar +vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t ret; + vint32m1_t val; + val = vle32_v_i32m1(base, vl); + ret = bar(input); + vse32_v_i32m1(base, val, vl); + return ret; +} + +vint32m1_t baz(vint32m1_t input); + +// CHECK: call @baz +vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) { + vint32m1_t ret; + vint32m1_t val; + val = vle32_v_i32m1(base, vl); + ret = baz(input); + vse32_v_i32m1(base, val, vl); + return ret; +} diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -175,6 +175,7 @@ kw_amdgpu_kernel, kw_amdgpu_gfx, kw_tailcc, + kw_riscv_vector_cc, // Attributes: kw_attributes, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -245,6 +245,9 @@ /// placement. Preserves active lane values for input VGPRs. AMDGPU_CS_ChainPreserve = 105, + /// Calling convention used for RISC-V V-extension. + RISCV_VectorCall = 106, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -632,6 +632,7 @@ KEYWORD(amdgpu_kernel); KEYWORD(amdgpu_gfx); KEYWORD(tailcc); + KEYWORD(riscv_vector_cc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2031,6 +2031,7 @@ /// ::= 'amdgpu_cs_chain_preserve' /// ::= 'amdgpu_kernel' /// ::= 'tailcc' +/// ::= 'riscv_vector_cc' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned &CC) { @@ -2099,6 +2100,7 @@ break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; case lltok::kw_tailcc: CC = CallingConv::Tail; break; + case lltok::kw_riscv_vector_cc:CC = CallingConv::RISCV_VectorCall; break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -350,6 +350,7 @@ break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; + case CallingConv::RISCV_VectorCall: Out << "riscv_vector_cc"; break; } } diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -24,6 +24,17 @@ : CalleeSavedRegs<(add CSR_ILP32_LP64, F8_D, F9_D, (sequence "F%u_D", 18, 27))>; +defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31)); + +def CSR_ILP32_LP64_V + : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>; + +def CSR_ILP32F_LP64F_V + : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>; + +def CSR_ILP32D_LP64D_V + : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>; + // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -302,7 +302,8 @@ for (auto &CS : CSI) { int FI = CS.getFrameIdx(); - if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default) + if (FI >= 0 && (MFI.getStackID(FI) == TargetStackID::Default || + MFI.getStackID(FI) == TargetStackID::ScalableVector)) NonLibcallCSI.push_back(CS); } @@ -479,7 +480,13 @@ .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); - const auto &CSI = MFI.getCalleeSavedInfo(); + const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo()); + size_t VectorCSISize = 0; + for (auto &CS : CSI) { + int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) + VectorCSISize++; + } // The frame pointer is callee-saved, and code has been generated for us to // save it to the stack. We need to skip over the storing of callee-saved @@ -487,7 +494,7 @@ // to the stack, not before. // FIXME: assumes exactly one instruction is used to save each callee-saved // register. - std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); + std::advance(MBBI, CSI.size() - VectorCSISize); // Iterate over list of callee-saved registers and emit .cfi_offset // directives. @@ -640,8 +647,12 @@ // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. auto LastFrameDestroy = MBBI; - if (!CSI.empty()) - LastFrameDestroy = std::prev(MBBI, CSI.size()); + for (const auto &CS : CSI) { + int FI = CS.getFrameIdx(); + if (FI < 0 || MFI.getStackID(FI) != TargetStackID::Default) + break; + LastFrameDestroy = std::prev(LastFrameDestroy); + } uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); @@ -727,7 +738,8 @@ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } - if (FI >= MinCSFI && FI <= MaxCSFI) { + if (FI >= MinCSFI && FI <= MaxCSFI && + StackID != TargetStackID::ScalableVector) { FrameReg = RISCV::X2; if (FirstSPAdjustAmount) @@ -1261,20 +1273,53 @@ DL = MI->getDebugLoc(); // Manually restore values not restored by libcall. - // Keep the same order as in the prologue. There is no need to reverse the - // order in the epilogue. In addition, the return address will be restored - // first in the epilogue. It increases the opportunity to avoid the + // + // We first change the restore order for scalar and vector + // callee-saved registers as the layout shown below: + // + // Epilog restore order (original): + // ---------------------------- + // RVV objects + // ---------------------------- + // Callee-saved regs(scalar) + // Callee-saved regs(vector) + // ---------------------------- + // + // Epilog restore order (after): + // ---------------------------- + // RVV objects + // ---------------------------- + // Callee-saved regs(vector) + // Callee-saved regs(scalar) + // ---------------------------- + // + // So that it is able to put all vector registers which need + // to be restored together. The return address will be restored + // first in the scalar regs. It increases the opportunity to avoid the // load-to-use data hazard between loading RA and return by RA. // loadRegFromStackSlot can insert multiple instructions. const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + auto *It = NonLibcallCSI.begin(); + while (It != NonLibcallCSI.end()) { + int FI = It->getFrameIdx(); + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) + break; + It++; } + auto loadRegFromStackSlot = [&](decltype(It) CSBeg, decltype(It) CSEnd) { + for (auto CS = CSBeg; CS != CSEnd; CS++) { + Register Reg = CS->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS->getFrameIdx(), RC, TRI, + Register()); + assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + } + }; + loadRegFromStackSlot(It, NonLibcallCSI.end()); + loadRegFromStackSlot(NonLibcallCSI.begin(), It); + const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); if (RestoreLibCall) { // Add restore libcall via tail call. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -14890,6 +14890,7 @@ report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: + case CallingConv::RISCV_VectorCall: break; case CallingConv::GHC: if (!Subtarget.hasStdExtF() || !Subtarget.hasStdExtD()) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -66,17 +66,26 @@ return CSR_Interrupt_SaveList; } + bool HasVectorCSR = MF->getFunction().getCallingConv() == + CallingConv::RISCV_VectorCall; + switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (HasVectorCSR) + return CSR_ILP32_LP64_V_SaveList; return CSR_ILP32_LP64_SaveList; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (HasVectorCSR) + return CSR_ILP32F_LP64F_V_SaveList; return CSR_ILP32F_LP64F_SaveList; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (HasVectorCSR) + return CSR_ILP32D_LP64D_V_SaveList; return CSR_ILP32D_LP64D_SaveList; } } @@ -636,12 +645,18 @@ llvm_unreachable("Unrecognized ABI"); case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32_LP64_V_RegMask; return CSR_ILP32_LP64_RegMask; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32F_LP64F_V_RegMask; return CSR_ILP32F_LP64F_RegMask; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: + if (CC == CallingConv::RISCV_VectorCall) + return CSR_ILP32D_LP64D_V_RegMask; return CSR_ILP32D_LP64D_RegMask; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -0,0 +1,222 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @test_vector_std( %va) nounwind { +; SPILL-O2-LABEL: test_vector_std: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret %va +} + +define riscv_vector_cc @test_vector_callee( %va) nounwind { +; SPILL-O2-LABEL: test_vector_callee: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret %va +}